update

2024-05-06 14:48:03 +07:00
parent 097a51b956
commit 89c6af780f
2 changed files with 16 additions and 5 deletions
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -349,7 +349,7 @@ function selectChildNode(node::MCTSNode)::MCTSNode
  # loop through the node's children dictionary to find the highest progress value
  for (k, childNode) in node.children
    if childNode.statevalue > highestProgressValue
-      highestProgressValue = childNode.statevalue
+      highestProgressValue = childNode.statevalue + childNode.reward
      nodekey = childNode.nodekey
    end
  end
@@ -402,7 +402,9 @@ isroot(node::MCTSNode)::Bool = node.nodekey == "root" ? true : false
  - `n::Integer`
    how many times action will be sampled from decisionMaker 
  - `w::Float64`
-    exploration weight
+    exploration weight. Value is usually between 1 and 2.
    Value 1.0 makes MCTS balance between exploration and exploitation like 50%-50%
    Value 2.0 makes MCTS aggressively search the tree
 # Return
  - `plan::Vector{Dict}`
@@ -441,7 +443,7 @@ function runMCTS(
    expand(a, node, decisionMaker, progressValueEstimator, isterminal, n=n)
-    leaf_node = selectChildNode(node)
+    leaf_node = UCTselect(node, w)
    simTrajectoryReward = simulate(a, leaf_node, decisionMaker, progressValueEstimator,
                      isterminal, maxDepth, n=n)
    backpropagate(leaf_node, simTrajectoryReward)
--- a/test/test_1.jl
+++ b/test/test_1.jl
@@ -79,7 +79,17 @@ result = GeneralUtils.sendMqttMsg(outgoingMsg)
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
  :payload=> Dict(
-    :text=> "<<ok>>",
+    :text=> "<<OK, I'll take it.>>",
  )
 )
 result = GeneralUtils.sendMqttMsg(outgoingMsg)
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
  :payload=> Dict(
    :text=> "Dry please.",
  )
 )
 result = GeneralUtils.sendMqttMsg(outgoingMsg)
@@ -93,4 +103,3 @@ result = GeneralUtils.sendMqttMsg(outgoingMsg)