diff --git a/src/mcts.jl b/src/mcts.jl index b751556..680d87c 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -349,7 +349,7 @@ function selectChildNode(node::MCTSNode)::MCTSNode # loop thought node children dictionary to find the highest progress value for (k, childNode) in node.children if childNode.statevalue > highestProgressValue - highestProgressValue = childNode.statevalue + highestProgressValue = childNode.statevalue + childNode.reward nodekey = childNode.nodekey end end @@ -402,7 +402,9 @@ isroot(node::MCTSNode)::Bool = node.nodekey == "root" ? true : false - `n::Integer` how many times action will be sampled from decisionMaker - `w::Float64` - exploration weight + exploration weight. The value is usually between 1 and 2. + A value of 1.0 makes MCTS balance exploration and exploitation roughly 50%-50%. + A value of 2.0 makes MCTS search the tree aggressively. # Return - `plan::Vector{Dict}` @@ -441,7 +443,7 @@ function runMCTS( expand(a, node, decisionMaker, progressValueEstimator, isterminal, n=n) - leaf_node = selectChildNode(node) + leaf_node = UCTselect(node, w) simTrajectoryReward = simulate(a, leaf_node, decisionMaker, progressValueEstimator, isterminal, maxDepth, n=n) backpropagate(leaf_node, simTrajectoryReward) diff --git a/test/test_1.jl b/test/test_1.jl index 192e9df..413bb74 100644 --- a/test/test_1.jl +++ b/test/test_1.jl @@ -79,7 +79,17 @@ result = GeneralUtils.sendMqttMsg(outgoingMsg) outgoingMsg = Dict( :msgMeta=> msgMeta, :payload=> Dict( - :text=> "<>", + :text=> "<>", + ) +) +result = GeneralUtils.sendMqttMsg(outgoingMsg) + + + +outgoingMsg = Dict( + :msgMeta=> msgMeta, + :payload=> Dict( + :text=> "Dry please.", ) ) result = GeneralUtils.sendMqttMsg(outgoingMsg) @@ -93,4 +103,3 @@ result = GeneralUtils.sendMqttMsg(outgoingMsg) -