diff --git a/src/mcts.jl b/src/mcts.jl index 95e698e..c4d8d47 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -452,13 +452,14 @@ function runMCTS( if node.isterminal # MCTS arrive at the leaf node that is also a terminal state, # do nothing then go directly to backpropagation + backpropagate(leafNode, node.reward) else expand(a, node, decisionMaker, progressValueEstimator, isterminal, n=n) leafNode = UCTselect(node, w) simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator, isterminal, maxDepth, n=n) + backpropagate(leafNode, simTrajectoryReward) end - backpropagate(leafNode, simTrajectoryReward) end best_child_state = argmax([child.total_reward / child.visits for child in values(root.children)])