This commit is contained in:
narawat lamaiin
2024-05-06 20:46:34 +07:00
parent 99ec9a4960
commit f4729b7039

View File

@@ -452,13 +452,14 @@ function runMCTS(
if node.isterminal
# MCTS has arrived at a leaf node that is also a terminal state:
# skip expansion and simulation and go directly to backpropagation
backpropagate(leafNode, node.reward)
else
expand(a, node, decisionMaker, progressValueEstimator, isterminal, n=n)
leafNode = UCTselect(node, w)
simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator,
isterminal, maxDepth, n=n)
backpropagate(leafNode, simTrajectoryReward)
end
backpropagate(leafNode, simTrajectoryReward)
end
best_child_state = argmax([child.total_reward / child.visits for child in values(root.children)])