update
This commit is contained in:
@@ -452,13 +452,14 @@ function runMCTS(
|
||||
if node.isterminal
|
||||
# MCTS arrive at the leaf node that is also a terminal state,
|
||||
# do nothing then go directly to backpropagation
|
||||
backpropagate(leafNode, node.reward)
|
||||
else
|
||||
expand(a, node, decisionMaker, progressValueEstimator, isterminal, n=n)
|
||||
leafNode = UCTselect(node, w)
|
||||
simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator,
|
||||
isterminal, maxDepth, n=n)
|
||||
backpropagate(leafNode, simTrajectoryReward)
|
||||
end
|
||||
backpropagate(leafNode, simTrajectoryReward)
|
||||
end
|
||||
|
||||
best_child_state = argmax([child.total_reward / child.visits for child in values(root.children)])
|
||||
|
||||
Reference in New Issue
Block a user