This commit is contained in:
narawat lamaiin
2024-05-10 20:27:05 +07:00
parent e5e91fc15f
commit d88c613b0e

View File

@@ -237,7 +237,8 @@ julia>
function backpropagate(node::MCTSNode, simTrajectoryReward::T; function backpropagate(node::MCTSNode, simTrajectoryReward::T;
discountRewardCoeff::AbstractFloat=0.9) where {T<:Number} discountRewardCoeff::AbstractFloat=0.9) where {T<:Number}
# [WORKING] store best trajectory # [WORKING] store best trajectory
isLeafNodeTerminalState = true fullTrajectoryReward = 0
isLeafNodeTerminalState = node.isterminal
terminalStateReward = node.reward terminalStateReward = node.reward
while !isroot(node) while !isroot(node)
# Update the statistics of the current node based on the result of the playout # Update the statistics of the current node based on the result of the playout
@@ -246,6 +247,7 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T;
simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
node = node.parent node = node.parent
end end
#XXX should I discount reward for fullTrajectoryReward calculation?
end end