update
This commit is contained in:
@@ -237,7 +237,8 @@ julia>
|
||||
function backpropagate(node::MCTSNode, simTrajectoryReward::T;
|
||||
discountRewardCoeff::AbstractFloat=0.9) where {T<:Number}
|
||||
# [WORKING] store best trajectory
|
||||
isLeafNodeTerminalState = true
|
||||
fullTrajectoryReward = 0
|
||||
isLeafNodeTerminalState = node.isterminal
|
||||
terminalStateReward = node.reward
|
||||
while !isroot(node)
|
||||
# Update the statistics of the current node based on the result of the playout
|
||||
@@ -246,6 +247,7 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T;
|
||||
simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
|
||||
node = node.parent
|
||||
end
|
||||
# XXX: should the reward be discounted when calculating fullTrajectoryReward?
|
||||
end
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user