update
This commit is contained in:
@@ -237,7 +237,8 @@ julia>
|
|||||||
function backpropagate(node::MCTSNode, simTrajectoryReward::T;
|
function backpropagate(node::MCTSNode, simTrajectoryReward::T;
|
||||||
discountRewardCoeff::AbstractFloat=0.9) where {T<:Number}
|
discountRewardCoeff::AbstractFloat=0.9) where {T<:Number}
|
||||||
# [WORKING] store best trajectory
|
# [WORKING] store best trajectory
|
||||||
isLeafNodeTerminalState = true
|
fullTrajectoryReward = 0
|
||||||
|
isLeafNodeTerminalState = node.isterminal
|
||||||
terminalStateReward = node.reward
|
terminalStateReward = node.reward
|
||||||
while !isroot(node)
|
while !isroot(node)
|
||||||
# Update the statistics of the current node based on the result of the playout
|
# Update the statistics of the current node based on the result of the playout
|
||||||
@@ -246,6 +247,7 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T;
|
|||||||
simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
|
simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
|
||||||
node = node.parent
|
node = node.parent
|
||||||
end
|
end
|
||||||
|
# XXX: should the reward be discounted when computing fullTrajectoryReward?
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user