diff --git a/src/mcts.jl b/src/mcts.jl index 7bdd1d9..95c1d8d 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -237,7 +237,8 @@ julia> function backpropagate(node::MCTSNode, simTrajectoryReward::T; discountRewardCoeff::AbstractFloat=0.9) where {T<:Number} # [WORKING] store best trajectory - isLeafNodeTerminalState = true + fullTrajectoryReward = 0 + isLeafNodeTerminalState = node.isterminal terminalStateReward = node.reward while !isroot(node) # Update the statistics of the current node based on the result of the playout @@ -246,6 +247,7 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T; simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain node = node.parent end + #XXX should I discount reward for fullTrajectoryReward calculation? end