update

2024-10-14 09:13:46 +07:00
parent 96bee0341a
commit 351bccc059
4 changed files with 169 additions and 93 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -45,13 +45,14 @@ function runMCTS(
  transition::Function,
  transitionargs::NamedTuple,
  ;
-  totalsample::Integer=3, 
-  maxdepth::Integer=3, 
-  maxiterations::Integer=10, 
+  totalsample::Integer=3,
+  maxdepth::Integer=3,
+  maxiterations::Integer=10,
  explorationweight::Number=1.0,
-  )::NamedTuple{(:bestNextState, :bestFinalState), Tuple{T, T}} where {T<:Any}
-  
-  root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String, MCTSNode}())
+  earlystop::Union{Function,Nothing}=nothing
+)::NamedTuple{(:bestNextState, :bestFinalState),Tuple{T,T}} where {T<:Any}
+
+  root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String,MCTSNode}())

  for nth in 1:maxiterations
    node = root
@@ -60,19 +61,20 @@ function runMCTS(
    while !isleaf(node)
      node = UCTselect(node, explorationweight)
    end
+
    if node.isterminal
      # MCTS arrive at the leaf node that is also a terminal state,
-      # do nothing then go directly to backpropagation
-      backpropagate(leafNode, node.reward)
+      # so skip expansion and simulation and backpropagate this node's own reward directly.
+      backpropagate(node, node.reward)
    else
-      expand(node, transition, transitionargs; 
-            totalsample=totalsample)
+      expand(node, transition, transitionargs;
+        totalsample=totalsample)
      leafNode = selectChildNode(node)
-      simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs; 
-                                                  maxdepth=maxdepth, totalsample=totalsample)
-      if terminalstate !== nothing  #XXX not sure why I need this 
-        terminalstate[:totalTrajectoryReward] = simTrajectoryReward
-      end
+      simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs;
+        maxdepth=maxdepth, totalsample=totalsample)
+      # if terminalstate !== nothing  #XXX not sure why I need this 
+      #   terminalstate[:totalTrajectoryReward] = simTrajectoryReward
+      # end

      #[] write best state to file if it has higher simTrajectoryReward. Use to improve evaluation
      # open("trajectory.json", "w") do io
@@ -81,6 +83,13 @@ function runMCTS(

      backpropagate(leafNode, simTrajectoryReward)
    end
+
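+    # Early-stop hook on the selected node's state. NOTE(review): `result` is never checked, so this always breaks after the first iteration when earlystop is a Function; confirm intent.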
+    if typeof(earlystop) <: Function
+      result = earlystop(node.state)
+
+      break
+    end
  end

  bestNextState = selectBestNextNode(root)
@@ -90,6 +99,56 @@ function runMCTS(
 end


+# function runMCTS(
+#   initialstate::T,
+#   transition::Function,
+#   transitionargs::NamedTuple,
+#   ;
+#   totalsample::Integer=3,
+#   maxdepth::Integer=3,
+#   maxiterations::Integer=10,
+#   explorationweight::Number=1.0,
+# )::NamedTuple{(:bestNextState, :bestFinalState),Tuple{T,T}} where {T<:Any}
+
+#   root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String,MCTSNode}())
+
+#   for nth in 1:maxiterations
+#     node = root
+#     node.visits += 1
+
+#     while !isleaf(node)
+#       node = UCTselect(node, explorationweight)
+#     end
+#     if node.isterminal
+#       # MCTS arrive at the leaf node that is also a terminal state,
+#       # do nothing then go directly to backpropagation. It means the end of this iteration
+#       backpropagate(leafNode, node.reward)
+#     else
+#       expand(node, transition, transitionargs;
+#         totalsample=totalsample)
+#       leafNode = selectChildNode(node)
+#       simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs;
+#         maxdepth=maxdepth, totalsample=totalsample)
+#       # if terminalstate !== nothing  #XXX not sure why I need this 
+#       #   terminalstate[:totalTrajectoryReward] = simTrajectoryReward
+#       # end
+
+#       #[] write best state to file if it has higher simTrajectoryReward. Use to improve evaluation
+#       # open("trajectory.json", "w") do io
+#       #   JSON3.pretty(io, terminalstate)
+#       # end
+
+#       backpropagate(leafNode, simTrajectoryReward)
+#     end
+#   end
+
+#   bestNextState = selectBestNextNode(root)
+#   besttrajectory = selectBestTrajectoryNode(root)
+
+#   return (bestNextState=bestNextState.state, bestFinalState=besttrajectory.state)
+# end
+
+