update

2024-10-14 09:13:46 +07:00
parent 96bee0341a
commit 351bccc059
4 changed files with 169 additions and 93 deletions
--- a/src/LLMMCTS.jl
+++ b/src/LLMMCTS.jl
@@ -22,6 +22,16 @@ module LLMMCTS
  
 # ---------------------------------------------- 100 --------------------------------------------- #

+""" version 0.0.2
+    Todo:
+        - [] 
+
+    Change from version: 0.0.1
+        - 
+    
+    All features
+        
+"""



--- a/src/interface.jl
+++ b/src/interface.jl
@@ -45,13 +45,14 @@ function runMCTS(
  transition::Function,
  transitionargs::NamedTuple,
  ;
-  totalsample::Integer=3, 
-  maxdepth::Integer=3, 
-  maxiterations::Integer=10, 
+  totalsample::Integer=3,
+  maxdepth::Integer=3,
+  maxiterations::Integer=10,
  explorationweight::Number=1.0,
-  )::NamedTuple{(:bestNextState, :bestFinalState), Tuple{T, T}} where {T<:Any}
-  
-  root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String, MCTSNode}())
+  earlystop::Union{Function,Nothing}=nothing
+)::NamedTuple{(:bestNextState, :bestFinalState),Tuple{T,T}} where {T<:Any}
+
+  root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String,MCTSNode}())

  for nth in 1:maxiterations
    node = root
@@ -60,19 +61,20 @@ function runMCTS(
    while !isleaf(node)
      node = UCTselect(node, explorationweight)
    end
+
    if node.isterminal
      # MCTS arrive at the leaf node that is also a terminal state,
-      # do nothing then go directly to backpropagation
-      backpropagate(leafNode, node.reward)
+      # so skip expansion and simulation and go directly to backpropagation, which ends this iteration
+      backpropagate(node, node.reward)
    else
-      expand(node, transition, transitionargs; 
-            totalsample=totalsample)
+      expand(node, transition, transitionargs;
+        totalsample=totalsample)
      leafNode = selectChildNode(node)
-      simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs; 
-                                                  maxdepth=maxdepth, totalsample=totalsample)
-      if terminalstate !== nothing  #XXX not sure why I need this 
-        terminalstate[:totalTrajectoryReward] = simTrajectoryReward
-      end
+      simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs;
+        maxdepth=maxdepth, totalsample=totalsample)
+      # if terminalstate !== nothing  #XXX not sure why I need this 
+      #   terminalstate[:totalTrajectoryReward] = simTrajectoryReward
+      # end

      #[] write best state to file if it has higher simTrajectoryReward. Use to improve evaluation
      # open("trajectory.json", "w") do io
@@ -81,6 +83,13 @@ function runMCTS(

      backpropagate(leafNode, simTrajectoryReward)
    end
+
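+    # early-stop hook. NOTE(review): `result` is never checked, so the loop breaks unconditionally whenever `earlystop` is a Function -- confirm intent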
+    if typeof(earlystop) <: Function
+      result = earlystop(node.state)
+
+      break
+    end
  end

  bestNextState = selectBestNextNode(root)
@@ -90,6 +99,56 @@ function runMCTS(
 end


+# function runMCTS(
+#   initialstate::T,
+#   transition::Function,
+#   transitionargs::NamedTuple,
+#   ;
+#   totalsample::Integer=3,
+#   maxdepth::Integer=3,
+#   maxiterations::Integer=10,
+#   explorationweight::Number=1.0,
+# )::NamedTuple{(:bestNextState, :bestFinalState),Tuple{T,T}} where {T<:Any}
+
+#   root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String,MCTSNode}())
+
+#   for nth in 1:maxiterations
+#     node = root
+#     node.visits += 1
+
+#     while !isleaf(node)
+#       node = UCTselect(node, explorationweight)
+#     end
+#     if node.isterminal
+#       # MCTS arrive at the leaf node that is also a terminal state,
+#       # do nothing then go directly to backpropagation. It means the end of this iteration
+#       backpropagate(leafNode, node.reward)
+#     else
+#       expand(node, transition, transitionargs;
+#         totalsample=totalsample)
+#       leafNode = selectChildNode(node)
+#       simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs;
+#         maxdepth=maxdepth, totalsample=totalsample)
+#       # if terminalstate !== nothing  #XXX not sure why I need this 
+#       #   terminalstate[:totalTrajectoryReward] = simTrajectoryReward
+#       # end
+
+#       #[] write best state to file if it has higher simTrajectoryReward. Use to improve evaluation
+#       # open("trajectory.json", "w") do io
+#       #   JSON3.pretty(io, terminalstate)
+#       # end
+
+#       backpropagate(leafNode, simTrajectoryReward)
+#     end
+#   end
+
+#   bestNextState = selectBestNextNode(root)
+#   besttrajectory = selectBestTrajectoryNode(root)
+
+#   return (bestNextState=bestNextState.state, bestFinalState=besttrajectory.state)
+# end
+
+



--- a/src/type.jl
+++ b/src/type.jl
@@ -44,8 +44,8 @@ mutable struct MCTSNode{T1<:AbstractDict, T2<:AbstractString}
  state::T1
  visits::Integer
  progressvalue::Number # estimate value by LLM's reasoning
-  statevalue::Number  # store discounted commulative reward (gather from its child node)
-  reward::Number  # this node's own reward
+  statevalue::Number  # current state value: this node's immediate reward plus all discounted future rewards (gathered from its child nodes)
+  reward::Number  # this node's immediate reward
  isterminal::Bool
  parent::Union{MCTSNode, Nothing} 
  children::Dict{String, MCTSNode}