update

2025-03-14 21:57:59 +07:00
parent 7e160f2031
commit 2eff443f70
1 changed files with 40 additions and 15 deletions
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -211,7 +211,6 @@ function expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple;
  end
 end

-
 function _expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple)
    result = transition(node.state, transitionargs)
    newNodeKey::AbstractString = result[:newNodeKey]
@@ -244,17 +243,24 @@ end
    Arguments for everything the user will use within transition().
  - `maxSimulationDepth::Integer`
    maximum depth level MCTS goes vertically during simulation.
-  - horizontalSample::Integer
+  - `horizontalSample::Integer`
    Total number to sample from the current node (i.e. expand new node horizontally)

+# Keyword Arguments
+  - `multithread::Bool`
+    Whether to run expansion in parallel using multiple threads. Defaults to false.
+    
 # Return
-  - `::NamedTuple{(:simTrajectoryReward, :terminalstate), Tuple{Number, Union{Dict{Symbol, Any}, Nothing}}}`
+  - `simTrajectoryReward::Number`
+    Cumulative reward collected along the simulation trajectory
+  - `terminalstate::Union{Dict{Symbol, Any}, Nothing}`
+    Final state if terminal state reached, nothing otherwise

 # Signature
 """
 function simulate(node::MCTSNode, transition::Function, transitionargs::NamedTuple; 
-  maxSimulationDepth::Integer=3, horizontalSample::Integer=3, multithread=false)
-# )::NamedTuple{(:simTrajectoryReward, :terminalstate), Tuple{Number, Union{Dict{Symbol, Any}, Nothing}}}
+  maxSimulationDepth::Integer=3, horizontalSample::Integer=3, multithread=false
+)::NamedTuple{(:simTrajectoryReward, :terminalstate), Tuple{<:Number, Union{Dict{Symbol, Any}, Nothing}}}

  simTrajectoryReward = 0.0
  terminalstate = nothing
@@ -275,38 +281,54 @@ function simulate(node::MCTSNode, transition::Function, transitionargs::NamedTup
  return (simTrajectoryReward=simTrajectoryReward, terminalstate=terminalstate)
 end

-
-"""
+""" Make new state

 # Arguments
+  - `currentstate::T1`
+    Current state dictionary containing thought history and metadata
+  - `thoughtDict::T4`
+    Dictionary containing new thought and action
+  - `response::T2`
+    Response string from the environment
+  - `select::Union{T3, Nothing}`
+    Selection value or nothing
+  - `reward::T3`
+    Reward value for this state
+  - `isterminal::Bool`
+    Whether this state is terminal

 # Return
+  - `Tuple{String, Dict{Symbol, <:Any}}`
+    A tuple containing:
+    - A unique node key string
+    - A new state dictionary with updated thought history and metadata

 # Example
 ```jldoctest
 julia> 
 ```

-# TODO
-  - [] update docstring
-  - [x] implement the function
-
 # Signature
 """
 function makeNewState(currentstate::T1, thoughtDict::T4, response::T2, select::Union{T3, Nothing}, 
  reward::T3, isterminal::Bool
  )::Tuple{String, Dict{Symbol, <:Any}} where {T1<:AbstractDict, T2<:AbstractString, T3<:Number, T4<:AbstractDict}

+  # Find the latest thought key and index from current state's thought history
  currentstate_latestThoughtKey, currentstate_latestThoughtIndice = 
      GeneralUtils.findHighestIndexKey(currentstate[:thoughtHistory], "thought")
+  # Calculate next index for new thought/action
  currentstate_nextIndice = 
              currentstate_latestThoughtKey == :NA ? 1 : currentstate_latestThoughtIndice + 1
+  # Create new keys for thought and action based on next index
  currentstate_latestThoughtKey = Symbol("thought_$currentstate_nextIndice")
  latestActionKey = Symbol("action_$currentstate_nextIndice")

+  # Find the latest thought index from input thought dictionary
  _, thoughtDict_latestThoughtIndice = 
      GeneralUtils.findHighestIndexKey(thoughtDict, "thought")
  
+  # Determine thought and action keys from thought dictionary
  thoughtDict_latestThoughtKey, thoughtDict_latestActionKey =
  if thoughtDict_latestThoughtIndice == -1
    (:thought, :action)
@@ -317,17 +339,22 @@ function makeNewState(currentstate::T1, thoughtDict::T4, response::T2, select::U
    )
  end

-  # add Thought, action, observation to thoughtHistory
+  # Create new state by deep copying current state
  newstate = deepcopy(currentstate)
+  # Update thought history with new thought
  newstate[:thoughtHistory][currentstate_latestThoughtKey] = 
      thoughtDict[thoughtDict_latestThoughtKey]
+  # Update thought history with new action
  newstate[:thoughtHistory][latestActionKey] = thoughtDict[thoughtDict_latestActionKey]
+  # Create and add new observation to thought history
  newObservationKey = Symbol("observation_$(currentstate_nextIndice)")
  newstate[:thoughtHistory][newObservationKey] = response
+  # Update state metadata
  newstate[:reward] = reward
  newstate[:select] = select
  newstate[:isterminal] = isterminal

+  # Generate unique ID for new node
  newNodeKey = GeneralUtils.uuid4snakecase()

  return (newNodeKey, newstate)
@@ -394,8 +421,6 @@ end



-
-