update

2025-03-07 13:33:38 +07:00
parent 6920be2334
commit 9add88b145
10 changed files with 28 additions and 1401 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -2,6 +2,7 @@ module interface

 export runMCTS

+using Base.Threads
 using ..type, ..mcts, ..util


@@ -45,7 +46,8 @@ function runMCTS(
  transition::Function,
  transitionargs::NamedTuple,
  ;
-  totalsample::Integer=3,
+  horizontalSampleExpansionPhase::Integer=3,
+  horizontalSampleSimulationPhase::Integer=3,
  maxdepth::Integer=3,
  maxiterations::Integer=10,
  explorationweight::Number=1.0,
@@ -69,10 +71,20 @@ function runMCTS(
      backpropagate(node, node.reward)
    else
      _ = expand(node, transition, transitionargs;
-        totalsample=totalsample)
+                horizontalSample=horizontalSampleExpansionPhase)
+      #[WORKING] make simulation parallel; leafNodes must be the newly expanded nodes
+      
      leafNode = selectChildNode(node)
-      simTrajectoryReward, terminalstate = simulate(leafNode, transition, transitionargs;
-        maxdepth=maxdepth, totalsample=totalsample)
+      
+      
+
+
+      # outputch = Channel(8)
+
+      #[WORKING] test whether multiple spawned tasks retain the resulting child nodes of leafNode
+
+      @spawn simulate(outputch, leafNode, transition, transitionargs;
+        maxdepth=maxdepth, horizontalSample=horizontalSampleSimulationPhase)
      # if terminalstate !== nothing  #XXX not sure why I need this 
      #   terminalstate[:totalTrajectoryReward] = simTrajectoryReward
      # end
@@ -82,10 +94,9 @@ function runMCTS(
      #   JSON3.pretty(io, terminalstate)
      # end

-      backpropagate(leafNode, simTrajectoryReward)
+      # result = take!(outputch)

-      # delete all child node, no need for child node that was created during simulation
-      leafNode.children = Dict{String,MCTSNode}() 
+      backpropagate(leafNode, simTrajectoryReward)
    end

    # stop if the early stop condition is met
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -231,13 +231,13 @@ end
 #   end
 # end
 function expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple; 
-          totalsample::Integer=3)
+                horizontalSample::Integer=3)

  nthSample = 0
  listOfNewNodeId = []
  while true
    nthSample += 1
-    if nthSample <= totalsample
+    if nthSample <= horizontalSample
      result = transition(node.state, transitionargs)
      newNodeKey::AbstractString = result[:newNodeKey]
      newstate::AbstractDict = result[:newstate]
@@ -252,9 +252,9 @@ function expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple;
      """
      if newNodeKey ∉ keys(node.children)
        push!(listOfNewNodeId, newNodeKey)
-        node.children[newNodeKey] = 
-          MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward], 
+        newNode = MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward], 
            newstate[:isterminal], node, Dict{String, MCTSNode}(), Dict{Symbol, Any}())
+        node.children[newNodeKey] = newNode
      end
    else
      return listOfNewNodeId
@@ -274,7 +274,7 @@ end
    Arguments for everything the user will use within transition().
  - `maxdepth::Integer`
    maximum depth level MCTS goes vertically.
-  - totalsample::Integer
+  - `horizontalSample::Integer`
    Total number of samples to draw from the current node (i.e. how many new nodes to expand horizontally)
    
 # Return
@@ -282,8 +282,8 @@ end

 # Signature
 """
-function simulate(node::MCTSNode, transition::Function, transitionargs::NamedTuple; 
-  maxdepth::Integer=3, totalsample::Integer=3
+function simulate(outputchannel::Channel, node::MCTSNode, transition::Function, transitionargs::NamedTuple; 
+  maxdepth::Integer=3, horizontalSample::Integer=3
 )::NamedTuple{(:simTrajectoryReward, :terminalstate), Tuple{Number, Union{Dict{Symbol, Any}, Nothing}}}

  simTrajectoryReward = 0.0
@@ -297,12 +297,13 @@ function simulate(node::MCTSNode, transition::Function, transitionargs::NamedTup
      break
    else
      _ = expand(node, transition, transitionargs;
-                              totalsample=totalsample)
+                horizontalSample=horizontalSample)
      node = selectChildNode(node)
    end 
  end

-  return (simTrajectoryReward=simTrajectoryReward, terminalstate=terminalstate)
+  put!(outputchannel, (simTrajectoryReward=simTrajectoryReward, terminalstate=terminalstate))
+  # return (simTrajectoryReward=simTrajectoryReward, terminalstate=terminalstate)
 end