update

2025-03-18 21:23:09 +07:00
parent 693cbfd82d
commit ee5f8a8a52
2 changed files with 92 additions and 84 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -9,7 +9,6 @@ using ..type, ..mcts, ..util
 # ---------------------------------------------- 100 --------------------------------------------- #


-
 """ Search the best action to take for a given state and task 

 # Arguments
@@ -63,14 +62,15 @@ function runMCTS(
  explorationweight::Number=1.0,
  earlystop::Union{Function,Nothing}=nothing,
  saveSimulatedNode::Bool=false,
-  multithread=false
-  )::NamedTuple{(:root, :bestNextState, :bestFinalState),Tuple{MCTSNode,T,T}} where {T<:Any}
+  multithread=false,
+  )::NamedTuple{(:root, :bestNextState, :bestTerminalState, :highValueStateList),
+                Tuple{MCTSNode,T,T,Vector{Any}}} where {T<:Any}

  root = MCTSNode("root", initialstate, 0, 0, 0, 0, false, nothing, Dict{String,MCTSNode}(),
                  Dict{Symbol,Any}())

-  # [WORKING] storage for holding all high reward terminal nodes
-  highStateValueNode = Channel{Any}(100)
+  # buffered channel (capacity 100) for high-reward terminal states; put! blocks when full
+  highValueState = Channel{Any}(100)

  for nth in 1:maxiterations
    node = root
@@ -99,7 +99,7 @@ function runMCTS(
                  horizontalSampleSimulationPhase=horizontalSampleSimulationPhase,
                  saveSimulatedNode=saveSimulatedNode,
                  multithread=multithread,
-                  highStateValueNode=highStateValueNode,
+                  highValueState=highValueState,
                  )
        end
      else
@@ -109,7 +109,7 @@ function runMCTS(
            horizontalSampleSimulationPhase=horizontalSampleSimulationPhase,
            saveSimulatedNode=saveSimulatedNode,
            multithread=multithread,
-            highStateValueNode=highStateValueNode)
+            highValueState=highValueState)
        end
      end
    end
@@ -120,15 +120,23 @@ function runMCTS(
    end
  end

-  # select the best next state and the best final state
+  # select the best next state and the best terminal state along the best trajectory
  bestNextState = selectBestNextNode(root)
-  besttrajectory = selectBestTrajectoryNode(root)
+  bestTerminalState = selectBestTrajectoryNode(root)

-  #[WORKING] compare all high value answer then select the best one
+  # drain every high-value state from the highValueState channel into a list
+  highValueStateList = Any[]
+  while !isempty(highValueState)
+    push!(highValueStateList, take!(highValueState))
+  end

-  return (root=root, bestNextState=bestNextState.state, bestFinalState=besttrajectory.state)
+  return (root=root, 
+          bestNextState=bestNextState.state, 
+          bestTerminalState=bestTerminalState.state,
+          highValueStateList=highValueStateList)
 end

+
 """ Search the best action to take for a given state and task 

 # Arguments
@@ -156,18 +164,18 @@ function simulateThenBackpropagate(node::MCTSNode, transition::Function, transit
                maxSimulationDepth::Integer=3, horizontalSampleSimulationPhase::Integer=3,
                saveSimulatedNode::Bool=false,
                multithread=false,
-                highStateValueNode=Union{Nothing,Any}=nothing)
+                highValueState=Union{Nothing,Any}=nothing)
  simTrajectoryReward, terminalstate = 
    simulate(node, transition, transitionargs;
            maxSimulationDepth=maxSimulationDepth, 
            horizontalSample=horizontalSampleSimulationPhase,
            multithread=multithread)
-  # if a node has state value >= 8, store it in highStateValueNode
-  if highStateValueNode !== nothing && 
+  # if the simulated terminal state has reward >= 8, store a deep copy in highValueState
+  if highValueState !== nothing && 
    terminalstate !== nothing &&
    terminalstate[:reward] >= 8

-    put!(highStateValueNode, deepcopy(terminalstate))
+    put!(highValueState, deepcopy(terminalstate))
  end

  backpropagate(node, simTrajectoryReward)