update

2024-04-22 17:41:52 +07:00
parent ee1446b1e2
commit 1962035990
3 changed files with 86 additions and 50 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -108,9 +108,12 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
  
    
    else    #[WORKING] new thinking 
-      
-
-      initialState = 0
+      initialState = Dict(
+        :info=> Dict(),           # keyword info
+        :thought=> nothing,
+        :action=> nothing,
+        :observation=> nothing,
+      )
      bestplan = runMCTS(initialState, decisionMaker, stateValueEstimator, reflector,
                  3, 10, 1000, 1.0)
      error("---> bestplan")
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -5,17 +5,22 @@

 module mcts
  
-export runMCTS
+export MCTSNode, runMCTS, decisionMaker, stateValueEstimator, reflector

 using Dates, UUIDs, DataStructures, JSON3, Random
 using GeneralUtils

 # ---------------------------------------------- 100 --------------------------------------------- #

-"""
+""" a node for MCTS search tree

  Arguments\n
  -----
+    state::T
+      Represents a state of the game. Can be a Dict or any other type.
+    visits::Integer 
+      number of times the game visits this state
+    stateValue::Float64
    
  Return\n
  -----
@@ -29,15 +34,15 @@ using GeneralUtils
  TODO\n
  -----
    [] update docstring
-    [] implement the function
+    [DONE] implement the function

  Signature\n
  -----
 """
 struct MCTSNode{T}
  state::T
-  visits::Int
-  stateValue::Float64
+  visits::Integer
+  stateValue::AbstractFloat
  children::Dict{T, MCTSNode}
 end

@@ -107,20 +112,9 @@ end
 function expand(node::MCTSNode, state::T, decisionMaker::Function, stateValueEstimator::Function;
                n::Integer=3) where {T<:Any}

-  actions = []
-
  # sampling action from decisionMaker
-  # for nth in 1:n
-
-
-  # end
-
-
-
-
-
-  for action in actions
-      newState = transition(node.state, action)  # Implement your transition function
+  for sample in 1:n
+    newState = transition(node.state, action)  #[] Implement your transition function
    if newState ∉ keys(node.children)
        node.children[newState] = MCTSNode(newState, 0, 0.0, Dict{T, MCTSNode}())
    end
@@ -145,6 +139,7 @@ end
  -----
    [] update docstring
    [] implement the function
+    [] reward only comes at terminal state

  Signature\n
  -----
@@ -152,8 +147,12 @@ end
 function simulate(state::T, max_depth::Int) where {T<:Any}
    total_reward = 0.0
    for _ in 1:max_depth
-        action = select_action(state)  # Implement your action selection function
-        state, reward = transition(state, action)  # Implement your transition function
+      #[] Implement your action selection function based on highest stateValue
+      action = select_action(state) # current state 
+      state, reward = transition(state, action)  # Implement transition function to a new state
+
+      #[] check for the terminal state
+
      total_reward += reward
    end
    return total_reward
@@ -183,6 +182,8 @@ end
 """
 function backpropagate(node::MCTSNode, reward::Float64)
    node.visits += 1
+
+    # [] there is no total_reward in the paper, but they use stateValue
    node.total_reward += reward 
    if !isempty(node.children)
        best_child = argmax([child.total_reward / child.visits for child in values(node.children)])
@@ -216,25 +217,27 @@ function transition(state, action)

 end

-""" Check whether a node is a leaf node
+""" Check whether a node is a leaf node of a tree

  Arguments\n
  -----
+    node::MCTSNode
+      node of a tree
    
  Return\n
  -----
-    a task represent an agent
+    result::Bool
+      true if the node is a leaf node of a tree, otherwise false

  Example\n
  -----
  ```jldoctest
-  julia> 
+  julia> using 
  ```

  TODO\n
  -----
    [] update docstring
-    [DONE] implement isLeaf()

  Signature\n
  -----
@@ -320,6 +323,34 @@ function reflector()

 end

+"""
+
+  Arguments\n
+  -----
+    
+  Return\n
+  -----
+
+  Example\n
+  -----
+  ```jldoctest
+  julia> 
+  ```
+
+  TODO\n
+  -----
+    [] update docstring
+    [] implement the function
+    [] implement RAG to pull similar experience
+
+  Signature\n
+  -----
+"""
+function isTerminal()
+
+end
+
+
 # ------------------------------------------------------------------------------------------------ #
 #                    Create a complete example using the defined MCTS functions                    #
 # ------------------------------------------------------------------------------------------------ #
@@ -342,6 +373,8 @@ end
    
  Return\n
  -----
+    plan::Vector{Dict}
+      best plan

  Example\n
  -----
@@ -357,20 +390,20 @@ end
  -----
 """
 function runMCTS(initialState, decisionMaker::Function, stateValueEstimator::Function, 
-                reflector::Function, totalActionSampled::Integer, maxDepth::Integer, 
+                reflector::Function, n::Integer, maxDepth::Integer, 
                maxIterations::Integer, w::Float64)
-    root = MCTSNode(initialState, 0, 0.0, Dict())
-
+  statetype = typeof(initialState)
+  root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}())
+  error("---> runMCTS")
  for _ in 1:maxIterations
    node = root
    while !isLeaf(node)
      node = select(node, w)
    end

-      expand(node, node.state, decisionMaker, stateValueEstimator, 
-            n=n)
+    expand(node, node.state, decisionMaker, stateValueEstimator, n=n)

-      leaf_node = node.children[node.state]
+    leaf_node = node.children[node.state] # mark leaf node
    reward = simulate(leaf_node.state, maxDepth)
    backpropagate(leaf_node, reward)
  end
--- a/test/runtest.jl
+++ b/test/runtest.jl
@@ -63,7 +63,7 @@ tools=Dict( # update input format
    tools=tools,
  )

-response = YiemAgent.conversation(a, Dict(:text=> "newtopic", ) )
+response = YiemAgent.conversation(a, Dict(:text=> "hello", ) )