update

2024-05-12 13:30:04 +07:00
parent 898fa46af0
commit 8431258f1c
2 changed files with 19 additions and 43 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -1,6 +1,6 @@
 module interface
    
-export addNewMessage, conversation, decisionMaker, progressValueEstimator, reflector
+export addNewMessage, conversation, decisionMaker, evaluator, reflector
      # isterminal,

 using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient, PrettyPrinting
@@ -264,7 +264,7 @@ julia>

 # Signature
 """
-function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
+function evaluator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
  
  _prompt = 
  """
@@ -279,7 +279,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
  analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories 
  can be correct if the thoughts and actions so far are correct, even if the answer is not found 
  yet. Do not generate additional thoughts or actions. Then ending with the correctness score s 
-  where s is an integer from 1 to 10.
+  where s is an integer from 0 to 10.

  You should only respond in JSON format as describe below:
  {"evaluation": "your evaluation", "score": "your evaluation score"}
@@ -295,7 +295,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
  }
  {"evaluation": "This trajectory is correct as it is reasonable to check an inventory for info provided in the question.
                                    It is also better to have simple searches corresponding to a single entity, making this the best action.",
-    "score": 10
+    "score": 7
  }

  {
@@ -309,7 +309,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
  }
  {"evaluation": "This trajectory is incorrect as my search term should be related to a 4-colors pen with a pencil in it,
                  not a pen and a pencil seperately. A better search term should have been a 4-colors pen with a pencil, all-in-one.",
-    "score": 2
+    "score": 3
  }

  Let's begin!:
@@ -329,7 +329,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where

  msgMeta = GeneralUtils.generate_msgMeta(
    a.config[:externalservice][:text2textinstruct][:mqtttopic],
-    senderName= "progressValueEstimator",
+    senderName= "evaluator",
    senderId= a.id,
    receiverName= "text2textinstruct",
    mqttBroker= a.config[:mqttServerInfo][:broker],
@@ -374,7 +374,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
      println("")
    end
  end
-  error("progressValueEstimator failed to generate an evaluation")
+  error("evaluator failed to generate an evaluation")
 end


@@ -640,7 +640,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
          :question=> userinput[:text],
          )  
        )
-      bestplan = runMCTS(a, initialState, decisionMaker, progressValueEstimator, reflector,
+      bestplan = runMCTS(a, initialState, decisionMaker, evaluator, reflector,
                         2, 3, 4, 1.0)
      error("---> bestplan")

--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -99,27 +99,6 @@ function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}

  return selectedNode
 end
-# function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}
-#     max_uct = -Inf
-#     selectedNode = nothing
-
-#     for (childState, childNode) in node.children
-#       weightedterm = 
-#       if node.visits == 0 || childNode.visits == 0  # node.visits == 0 makes sqrt() error
-#         0
-#       else
-#         w * sqrt(log(node.visits) / childNode.visits)
-#       end
-#       uctValue = childNode.statevalue + weightedterm
-                  
-#       if uctValue > max_uct
-#           max_uct = uctValue
-#           selectedNode = childNode
-#       end
-#     end
-
-#     return selectedNode
-# end


 """ Expand selected node 
@@ -133,7 +112,7 @@ end
    a state of a game. Can be a Dict or something else.
  - `decisionMaker::Function`
    a function that outputs Thought and Action
-  - `progressValueEstimator::Function`
+  - `evaluator::Function`
    a function that outputs the trajectory progress score
  
 # Return
@@ -147,15 +126,13 @@ julia>
  [] update docstring
  [] try loop should limit to 3 times. if not succeed, skip
  [] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to that of an existing child state, in which case the two can be assumed to be the same state semantically.
-  [WORKING] store feedback -> state -> agent. 
-    But 1). how should i store state in agent?
-        2). how should I retrieve and use feedback?
+  [x] store feedback -> state -> agent.


 # Signature
 """
 function expand(a::T1, node::MCTSNode, decisionMaker::Function, 
-                progressValueEstimator::Function, reflector::Function; n::Integer=3) where {T1<:agent}
+                evaluator::Function, reflector::Function; n::Integer=3) where {T1<:agent}

  nthSample = 0
  while true
@@ -168,8 +145,8 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
      newNodeKey, newstate, reward, isterminalstate = 
                                      MCTStransition(a, node.state, thoughtDict)
      
-      # add progressValueEstimator
-      stateevaluation, progressvalue = progressValueEstimator(a, newstate)
+      # add evaluator
+      stateevaluation, progressvalue = evaluator(a, newstate)

      if reward < 0
        pprint(newstate[:thoughtHistory])
@@ -221,7 +198,7 @@ julia>

 # Signature
 """
-function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEstimator::Function, 
+function simulate(a::T, node::MCTSNode, decisionMaker::Function, evaluator::Function, 
  reflector::Function; maxDepth::Integer=3, n::Integer=3)::Number where {T<:agent}

  simTrajectoryReward = 0.0
@@ -231,7 +208,7 @@ function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEs
    if node.isterminal
      break
    else
-      expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
+      expand(a, node, decisionMaker, evaluator, reflector; n=n)
      node = selectChildNode(node)
    end
  end
@@ -266,7 +243,6 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T;
    simTrajectoryReward *= discountRewardCoeff  # discount because future reward is uncertain
    node = node.parent
  end
-  #XXX should I discount reward for fullTrajectoryReward calculation?
 end


@@ -451,7 +427,7 @@ isroot(node::MCTSNode)::Bool = node.nodekey == "root" ? true : false
    initial state
  - `decisionMaker::Function`
    decide what action to take
-  - `progressValueEstimator::Function`
+  - `evaluator::Function`
    assess the value of the state
  - `reflector::Function`
    generate lesson from trajectory and reward
@@ -483,7 +459,7 @@ function runMCTS(
  a::T1, 
  initialState, 
  decisionMaker::Function, 
-  progressValueEstimator::Function, 
+  evaluator::Function, 
  reflector::Function,
  n::Integer, 
  maxDepth::Integer, 
@@ -505,9 +481,9 @@ function runMCTS(
      # do nothing then go directly to backpropagation
      backpropagate(leafNode, node.reward)
    else
-      expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
+      expand(a, node, decisionMaker, evaluator, reflector; n=n)
      leafNode = selectChildNode(node)
-      simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator, 
+      simTrajectoryReward = simulate(a, leafNode, decisionMaker, evaluator, 
                                    reflector; maxDepth=maxDepth, n=n)
      backpropagate(leafNode, simTrajectoryReward)
    end