update

2024-05-07 21:24:51 +07:00
parent 19d6746749
commit 68c0e7631a
2 changed files with 9 additions and 1 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -440,6 +440,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
        :select=> nothing,
        :reward=> 0,
        :isterminal=> false,
+        :feedback=> nothing,
        :thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ...
          # :recap=>,
          :question=> userinput[:text],
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -126,7 +126,10 @@ julia>
  [] update docstring
  [] try loop should limit to 3 times. if not succeed, skip
  [] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to that of an existing child state, in which case the two can be assumed to be the same state semantically.
-  [WORKING] add reflector()
+  [WORKING] store feedback -> state -> agent. 
+    But 1) how should I store the state in the agent?
+        2) how should I retrieve and use the feedback?
+

 # Signature
 """
@@ -144,6 +147,10 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
      # add progressValueEstimator
      stateevaluation, statevalue = progressValueEstimator(a, newstate)

+      if reward < 0
+        newstate.feedback = stateevaluation
+      end
+
      if newNodeKey ∉ keys(node.children)
        node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, stateevaluation, statevalue, 
                            reward, isterminalstate, node, Dict{String, MCTSNode}())