update

2024-04-27 17:44:55 +07:00
parent 1ead7dba67
commit db42a55e00
3 changed files with 108 additions and 48 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -1,6 +1,6 @@
 module interface
    
-export addNewMessage, conversation, decisionMaker
+export addNewMessage, conversation, decisionMaker, isterminal

 using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient
 using GeneralUtils
@@ -73,15 +73,60 @@ function decisionMaker(a::T1, state::T2) where {T1<:agent, T2<:AbstractDict}
  $(JSON3.write(state[:storeinfo]))
  """

+  reflect = ""
+  # """
+  # You have attempted to answer the following question before and failed. The following 
+  # reflection(s) give a plan to avoid failing to answer the question in the same way you did 
+  # previously. Use them to improve your strategy of correctly answering the given question.
+  # (trajectories)
+  # """
+
+
  #[WORKING] 
  _prompt = 
  """
  You are a helpful sommelier working for a wine store. 
-    You helps users by searching wine that match the user preferences from your inventory.
+  Your goal is to reccommend the best wine from your inventory that match the user preferences.

  $customerinfo

-    You must follow the 
+  You must follow the following criteria:
+  1) Get to know what occasion the user is buying wine for
+  2) Get to know what food the user will have with wine
+  3) Get to know how much the user willing to spend
+  4) Get to know type of wine the user is looking for 
+    e.g. Red, White, Sparkling, Rose, Dessert, Fortified
+  5) Get to know what wine characteristics the user is looking for 
+    e.g. tannin, sweetness, intensity, acidity
+  6) Check your inventory for the best wine that match the user preference
+
+  You should only respond with interleaving step-by-step Thought, Action, Observation steps. 
+  Thought can reason about the current situation, and Action can be three types:
+  1) Chatbox[text], which you can use to interact with the user.
+  2) Winestock[query], which you can use to find wine in your inventory.
+  3) Finish[answer], which returns your wine reccommendation to the user.
+
+  You should only respond in JSON format as describe below:
+  {
+    "Thought_1": "reasoning",
+    "Thought_2": "reasoning",
+    ...
+    "Thought_n": "reasoning",
+    "Action_1": "action to take",
+    "Observation_1": "result of the action"
+  }
+
+  Here are some examples:
+  {
+    "Question": "I would like to buy a sedan",
+    "Thought_1": "I have many car in my inventory suitable for several usage scenarios",
+    "Thought_2": "It would be better if I know what the user intend to do with his car",
+    "Thought_3": "
+  }
+
+  $reflect
+
+  $(JSON3.write(state[:thoughtHistory]))
  """

  prompt =  formatLLMtext_llama3instruct("system", _prompt)
@@ -89,6 +134,7 @@ function decisionMaker(a::T1, state::T2) where {T1<:agent, T2<:AbstractDict}
  thought = iterativeprompting(a, prompt, syntaxcheck_json)


+
  error("--> decisionMaker")
  return thought
 end
@@ -148,6 +194,35 @@ function reflector()
 end


+""" Intended to determine whether a given state is a terminal state (not implemented yet).
+
+  Arguments\n
+  -----
+    none yet; NOTE(review): presumably the state to test will be passed in - confirm
+  Return\n
+  -----
+    `nothing` for now, because the function body is empty
+  Example\n
+  -----
+  ```jldoctest
+  julia> isterminal() === nothing
+  true
+  ```
+  TODO\n
+  -----
+    [] update docstring
+    [] implement the function
+    [] implement RAG to pull similar experience
+
+  Signature\n
+  -----
+"""
+function isterminal()
+
+end
+
+
+

 """ Chat with llm.

--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -264,36 +264,9 @@ end
  Signature\n
  -----
 """
-isLeaf(node::MCTSNode)::Bool = isempty(node.children)
+isleaf(node::MCTSNode)::Bool = isempty(node.children)


-"""
-
-  Arguments\n
-  -----
-    
-  Return\n
-  -----
-
-  Example\n
-  -----
-  ```jldoctest
-  julia> 
-  ```
-
-  TODO\n
-  -----
-    [] update docstring
-    [] implement the function
-    [] implement RAG to pull similar experience
-
-  Signature\n
-  -----
-"""
-function isTerminal()
-
-end
-
 """

  Arguments\n
@@ -324,10 +297,12 @@ end
 # ------------------------------------------------------------------------------------------------ #
 #                    Create a complete example using the defined MCTS functions                    #
 # ------------------------------------------------------------------------------------------------ #
-""" Search for best action
+""" Search for the best action to take for a given state and task

  Arguments\n
  -----
+    a::agent
+      one of Yiem's agents
    initial state
      initial state
    decisionMaker::Function
@@ -336,6 +311,8 @@ end
      assess the value of the state
    reflector::Function
      generate lesson from trajectory and reward
+    isterminal::Function
+      determine whether a given state is a terminal state
    n::Integer
      how many times action will be sampled from decisionMaker 
    w::Float64
@@ -359,15 +336,23 @@ end
  Signature\n
  -----
 """
-function runMCTS(a::T, initialState, decisionMaker::Function, stateValueEstimator::Function, 
-                reflector::Function, n::Integer, maxDepth::Integer, 
-                maxIterations::Integer, w::Float64) where {T<:agent}
+function runMCTS(
+  a::T1, 
+  initialState, 
+  decisionMaker::Function, 
+  stateValueEstimator::Function, 
+  reflector::Function,
+  isterminal::Function,
+  n::Integer, 
+  maxDepth::Integer, 
+  maxIterations::Integer, w::Float64) where {T1<:agent}
+  
  statetype = typeof(initialState)
  root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}())
  
  for _ in 1:maxIterations
    node = root
-    while !isLeaf(node)
+    while !isleaf(node)
      node = select(node, w)
    end

--- a/src/util.jl
+++ b/src/util.jl
@@ -307,8 +307,9 @@ function iterativeprompting(a::T, prompt::String, verification::Function) where
    )
  )

-  result = nothing
  success = nothing
+  result = nothing
+  critique = ""

  # iteration loop
  while true
@@ -316,20 +317,19 @@ function iterativeprompting(a::T, prompt::String, verification::Function) where
    response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
    error("--> iterativeprompting")
    # check for correctness and get feedback
-    success, critique = verification(response)
+    success, _critique = verification(response)

    if success
      result = response
      break
    else  
      # add critique to prompt
+      critique *= _critique * "\n"
+      replace!(prompt, "Critique: ..." => "Critique: $critique")
+    end
  end
  
-    
-
-  end
-  
-  return (success=sucess, result=response)
+  return (success=success, result=result)
 end