From db42a55e00a1f93e385de0d320f8013097168e52 Mon Sep 17 00:00:00 2001
From: narawat lamaiin <narawat@outlook.com>
Date: Sat, 27 Apr 2024 17:44:55 +0700
Subject: [PATCH] Expand decisionMaker prompt; move isterminal to interface; fix iterativeprompting return

---
 src/interface.jl | 93 +++++++++++++++++++++++++++++++++++++++++++-----
 src/mcts.jl      | 51 ++++++++++----------------
 src/util.jl      | 12 +++----
 3 files changed, 108 insertions(+), 48 deletions(-)

diff --git a/src/interface.jl b/src/interface.jl
index 74b685c..ed19a5c 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -1,6 +1,6 @@
 module interface
     
-export addNewMessage, conversation, decisionMaker
+export addNewMessage, conversation, decisionMaker, isterminal
 
 using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient
 using GeneralUtils
@@ -63,25 +63,70 @@ using ..type, ..util, ..llmfunction, ..mcts
 function decisionMaker(a::T1, state::T2) where {T1<:agent, T2<:AbstractDict}
   customerinfo = 
   """
-    I will give you the following information about customer:
-    $(JSON3.write(state[:customerinfo]))
+  I will give you the following information about customer:
+  $(JSON3.write(state[:customerinfo]))
   """
 
   storeinfo =
   """
-    I will give you the following information about your store:
-    $(JSON3.write(state[:storeinfo]))
+  I will give you the following information about your store:
+  $(JSON3.write(state[:storeinfo]))
   """
 
+  reflect = ""
+  # """
+  # You have attempted to answer the following question before and failed. The following 
+  # reflection(s) give a plan to avoid failing to answer the question in the same way you did 
+  # previously. Use them to improve your strategy of correctly answering the given question.
+  # (trajectories)
+  # """
+
+
   #[WORKING] 
   _prompt = 
   """
-    You are a helpful sommelier working for a wine store. 
-    You helps users by searching wine that match the user preferences from your inventory.
+  You are a helpful sommelier working for a wine store. 
+  Your goal is to reccommend the best wine from your inventory that match the user preferences.
 
-    $customerinfo
+  $customerinfo
 
-    You must follow the 
+  You must follow the following criteria:
+  1) Get to know what occasion the user is buying wine for
+  2) Get to know what food the user will have with wine
+  3) Get to know how much the user willing to spend
+  4) Get to know type of wine the user is looking for 
+    e.g. Red, White, Sparkling, Rose, Dessert, Fortified
+  5) Get to know what wine characteristics the user is looking for 
+    e.g. tannin, sweetness, intensity, acidity
+  6) Check your inventory for the best wine that match the user preference
+
+  You should only respond with interleaving step-by-step Thought, Action, Observation steps. 
+  Thought can reason about the current situation, and Action can be three types:
+  1) Chatbox[text], which you can use to interact with the user.
+  2) Winestock[query], which you can use to find wine in your inventory.
+  3) Finish[answer], which returns your wine reccommendation to the user.
+
+  You should only respond in JSON format as describe below:
+  {
+    "Thought_1": "reasoning",
+    "Thought_2": "reasoning",
+    ...
+    "Thought_n": "reasoning",
+    "Action_1": "action to take",
+    "Observation_1": "result of the action"
+  }
+
+  Here are some examples:
+  {
+    "Question": "I would like to buy a sedan",
+    "Thought_1": "I have many car in my inventory suitable for several usage scenarios",
+    "Thought_2": "It would be better if I know what the user intend to do with his car",
+    "Thought_3": "
+  }
+
+  $reflect
+
+  $(JSON3.write(state[:thoughtHistory]))
   """
 
   prompt =  formatLLMtext_llama3instruct("system", _prompt)
@@ -89,6 +134,7 @@ function decisionMaker(a::T1, state::T2) where {T1<:agent, T2<:AbstractDict}
   thought = iterativeprompting(a, prompt, syntaxcheck_json)
 
 
+
   error("--> decisionMaker")
   return thought
 end
@@ -148,6 +194,35 @@ function reflector()
 end
 
 
+""" Stub for the terminal-state check; not implemented yet (empty body, takes no arguments).
+
+  Arguments\n
+  -----
+    none yet -- NOTE(review): presumably will take the state to test; confirm when implemented
+  Return\n
+  -----
+    `nothing` for now, because the function body is empty
+  Example\n
+  -----
+  ```jldoctest
+  julia> isterminal() === nothing
+  true
+  ```
+  TODO\n
+  -----
+    [] update docstring
+    [] implement the function
+    [] implement RAG to pull similar experience
+
+  Signature\n
+  -----
+"""
+function isterminal()
+
+end
+
+
+
 
 """ Chat with llm.
 
diff --git a/src/mcts.jl b/src/mcts.jl
index 1f5e8a3..7a57a6c 100644
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -264,36 +264,9 @@ end
   Signature\n
   -----
 """
-isLeaf(node::MCTSNode)::Bool = isempty(node.children)
+isleaf(node::MCTSNode)::Bool = isempty(node.children)
 
 
-"""
-
-  Arguments\n
-  -----
-    
-  Return\n
-  -----
-
-  Example\n
-  -----
-  ```jldoctest
-  julia> 
-  ```
-
-  TODO\n
-  -----
-    [] update docstring
-    [] implement the function
-    [] implement RAG to pull similar experience
-
-  Signature\n
-  -----
-"""
-function isTerminal()
-
-end
-
 """
 
   Arguments\n
@@ -324,10 +297,12 @@ end
 # ------------------------------------------------------------------------------------------------ #
 #                    Create a complete example using the defined MCTS functions                    #
 # ------------------------------------------------------------------------------------------------ #
-""" Search for best action
+""" Search for the best action to take for a given state and task
 
   Arguments\n
   -----
+    a::agent
+      one of Yiem's agents
     initial state
       initial state
     decisionMaker::Function
@@ -336,6 +311,8 @@ end
       assess the value of the state
     reflector::Function
       generate lesson from trajectory and reward
+    isterminal::Function
+      determine whether a given state is a terminal state
     n::Integer
       how many times action will be sampled from decisionMaker 
     w::Float64
@@ -359,15 +336,23 @@ end
   Signature\n
   -----
 """
-function runMCTS(a::T, initialState, decisionMaker::Function, stateValueEstimator::Function, 
-                reflector::Function, n::Integer, maxDepth::Integer, 
-                maxIterations::Integer, w::Float64) where {T<:agent}
+function runMCTS(
+  a::T1, 
+  initialState, 
+  decisionMaker::Function, 
+  stateValueEstimator::Function, 
+  reflector::Function,
+  isterminal::Function,
+  n::Integer, 
+  maxDepth::Integer, 
+  maxIterations::Integer, w::Float64) where {T1<:agent}
+  
   statetype = typeof(initialState)
   root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}())
   
   for _ in 1:maxIterations
     node = root
-    while !isLeaf(node)
+    while !isleaf(node)
       node = select(node, w)
     end
 
diff --git a/src/util.jl b/src/util.jl
index 972e85c..8b79911 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -307,8 +307,9 @@ function iterativeprompting(a::T, prompt::String, verification::Function) where
     )
   )
 
-  result = nothing
   success = nothing
+  result = nothing
+  critique = ""
 
   # iteration loop
   while true
@@ -316,20 +317,19 @@ function iterativeprompting(a::T, prompt::String, verification::Function) where
     response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
     error("--> iterativeprompting")
     # check for correctness and get feedback
-    success, critique = verification(response)
+    success, _critique = verification(response)
 
     if success
       result = response
       break
     else  
       # add critique to prompt
+      critique *= _critique * "\n"  # accumulate feedback across failed attempts
+      prompt = replace(prompt, "Critique: ..." => "Critique: $critique")  # Strings are immutable; replace! has no String method
     end
-
-    
-
   end
   
-  return (success=sucess, result=response)
+  return (success=success, result=result)
 end