update

2024-07-15 21:19:55 +07:00
parent 180bd16018
commit fdc50d1b90
9 changed files with 2396 additions and 591 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -39,8 +39,8 @@ macro executeStringFunction(functionStr, args...)
  func_expr = Meta.parse(functionStr)
  
  # Create a new function with the parsed expression
-  function_to_call = eval(Expr(:function, Expr(:call, func_expr, args...), 
-                                                func_expr.args[2:end]...))
+  function_to_call = eval(Expr(:function, 
+                              Expr(:call, func_expr, args...), func_expr.args[2:end]...))
  
  # Call the newly created function with the provided arguments
  function_to_call(args...)
@@ -97,169 +97,328 @@ julia> output_thoughtDict = Dict(

 # Signature
 """
-function decisionMaker(config::T1, state::T2)::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:AbstractDict}
-  customerinfo = 
+function decisionMaker(a::T)::Dict{Symbol, Any} where {T<:agent}
+
+  # lessonDict = copy(JSON3.read("lesson.json"))
+
+  # lesson =
+  # if isempty(lessonDict)
+  #   ""
+  # else
+  #   lessons = Dict{Symbol, Any}()
+  #   for (k, v) in lessonDict
+  #     lessons[k] = lessonDict[k][:lesson]
+  #   end
+
+  #   """
+  #   You have attempted to help the user before and failed, either because your reasoning for the 
+  #   recommendation was incorrect or your response did not exactly match the user expectation. 
+  #   The following lesson(s) give a plan to avoid failing to help the user in the same way you 
+  #   did previously. Use them to improve your strategy to help the user.
+
+  #   Here are some lessons in JSON format:
+  #   $(JSON3.write(lessons))
+
+  #   When providing the thought and action for the current trial, that into account these failed 
+  #   trajectories and make sure not to repeat the same mistakes and incorrect answers. 
+  #   """
+  # end
+
+  # _prompt = 
+  # """
+  # You are a helpful sommelier working for a wine store. 
+  # Your goal is to recommend the best wine from your inventory that match the user preferences.
+  # You are also keen to improve your recommendation with lesson(s).
+  
+  # You must follow the following criteria:
+  # 1) Get to know how much the user willing to spend
+  # 2) Get to know type of wine the user is looking for e.g. red, white, sparkling, rose, dessert, fortified
+  # 3) Get to know what occasion the user is buying wine for
+  # 4) Get to know what characteristics of wine the user is looking for 
+  #     e.g. tannin, sweetness, intensity, acidity
+  # 5) Get to know what food the user will have with wine
+  # 6) Check your inventory for the best wine that match the user preference
+  # 7) Recommend wine to the user
+  
+  # You should only respond with interleaving Thought, Action, Observation steps. 
+  # Thought can reason about the current situation, and Action can be three types:
+  # 1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
+  # 2) chatbox[text], which you can use to interact with the user.
+  # After each observation, provide the next Thought and next Action.
+  
+  # You should only respond in JSON format as describe below:
+  # {
+  #   "thought": "your reasoning",
+  #   "action": {"name": "action to take", "input": "action input"},
+  #   "observation": "result of the action"
+  # }
+  
+  # Here are some examples:
+  # {
+  # "question": "I would like to buy a sedan with 8 seats.",
+  # "thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
+  # "action_1": {"name": "inventory", "input": "sedan with 8 seats."},
+  # "observation_1": "Several model has 8 seats. Available color are black, red green"
+  # }
+  # {
+  #   "thought": "I have a few color for the user to choose from. I will ask him what color he likes.",
+  #   "action": {"name": "chatbox", "input": "Which color do you like?"}
+  #   "observation": "I'll take black."
+  # }
+
+  # $lesson
+
+  # Let's begin!
+
+  # $(JSON3.write(state[:thoughtHistory]))
+  # {"thought"
+  # """
+
+  systemmsg = 
  """
-  I will give you the following information about customer:
-  $(JSON3.write(state[:customerinfo]))
+    You are a helpful sommelier working for a wine store. 
+    Your task is to help the user choose the best wine that match the user preferences from your inventory.
+    You are also eager to improve your helpfulness.
+    
+    You must follow the following guidelines:
+    - Get to know how much the user willing to spend
+    - Get to know type of wine the user is looking for e.g. red, white, sparkling, rose, dessert, fortified
+    - Get to know what occasion the user is buying wine for
+    - Get to know what characteristics of wine the user is looking for e.g. tannin, sweetness, intensity, acidity
+    - Get to know what food the user will have with wine
+
+    At each round of conversation, the user will give you the current situation:
+    Context: ...
+    Your earlier conversation with the user: ...
+
+    You should then respond to the user with interleaving Thought, Plan, Action and Observation:
+    - thought: 
+        1) State your reasoning about the current situation.
+    - plan: Based on the current situation, what would you do to complete the task? Be specific.
+    - action (Must be aligned with your plan): Can be one of the following functions:
+        1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English. 
+        2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English.
+    - observation: result of the action.
+
+    You should only respond in format as described below:
+    thought: ...
+    plan: ...
+    action_name: ...
+    action_input: ...
+    observation: ...
+
+    Let's begin!
  """

-  storeinfo =
+  usermsg =
  """
-  I will give you the following information about your store:
-  $(JSON3.write(state[:storeinfo]))
+  Context: None
+  Your earlier conversation with the user: $(chatHistoryToString(a))
  """

-  lessonDict = copy(JSON3.read("lesson.json"))
-
-  lesson =
-  if isempty(lessonDict)
-    ""
-  else
-    lessons = Dict{Symbol, Any}()
-    for (k, v) in lessonDict
-      lessons[k] = lessonDict[k][:lesson]
-    end
-
-    """
-    You have attempted to help the user before and failed, either because your reasoning for the 
-    recommendation was incorrect or your response did not exactly match the user expectation. 
-    The following lesson(s) give a plan to avoid failing to help the user in the same way you 
-    did previously. Use them to improve your strategy to help the user.
-
-    Here are some lessons in JSON format:
-    $(JSON3.write(lessons))
-
-    When providing the thought and action for the current trial, that into account these failed 
-    trajectories and make sure not to repeat the same mistakes and incorrect answers. 
-    """
-  end
-
  _prompt = 
-  """
-  You are a helpful sommelier working for a wine store. 
-  Your goal is to recommend the best wine from your inventory that match the user preferences.
-  You are also keen to improve your recommendation with lesson(s).
-  
-  You must follow the following criteria:
-  1) Get to know how much the user willing to spend
-  2) Get to know type of wine the user is looking for e.g. red, white, sparkling, rose, dessert, fortified
-  3) Get to know what occasion the user is buying wine for
-  4) Get to know what characteristics of wine the user is looking for 
-      e.g. tannin, sweetness, intensity, acidity
-  5) Get to know what food the user will have with wine
-  6) Check your inventory for the best wine that match the user preference
-  7) Recommend wine to the user
-  
-  You should only respond with interleaving Thought, Action, Observation steps. 
-  Thought can reason about the current situation, and Action can be three types:
-  1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
-  2) chatbox[text], which you can use to interact with the user.
-  3) recommendbox[answer], which returns your wine recommendation to the user. 
-  After each observation, provide the next Thought and next Action.
-  
-  You should only respond in JSON format as describe below:
-  {
-    "thought": "your reasoning",
-    "action": {"name": "action to take", "input": "action input"},
-    "observation": "result of the action"
-  }
-  
-  Here are some examples:
-  {
-  "question": "I would like to buy a sedan with 8 seats.",
-  "thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
-  "action_1": {"name": "inventory", "input": "sedan with 8 seats."},
-  "observation_1": "Several model has 8 seats. Available color are black, red green"
-  }
-  {
-    "thought": "I have a few color for the user to choose from. I will ask him what color he likes.",
-    "action": {"name": "chatbox", "input": "Which color do you like?"}
-    "observation": "I'll take black."
-  }
+  [
+    Dict(:name=> "system", :text=> systemmsg),
+    Dict(:name=> "user", :text=> usermsg)
+  ]

-  $lesson
-
-  Let's begin!
-
-  $(JSON3.write(state[:thoughtHistory]))
-  {"thought"
-  """
-
-  # apply LLM specific instruct format
-  externalService = config[:externalservice][:text2textinstruct]
-  llminfo = externalService[:llminfo]
-  prompt = 
-  if llminfo[:name] == "llama3instruct"
-    formatLLMtext_llama3instruct("system", _prompt)
-  else
-    error("llm model name is not defied yet $(@__LINE__)")
-  end
-
-  msgMeta = GeneralUtils.generate_msgMeta(
-    externalService[:mqtttopic],
-    senderName= "decisionMaker",
-    senderId= string(uuid4()),
-    receiverName= "text2textinstruct",
-    mqttBroker= config[:mqttServerInfo][:broker],
-    mqttBrokerPort= config[:mqttServerInfo][:port],
-  )
-
-  outgoingMsg = Dict(
-    :msgMeta=> msgMeta,
-    :payload=> Dict(
-      :text=> prompt,
-      :kwargs=> Dict(
-        :max_tokens=> 512,
-        :stop=> ["<|eot_id|>"],
-      )
-    )
-  )
-  @show outgoingMsg
-
-  for attempt in 1:5
+  # put in model format
+  prompt = GeneralUtils.formatLLMtext(_prompt, "llama3instruct")
+  prompt *=
+    """
+    <|start_header_id|>assistant<|end_header_id|>
+    """
+  response = nothing # keep the last LLM response so it can be shown in the final error message
+  for attempt in 1:10
    try
-      response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
-      _responseJsonStr = response[:response][:text]
-      expectedJsonExample =
-      """
-      Here is an expected JSON format:
-      {
-        "thought": "...",
-        "action": {"name": "...", "input": "..."},
-        "observation": "..."
-      }
-      """
-      responseJsonStr = jsoncorrection(config, _responseJsonStr, expectedJsonExample)
-      thoughtDict = copy(JSON3.read(responseJsonStr)) 
+      response = a.text2textInstructLLM(prompt)
+      responsedict = GeneralUtils.textToDict(response,
+                        ["thought", "plan", "action_name", "action_input", "observation"], 
+                        rightmarker=":", symbolkey=true)

-      # check if dict has all required value
-      thought::AbstractString = thoughtDict[:thought]
-      actionname::AbstractString = thoughtDict[:action][:name]
-      actioninput::AbstractString = thoughtDict[:action][:input]
-      if actionname ∈ ["winestock", "chatbox", "recommendbox"]
-        # LLM use available function
-      elseif thought == ""
-        error("DecisionMaker has no thought")
-      elseif length(actioninput) == 0
-        error("DecisionMaker has no actioninput")
-      else 
-        error("DecisionMaker use wrong function")
+      if responsedict[:action_name] ∉ ["CHATBOX", "WINESTOCK"]
+        error("decisionMaker didn't use the given functions ", @__LINE__)
      end

-      return thoughtDict
+      for i ∈ [:thought, :plan, :action_name]
+        if length(JSON3.write(responsedict[i])) == 0
+          error("$i is empty ", @__LINE__)
+        end
+      end
+
+      # check that no category matches more than one key in the response
+      for i ∈ [:thought, :plan, :action_name, :action_input, :observation]
+        matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
+        if length(matchkeys) > 1
+          error("DecisionMaker has more than one key per categories")
+        end
+      end
+      println("--> 1")
+      pprintln(responsedict)
+      return responsedict
    catch e
      io = IOBuffer()
      showerror(io, e)
      errorMsg = String(take!(io))
      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
      println("")
-      @warn "Attempt $attempt. Error occurred: $errorMsg\n$st"
+      println("Attempt $attempt. Error occurred: $errorMsg\n$st")
      println("")
    end
  end
-  error("DecisionMaker failed to generate a thought")
+  error("DecisionMaker failed to generate a thought ", response)
 end
+# function decisionMaker(a::T)::Dict{Symbol, Any} where {T<:agent}
+
+#   # lessonDict = copy(JSON3.read("lesson.json"))
+
+#   # lesson =
+#   # if isempty(lessonDict)
+#   #   ""
+#   # else
+#   #   lessons = Dict{Symbol, Any}()
+#   #   for (k, v) in lessonDict
+#   #     lessons[k] = lessonDict[k][:lesson]
+#   #   end
+
+#   #   """
+#   #   You have attempted to help the user before and failed, either because your reasoning for the 
+#   #   recommendation was incorrect or your response did not exactly match the user expectation. 
+#   #   The following lesson(s) give a plan to avoid failing to help the user in the same way you 
+#   #   did previously. Use them to improve your strategy to help the user.
+
+#   #   Here are some lessons in JSON format:
+#   #   $(JSON3.write(lessons))
+
+#   #   When providing the thought and action for the current trial, that into account these failed 
+#   #   trajectories and make sure not to repeat the same mistakes and incorrect answers. 
+#   #   """
+#   # end
+
+#   _prompt = 
+#   """
+#   You are a helpful sommelier working for a wine store. 
+#   Your goal is to recommend the best wine from your inventory that match the user preferences.
+#   You are also keen to improve your recommendation with lesson(s).
+  
+#   You must follow the following criteria:
+#   1) Get to know how much the user willing to spend
+#   2) Get to know type of wine the user is looking for e.g. red, white, sparkling, rose, dessert, fortified
+#   3) Get to know what occasion the user is buying wine for
+#   4) Get to know what characteristics of wine the user is looking for 
+#       e.g. tannin, sweetness, intensity, acidity
+#   5) Get to know what food the user will have with wine
+#   6) Check your inventory for the best wine that match the user preference
+#   7) Recommend wine to the user
+  
+#   You should only respond with interleaving Thought, Action, Observation steps. 
+#   Thought can reason about the current situation, and Action can be three types:
+#   1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
+#   2) chatbox[text], which you can use to interact with the user.
+#   After each observation, provide the next Thought and next Action.
+  
+#   You should only respond in JSON format as describe below:
+#   {
+#     "thought": "your reasoning",
+#     "action": {"name": "action to take", "input": "action input"},
+#     "observation": "result of the action"
+#   }
+  
+#   Here are some examples:
+#   {
+#   "question": "I would like to buy a sedan with 8 seats.",
+#   "thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
+#   "action_1": {"name": "inventory", "input": "sedan with 8 seats."},
+#   "observation_1": "Several model has 8 seats. Available color are black, red green"
+#   }
+#   {
+#     "thought": "I have a few color for the user to choose from. I will ask him what color he likes.",
+#     "action": {"name": "chatbox", "input": "Which color do you like?"}
+#     "observation": "I'll take black."
+#   }
+
+#   $lesson
+
+#   Let's begin!
+
+#   $(JSON3.write(state[:thoughtHistory]))
+#   {"thought"
+#   """
+
+#   # apply LLM specific instruct format
+#   externalService = config[:externalservice][:text2textinstruct]
+#   llminfo = externalService[:llminfo]
+#   prompt = 
+#   if llminfo[:name] == "llama3instruct"
+#     formatLLMtext_llama3instruct("system", _prompt)
+#   else
+#     error("llm model name is not defied yet $(@__LINE__)")
+#   end
+
+#   msgMeta = GeneralUtils.generate_msgMeta(
+#     externalService[:mqtttopic],
+#     senderName= "decisionMaker",
+#     senderId= string(uuid4()),
+#     receiverName= "text2textinstruct",
+#     mqttBroker= config[:mqttServerInfo][:broker],
+#     mqttBrokerPort= config[:mqttServerInfo][:port],
+#   )
+
+#   outgoingMsg = Dict(
+#     :msgMeta=> msgMeta,
+#     :payload=> Dict(
+#       :text=> prompt,
+#       :kwargs=> Dict(
+#         :max_tokens=> 512,
+#         :stop=> ["<|eot_id|>"],
+#       )
+#     )
+#   )
+#   @show outgoingMsg
+
+#   for attempt in 1:5
+#     try
+#       response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
+#       _responseJsonStr = response[:response][:text]
+#       expectedJsonExample =
+#       """
+#       Here is an expected JSON format:
+#       {
+#         "thought": "...",
+#         "action": {"name": "...", "input": "..."},
+#         "observation": "..."
+#       }
+#       """
+#       responseJsonStr = jsoncorrection(config, _responseJsonStr, expectedJsonExample)
+#       thoughtDict = copy(JSON3.read(responseJsonStr)) 
+
+#       # check if dict has all required value
+#       thought::AbstractString = thoughtDict[:thought]
+#       actionname::AbstractString = thoughtDict[:action][:name]
+#       actioninput::AbstractString = thoughtDict[:action][:input]
+#       if actionname ∈ ["winestock", "chatbox", "recommendbox"]
+#         # LLM use available function
+#       elseif thought == ""
+#         error("DecisionMaker has no thought")
+#       elseif length(actioninput) == 0
+#         error("DecisionMaker has no actioninput")
+#       else 
+#         error("DecisionMaker use wrong function")
+#       end
+
+#       return thoughtDict
+#     catch e
+#       io = IOBuffer()
+#       showerror(io, e)
+#       errorMsg = String(take!(io))
+#       st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
+#       println("")
+#       @warn "Attempt $attempt. Error occurred: $errorMsg\n$st"
+#       println("")
+#     end
+#   end
+#   error("DecisionMaker failed to generate a thought")
+# end


 """ Assigns a scalar value to each new child node to be used for selec-
@@ -551,97 +710,132 @@ function reflector(config::T1, state::T2)::String where {T1<:AbstractDict, T2<:A
 end


-""" Get a new state

-# Arguments
-  - `a::T1`
-    one of YiemAgent's agent
-  - `state::T2`
-    current game state
-  - `thoughtDict::T3`
-    contain Thought, Action, Observation
-  - `isterminal::Function`
-    a function to determine terminal state

-# Return
-  - `(newNodeKey, newstate, isterminalstate, reward)::Tuple{String, Dict{Symbol, <:Any}, Bool, <:Number}`

-# Example
-```jldoctest
-julia> state = Dict{Symbol, Dict{Symbol, Any}}(
-          :thoughtHistory => Dict(:question => "Hello, I want to buy a bottle of wine."), 
-          :storeinfo => Dict(), 
-          :customerinfo => Dict()
-          )
-julia>  thoughtDict = Dict(
-          :question=> "I want to buy a bottle of wine.",
-          :thought_1=>  "The customer wants to buy a bottle of wine.",
-          :action_1=>   Dict{Symbol, Any}(
-                          :name=>"Chatbox", 
-                          :input=>"What occasion are you buying the wine for?",
-                          ),
-          :observation_1 => ""
-          )
-```
+# """ Chat with llm.

-# TODO
-  - [] add other actions
-  - [WORKING] add embedding of newstate and store in newstate[:embedding]
+# # Arguments
+#   `a::agent`
+#     an agent
+  
+# # Return
+#   None

-# Signature
-"""
-function transition(state::T2, config::T1, decisionMaker::Function, evaluator::Function, 
-  reflector::Function
-  )::Tuple{String, Dict{Symbol, <:Any}, Integer} where {T1<:AbstractDict, T2<:AbstractDict}
+# # Example
+# ```jldoctest
+# julia> using JSON3, UUIDs, Dates, FileIO, MQTTClient, ChatAgent
+# julia> const mqttBroker = "mqtt.yiem.cc"
+# julia> mqttclient, connection = MakeConnection(mqttBroker, 1883)
+# julia> tools=Dict( # update input format
+#         "askbox"=>Dict(
+#           :description => "<askbox tool description>Useful for when you need to ask the user for more context. Do not ask the user their own question.</askbox tool description>",
+#           :input => "<input>Input is a text in JSON format.</input><input example>{\"Q1\": \"How are you doing?\", \"Q2\": \"How may I help you?\"}</input example>",
+#           :output => "" ,
+#           :func => nothing,
+#         ),
+#       )
+# julia> msgMeta = Dict(
+#   :msgPurpose=> "updateStatus",
+#   :from=> "agent",       
+#   :to=> "llmAI",
+#   :requestresponse=> "request",
+#   :sendto=> "",      # destination topic
+#   :replyTo=> "agent/api/v0.1.0/txt/response",     # requester asks the responder to send its reply to this topic
+#   :repondToMsgId=> "",   # responder is responding to this msg id
+#   :taskstatus=> "",       # "complete", "fail", "waiting" or other status
+#   :timestamp=> Dates.now(),
+#   :msgId=> "$(uuid4())",
+# )
+# julia> a = ChatAgent.agentReflex(
+#         "Jene",
+#         mqttclient,
+#         msgMeta,
+#         agentConfigTopic, # I need a function to send msg to config topic to get load balancer
+#         role=:sommelier,
+#         tools=tools
+#       )
+# julia> newAgent = ChatAgent.agentReact(agent)
+# julia> response = ChatAgent.conversation(newAgent, "Hi! how are you?")
+# ```

-  thoughtDict = decisionMaker(config, state)
+# # TODO
+#   - [] update docstring
+#   - [x] MCTS() for planning
+#   - [] add recap to initialState for earlier completed question
+#   - [WORKING] conversation loop

-  actionname = thoughtDict[:action][:name]
-  actioninput = thoughtDict[:action][:input]
+# # Signature
+# """
+# function conversation(a::T, userinput::Dict) where {T<:agent}
+#   config = deepcopy(a.config)
+#   pprint(config)
+#   if userinput[:text] == "newtopic"
+#     clearhistory(a)
+#     return "Okay. What shall we talk about?"
+#   else
+#     # add usermsg to a.chathistory
+#     addNewMessage(a, "user", userinput[:text])

-  # map action and input() to llm function
-  response, select, reward, isterminal =
-  if actionname == "chatbox"
-    # deepcopy(state[:virtualCustomerChatHistory]) because I want to keep it clean
-    # so that other simulation start from this same node is not contaminated with actioninput
-    virtualWineUserChatbox(config, actioninput, deepcopy(state[:virtualCustomerChatHistory]))  # virtual customer
-  elseif actionname == "winestock"
-    winestock(config, actioninput)
-  elseif actionname == "recommendbox"  
-    virtualWineUserRecommendbox(config, actioninput)
-  else
-    error("undefined LLM function. Requesting $actionname")
-  end
+#     if isempty(a.plan[:currenttrajectory])

-  newNodeKey, newstate = LLMMCTS.makeNewState(state, thoughtDict, response, select, reward, 
-                                            isterminal)
-  if actionname == "chatbox"
-    push!(newstate[:virtualCustomerChatHistory], Dict(:name=>"assistant", :text=> actioninput) )
-    push!(newstate[:virtualCustomerChatHistory], Dict(:name=>"user", :text=> response))
-  end
+#       # initial state
+#       a.plan[:currenttrajectory] = Dict{Symbol, Any}(
+#         # deepcopy the info to prevent modifying the info unintentionally during MCTS planning
+#         :customerinfo=> deepcopy(a.keywordinfo[:customerinfo]),
+#         :storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
+#         :userselect=> nothing,
+#         :reward=> 0,
+#         :isterminal=> false,
+#         :evaluation=> nothing,
+#         :lesson=> nothing,

-  stateevaluation, progressvalue = evaluator(config, newstate)
+#         :totalTrajectoryReward=> nothing,

-  if newstate[:reward] < 0
-    pprint(newstate[:thoughtHistory])
-    newstate[:evaluation] = stateevaluation
-    newstate[:lesson] = reflector(config, newstate)
+#         # contain question, thought_1, action_1, observation_1, thought_2, ...
+#         :thoughtHistory=> OrderedDict{Symbol, Any}( 
+#           #[] :recap=>,
+#           :question=> userinput[:text],
+#           ),

-    # store new lesson for later use
-    lessonDict = copy(JSON3.read("lesson.json"))
-    latestLessonKey, latestLessonIndice = 
-      GeneralUtils.findHighestIndexKey(lessonDict, "lesson")
-    nextIndice = latestLessonKey == :NA ? 1 : latestLessonIndice + 1
-    newLessonKey = Symbol("lesson_$(nextIndice)")
-    lessonDict[newLessonKey] = newstate
-    open("lesson.json", "w") do io
-      JSON3.pretty(io, lessonDict)
-    end
-    print("---> reflector()")
-  end
+#         # store conversation for virtual customer because the virtual customer agent is just 
+#         # a function and stateless.
+#         :virtualCustomerChatHistory=> Vector{Dict{Symbol, Any}}(
+#           [Dict(:name=> "user", :text=> userinput[:text])]
+#           ),  
+#         )
+#     else
+#       _, a.plan[:currenttrajectory] = makeNewState(a.plan[:currenttrajectory], 
+#                   a.plan[:activeplan][:thoughtHistory], userinput[:text], userinput[:select], 
+#                   userinput[:reward], userinput[:isterminal])
+#     end
+#   end

-  return (newNodeKey, newstate, progressvalue)
-end
+#   while true
+#     bestNextState, besttrajectory = LLMMCTS.runMCTS(a.plan[:currenttrajectory], 
+#                               transition, config, decisionMaker, evaluator, reflector;
+#                               totalsample=2, maxDepth=3, maxiterations=3, explorationweight=1.0)
+#     a.plan[:activeplan] = bestNextState
+
+#     latestActionKey, latestActionIndice = 
+#       GeneralUtils.findHighestIndexKey(bestNextState[:thoughtHistory], "action")
+#     actionname = bestNextState[:thoughtHistory][latestActionKey][:name]
+#     actioninput = bestNextState[:thoughtHistory][latestActionKey][:input]
+
+#     # transition
+#     if actionname == "chatbox"
+#       # add usermsg to a.chathistory
+#       addNewMessage(a, "assistant", actioninput)
+#       return actioninput
+#     elseif actionname == "recommendbox"
+#       # add usermsg to a.chathistory
+#       addNewMessage(a, "assistant", actioninput)
+#       return actioninput
+#     else
+#       _, a.plan[:currenttrajectory] = transition(a, a.plan[:currenttrajectory], a.plan[:activeplan])
+#     end
+#   end
+# end



@@ -700,8 +894,7 @@ julia> response = ChatAgent.conversation(newAgent, "Hi! how are you?")
 # Signature
 """
 function conversation(a::T, userinput::Dict) where {T<:agent}
-  config = deepcopy(a.config)
-  pprint(config)
+
  if userinput[:text] == "newtopic"
    clearhistory(a)
    return "Okay. What shall we talk about?"
@@ -709,64 +902,246 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
    # add usermsg to a.chathistory
    addNewMessage(a, "user", userinput[:text])

-    if isempty(a.plan[:currenttrajectory])
+    thought = think(a)

-      # initial state
-      a.plan[:currenttrajectory] = Dict{Symbol, Any}(
-        # deepcopy the info to prevent modifying the info unintentionally during MCTS planning
-        :customerinfo=> deepcopy(a.keywordinfo[:customerinfo]),
-        :storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
-        :userselect=> nothing,
-        :reward=> 0,
-        :isterminal=> false,
-        :evaluation=> nothing,
-        :lesson=> nothing,
+    # thought will be added to chat model via context
+    chatresponse = generatechat(a, thought)

-        :totalTrajectoryReward=> nothing,
-
-        # contain question, thought_1, action_1, observation_1, thought_2, ...
-        :thoughtHistory=> OrderedDict{Symbol, Any}( 
-          #[] :recap=>,
-          :question=> userinput[:text],
-          ),
-
-        # store conversation for virtual customer because the virtual customer agent is just 
-        # a function and stateless.
-        :virtualCustomerChatHistory=> Vector{Dict{Symbol, Any}}(
-          [Dict(:name=> "user", :text=> userinput[:text])]
-          ),  
-        )
-    else
-      _, a.plan[:currenttrajectory] = makeNewState(a.plan[:currenttrajectory], 
-                  a.plan[:activeplan][:thoughtHistory], userinput[:text], userinput[:select], 
-                  userinput[:reward], userinput[:isterminal])
-    end
+    return chatresponse
  end

-  while true
-    bestNextState, besttrajectory = LLMMCTS.runMCTS(a.plan[:currenttrajectory], 
-                              transition, config, decisionMaker, evaluator, reflector;
-                              totalsample=2, maxDepth=3, maxiterations=3, explorationweight=1.0)
-    a.plan[:activeplan] = bestNextState

-    latestActionKey, latestActionIndice = 
-      GeneralUtils.findHighestIndexKey(bestNextState[:thoughtHistory], "action")
-    actionname = bestNextState[:thoughtHistory][latestActionKey][:name]
-    actioninput = bestNextState[:thoughtHistory][latestActionKey][:input]
+end
+# function conversation(a::T, userinput::Dict) where {T<:agent}
+#   config = deepcopy(a.config)
+#   pprint(config)
+#   if userinput[:text] == "newtopic"
+#     clearhistory(a)
+#     return "Okay. What shall we talk about?"
+#   else
+#     # add usermsg to a.chathistory
+#     addNewMessage(a, "user", userinput[:text])

-    # transition
-    if actionname == "chatbox"
-      # add usermsg to a.chathistory
-      addNewMessage(a, "assistant", actioninput)
-      return actioninput
-    elseif actionname == "recommendbox"
-      # add usermsg to a.chathistory
-      addNewMessage(a, "assistant", actioninput)
-      return actioninput
+#     if isempty(a.plan[:currenttrajectory])
+
+#       # initial state
+#       a.plan[:currenttrajectory] = Dict{Symbol, Any}(
+#         # deepcopy the info to prevent modifying the info unintentionally during MCTS planning
+#         :customerinfo=> deepcopy(a.keywordinfo[:customerinfo]),
+#         :storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
+#         :userselect=> nothing,
+#         :reward=> 0,
+#         :isterminal=> false,
+#         :evaluation=> nothing,
+#         :lesson=> nothing,
+
+#         :totalTrajectoryReward=> nothing,
+
+#         # contain question, thought_1, action_1, observation_1, thought_2, ...
+#         :thoughtHistory=> OrderedDict{Symbol, Any}( 
+#           #[] :recap=>,
+#           :question=> userinput[:text],
+#           ),
+
+#         # store conversation for virtual customer because the virtual customer agent is just 
+#         # a function and stateless.
+#         :virtualCustomerChatHistory=> Vector{Dict{Symbol, Any}}(
+#           [Dict(:name=> "user", :text=> userinput[:text])]
+#           ),  
+#         )
+#     else
+#       _, a.plan[:currenttrajectory] = makeNewState(a.plan[:currenttrajectory], 
+#                   a.plan[:activeplan][:thoughtHistory], userinput[:text], userinput[:select], 
+#                   userinput[:reward], userinput[:isterminal])
+#     end
+#   end
+
+#   while true
+#     bestNextState, besttrajectory = LLMMCTS.runMCTS(a.plan[:currenttrajectory], 
+#                               transition, config, decisionMaker, evaluator, reflector;
+#                               totalsample=2, maxDepth=3, maxiterations=3, explorationweight=1.0)
+#     a.plan[:activeplan] = bestNextState
+
+#     latestActionKey, latestActionIndice = 
+#       GeneralUtils.findHighestIndexKey(bestNextState[:thoughtHistory], "action")
+#     actionname = bestNextState[:thoughtHistory][latestActionKey][:name]
+#     actioninput = bestNextState[:thoughtHistory][latestActionKey][:input]
+
+#     # transition
+#     if actionname == "chatbox"
+#       # add usermsg to a.chathistory
+#       addNewMessage(a, "assistant", actioninput)
+#       return actioninput
+#     elseif actionname == "recommendbox"
+#       # add usermsg to a.chathistory
+#       addNewMessage(a, "assistant", actioninput)
+#       return actioninput
+#     else
+#       _, a.plan[:currenttrajectory] = transition(a, a.plan[:currenttrajectory], a.plan[:activeplan])
+#     end
+#   end
+# end
+
+
+"""
+
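+# Arguments
+  - `a::T`
+    one of ChatAgent's agent.
+# Return
+  The `:result` value of the response produced by the selected action, 
+  or `nothing` when the response has no `:result`.
+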
+# Example
+```jldoctest
+julia> 
+```
+
+# TODO
+  - [] update docstring
+  - [x] implement the function
+  - [x] add try block. check result that it is expected before returning
+
+# Signature
+"""
+function think(a::T; dbconninfo::AbstractString="host=192.168.88.12 port=5432 dbname=yiem_wine_assistant user=yiem password=yiem@Postgres_0.0") where {T<:agent}
+  # Run one reasoning step: ask decisionMaker() which action to take, execute that action
+  # and return the `:result` of its response (`nothing` when the response has no `:result`).
+  #
+  # `dbconninfo` is the libpq connection string used by the WINESTOCK action. The default
+  # keeps the previous hard-coded value so existing callers behave the same.
+  # NOTE(review): the default embeds database credentials in source code -- move it to
+  # configuration / environment and pass it in through this keyword.
+  #
+  # Throws an error when decisionMaker() requests an action that is not handled here.
+  thoughtDict = decisionMaker(a)
+  actionname = thoughtDict[:action_name]
+  actioninput = thoughtDict[:action_input]
+
+  # map action and input() to llm function
+  response =
+    if actionname == "CHATBOX"
+      (result=actioninput, errormsg=nothing, success=true)
+    elseif actionname == "WINESTOCK"
+      DBconnection = LibPQ.Connection(dbconninfo)
+      try
+        winestock(actioninput, DBconnection, a.text2textInstructLLM)
+      finally
+        # always release the connection, even when winestock() throws
+        close(DBconnection)
+      end
     else
-      _, a.plan[:currenttrajectory] = transition(a, a.plan[:currenttrajectory], a.plan[:activeplan])
+      error("undefined LLM function. Requesting $actionname")
+    end
+  
+  # this section allow LLM functions above to have different return values.
+  result = get(response, :result, nothing)
+
+  # The typed declarations below are kept only as a sanity check of the response: each one
+  # converts to / asserts the declared type and throws when an LLM function returns
+  # something unexpected. The values themselves are not used yet.
+  reward::Integer = get(response, :reward, 0)
+  isterminal::Bool = get(response, :isterminal, false)
+  errormsg::Union{AbstractString, Nothing} = get(response, :errormsg, nothing)
+  success::Bool = get(response, :success, false)
+
+  return result
+end
+
+
+""" 
+
+# Arguments
+  - `a::T1` 
+    one of ChatAgent's agent.
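+  - `input::T2`
+    the agent's current thought; it is inserted into the prompt as "Your thoughts".
+# Return
+  The chat message for the user, generated by the LLM.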
+
+# Example
+```jldoctest
+julia> 
+```
+
+# TODO
+  - [] update docs
+  - [WORKING] implement the function
+
+# Signature
+"""
+function generatechat(a::T1, input::T2) where {T1<:agent, T2<:AbstractString}
+  # Turn the agent's current thought (`input`) plus the chat history into the message that
+  # will be shown to the user. The LLM is asked to answer in the form `chat: ...`; the text
+  # after the marker is returned. The LLM call is retried up to 5 times when the response 
+  # cannot be parsed or is empty, after which an error is thrown.
+  systemmsg = 
+  """
+  You are a helpful sommelier working for a wine store. 
+  Your task is to help the user choose the best wine that match the user preferences from your inventory.
+  You are also eager to improve your helpfulness.
+  
+  You must follow the following guidelines:
+  - Get to know how much the user willing to spend
+  - Get to know type of wine the user is looking for e.g. red, white, sparkling, rose, dessert, fortified
+  - Get to know what occasion the user is buying wine for
+  - Get to know what characteristics of wine the user is looking for e.g. tannin, sweetness, intensity, acidity
+  - Get to know what food the user will have with wine
+
+  At each round of conversation, the user will give you:
+  Context: ...
+  Your thoughts: Your current thinking in your mind
+  Your earlier conversation with the user: ...
+
+  You should then respond to the user with:
+  - chat: what do you want to say to the user
+
+  You should only respond in format as described below:
+  chat: ...
+
+  Let's begin!
+  """
+
+  usermsg =
+  """
+  Context: None
+  Your thoughts: $input
+  Your earlier conversation with the user: $(chatHistoryToString(a))
+  """
+
+  _prompt = 
+  [
+    Dict(:name=> "system", :text=> systemmsg),
+    Dict(:name=> "user", :text=> usermsg)
+  ]
+
+  # put in model format
+  prompt = GeneralUtils.formatLLMtext(_prompt, "llama3instruct")
+  prompt *=
+    """
+    <|start_header_id|>assistant<|end_header_id|>
+    """
+
+  for attempt in 1:5
+    try
+      # use the agent's own LLM callable (the same one think() hands to winestock())
+      response = a.text2textInstructLLM(prompt)
+      responsedict = GeneralUtils.textToDict(response,
+            ["chat"], 
+            rightmarker=":", symbolkey=true)
+
+      # check if dict has all required value. Only "chat" is requested from the LLM, so it
+      # is the only key validated; a missing or empty value triggers another attempt.
+      chat::AbstractString = responsedict[:chat]
+      isempty(strip(chat)) && error("LLM returned an empty chat response")
+
+      return chat
+    catch e
+      # report the failure together with its stacktrace, then retry
+      errorMsg = sprint(showerror, e)
+      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
+      println("")
+      println("Attempt $attempt. Error occurred: $errorMsg\n$st")
+      println("")
     end
   end
+  error("generatechat failed to generate a response")
 end