update

2024-05-04 15:36:15 +07:00
parent 15702973b0
commit 0286bc13c7
3 changed files with 215 additions and 59 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -103,52 +103,50 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
  You are a helpful sommelier working for a wine store. 
  Your goal is to reccommend the best wine from your inventory that match the user preferences.
  $customerinfo
  You must follow the following criteria:
  1) Get to know what occasion the user is buying wine for
  2) Get to know what food the user will have with wine
  3) Get to know how much the user willing to spend
-  4) Get to know type of wine the user is looking for 
+  4) Get to know type of wine the user is looking for e.g. Red, White, Sparkling, Rose, Dessert, Fortified
-    e.g. Red, White, Sparkling, Rose, Dessert, Fortified
+  5) Get to know what characteristics of wine the user is looking for 
-  5) Get to know what wine characteristics the user is looking for 
+      e.g. tannin, sweetness, intensity, acidity
    e.g. tannin, sweetness, intensity, acidity
  6) Check your inventory for the best wine that match the user preference
  7) Recommend wine to the user
-  You should only respond with interleaving step-by-step Thought, Action, Observation steps. 
+  You should only respond with interleaving Thought, Action, Observation steps. 
  Thought can reason about the current situation, and Action can be three types:
-  1) winestock[query], which you can use to find wine in your inventory.
+  1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
  2) chatbox[text], which you can use to interact with the user.
-  3) finish[answer], which returns your wine reccommendation to the user.
+  3) recommendation[answer], which returns your wine reccommendation to the user. 
  You should only respond in JSON format as describe below:
  {
-    "Thought_1": "reasoning 1",
+    "Thought": "your reasoning",
-    "Thought_2": "reasoning 2",
+    "Action": {"name": "action to take", "input": "Action input"},
-    ...
+    "Observation": "result of the action"
    "Thought_n": "reasoning n",
    "Action_1": {"name": "action to take", "input": "Action input"},
    "Observation_1": "result of the action"
  }
  Here are some examples:
  {
-    "Question": "I'm looking for a sedan with an automatic driving feature.",
+  "Question": "I would like to buy a sedan with 8 seats.",
-    "Thought_1": "I have many types of sedans in my inventory, each with diverse features.",
+  "Thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
-    "Thought_2": "But there is only 1 car that has the feature customer wanted.",
+  "Action_1": {"name": "inventory", "input": "sedan with 8 seats."},
-    "Action_1": {"name": "finish", "input": "I recommend a Tesla model Y. It has your requested feature and much more."}
+  "Observation_1": "Several model has 8 seats. Available color are black, red green"
  }
  {
-    "Question": "I would like to buy a sedan with 8 seats.",
+    "Thought_2": "I have to ask the user what color he likes.",
-    "Thought_1": "I have one model that fits the user demand",
+    "Action_2": {"name": "chatbox", "input": "Which color do you like?"}
-    "Thought_2": "But I'm not sure that we have it in stock.",
+    "Observation_2": "I'll take black."
-    "Thought_3": "I need to check out inventory first.",
+  }
-    "Action_1": {"name": "inventory", "input": "Yiem model A"}
+  {
    "Thought_3": "There is only one model that fits the user preference. It's Yiem model A",
    "Action_3": {"name": "recommendation", "input": "I recommend a Yiem model A"}
  }
-  $reflect
+  Let's begin!
  $(JSON3.write(state[:thoughtHistory]))
  {Thought
  """
  prompt =  formatLLMtext_llama3instruct("system", _prompt)
@@ -168,7 +166,7 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
      :text=> prompt,
    )
  )
-
+  @show outgoingMsg
  _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
  thoughtJsonStr = _response[:response][:text]
  thoughtDict = copy(JSON3.read(thoughtJsonStr)) 
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -52,6 +52,7 @@ struct MCTSNode{T<:AbstractDict}
  state::T
  visits::Integer
  progressValue::Number
  reward::Number
  parent::Union{MCTSNode, Nothing} 
  children::Dict{String, MCTSNode}
 end
@@ -114,10 +115,13 @@ end
 julia> 
 ```
 # TODO
  - [] update docstring
 # Signature
 """
 function expand(a::T1, node::MCTSNode, decisionMaker::Function, 
-                progressValueEstimator::Function; n::Integer=3) where {T1<:agent, T2<:AbstractDict}
+                progressValueEstimator::Function; n::Integer=3) where {T1<:agent}
  # sampling action from decisionMaker
  for sample in 1:n
@@ -127,12 +131,13 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
    newNodeKey, newstate = MCTStransition(a, node.state, thoughtDict)  #[] Implement your transition function
    # add progressValueEstimator
-    _, progressValue = progressValueEstimator(a, newstate)
+    progressRationale, progressValue = progressValueEstimator(a, newstate)
    #[WORKING] check for terminal state
    if newNodeKey ∉ keys(node.children)
-      node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, progressValue,
+      node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, progressValue, 0,
                                            node, Dict{String, MCTSNode}())
    end
  end 
@@ -152,31 +157,19 @@ julia>
 # TODO
  - [] update docstring
  - [WORKING] implement the function
-  - [] reward only comes at terminal state
+  - [] check for the terminal state (node.reward != 0); break if the node is terminal
 # Signature
 """
-function simulate(a, node::MCTSNode, max_depth::Int; n=3)
+function simulate(a, node::MCTSNode, decisionMaker, progressValueEstimator, max_depth::Int; n=3)
  total_reward = 0.0
  for _ in 1:max_depth
    node = selectChildNode(node)
    expand(a, node, decisionMaker, progressValueEstimator, n=n)
    # if isterminal (use for loop over node to look for childNode.reward != 0)
    # #[] Implement your action selection function based on highest stateValue
    # action = select_action(state) # current state 
    # state, reward = transition(state, action)  # Implement transition function to a new state
    # #[] check for the terminal state, break if it is terminal state
    # if isterminal
    total_reward += reward
  end
  error("--> simulate")
  return total_reward
@@ -254,11 +247,12 @@ julia>  thoughtDict = Dict(
 """
 function MCTStransition(a::T1, state::T2, 
  thoughtDict::T3)::Tuple{String, Dict{Symbol, Any}} where {T1<:agent, T2<:AbstractDict, T3<:AbstractDict}
-  latestThoughtKey, _ = GeneralUtils.findHighestIndexKey(thoughtDict, "Thought")
+  println("")
-  latestActionKey, latestActionIndice = GeneralUtils.findHighestIndexKey(thoughtDict, "Action")
+  # latestThoughtKey, _ = GeneralUtils.findHighestIndexKey(thoughtDict, "Thought")
-  _action = thoughtDict[latestActionKey]
+  # latestActionKey, latestActionIndice = GeneralUtils.findHighestIndexKey(thoughtDict, "Action")
-  actionname = _action[:name]
+  # _action = thoughtDict[:Action]
-  actioninput = _action[:input]
+  actionname = thoughtDict[:Action][:name]
  actioninput = thoughtDict[:Action][:input]
  # map action and input() to llm function
  response =
@@ -272,11 +266,16 @@ function MCTStransition(a::T1, state::T2,
  end
  _, latestThoughtIndice = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], "Thought")
  nextIndice = latestThoughtIndice === nothing ? 1 : latestThoughtIndice + 1
  latestThoughtKey = Symbol("Thought_$nextIndice")
  latestActionKey = Symbol("Action_$nextIndice")
  # add Thought, action, observation to thoughtHistory
  newstate = deepcopy(state)
-  newstate[:thoughtHistory][latestThoughtKey] = thoughtDict[latestThoughtKey]
+  newstate[:thoughtHistory][latestThoughtKey] = thoughtDict[:Thought]
-  newstate[:thoughtHistory][latestActionKey] = thoughtDict[latestActionKey]
+  newstate[:thoughtHistory][latestActionKey] = thoughtDict[:Action]
-  latestObservationKey = Symbol("Observation_$(latestActionIndice)")
+  latestObservationKey = Symbol("Observation_$(nextIndice)")
  newstate[:thoughtHistory][latestObservationKey] = response
  newNodeKey = GeneralUtils.uuid4snakecase()
@@ -398,7 +397,7 @@ function runMCTS(
  maxIterations::Integer, 
  w::Float64) where {T1<:agent}
-  root = MCTSNode("root", initialState, 0, 0.0, nothing, Dict{String, MCTSNode}())
+  root = MCTSNode("root", initialState, 0, 0, 0, nothing, Dict{String, MCTSNode}())
  for _ in 1:maxIterations
    node = root
@@ -410,7 +409,7 @@ function runMCTS(
    # Following the paper, start the simulation at this node, not at the newly expanded node.
    startsim_node = node
-    reward = simulate(a, startsim_node, maxDepth, n=n)
+    reward = simulate(a, startsim_node, decisionMaker, progressValueEstimator, maxDepth, n=n)
    backpropagate(leaf_node, reward)
  end
--- a/test/prompttest.jl
+++ b/test/prompttest.jl
@@ -0,0 +1,159 @@
 # Manual prompt experiment: build a sample conversation history, render the
 # sommelier prompt from it, and send it with GeneralUtils.sendReceiveMqttMsg.
 using Revise
 using YiemAgent, GeneralUtils, JSON3, DataStructures
 # Sample history: one :Question followed by six numbered rounds of
 # :Thought_i / :Action_i / :Observation_i. Each action is a Dict with :name
 # ("chatbox", "winestock" or "recommendation") and :input. An OrderedDict is
 # used, so the entries keep the insertion order shown here.
 thoughtDict = OrderedDict(
  :Question=> "Hello, I would like a get a bottle of wine",
  :Thought_1=> "The customer wants to buy a bottle of wine, but we need more information about their preferences.",
  :Action_1=> Dict(
      :name=> "chatbox",
      :input=> "What occasion are you buying the wine for?",
    ),
  :Observation_1=> "We are having a wedding pary this weekend.",
  :Thought_2=> "A wedding party is a great occasion to have a good bottle of wine.",
  :Action_2=> Dict(
      :name=> "chatbox",
      :input=> "What type of food will you be serving with the wine?",
    ),
  :Observation_2=> "I think it is Thai dishes",
  :Thought_3=> "Now that I know the occasion and food, I need to ask about the budget.",
  :Action_3=> Dict(
      :name=> "chatbox",
      :input=> "What is your budget for this wine?",
    ),
  :Observation_3=> "50 bucks",
  :Thought_4=> "With a budget of \$50, we have a wide range of options. Now that I know it's a wedding party and Thai dishes, I need to ask about the type of wine they prefer.",
  :Action_4=> Dict(
      :name=> "chatbox",
      :input=> "What type of wine are you looking for? (Red, White, Sparkling, Rose, Dessert, Fortified)",
    ),
  :Observation_4=> "Sparkling please.",
  :Thought_5=> "Now that I know the occasion, food, budget and preferred type of wine, it's time to check our inventory for the best matching wine.",
  :Action_5=> Dict(
      :name=> "winestock",
      :input=> "wine with budget \$50, Thai dishes, sparkling, wedding party",
    ),
  :Observation_5=> "I found the following wine in stock {1 : Zena Crown Vista, 2 : Schrader Cabernet Sauvignon}",
  :Thought_6=> "Now that I have all the information, it's time to recommend a wine that fits their preferences.",
  :Action_6=> Dict(
      :name=> "recommendation",
      :input=> "I recommend Zena Crown Vista for its sparkling and affordable price.",
    ),
  :Observation_6=> "I don't like it. Do you have another option?",
  )
 # Serialize the history to JSON text.
 _thoughtJsonStr = JSON3.write(thoughtDict)
 # Drop the final character of the JSON text (the closing brace).
 # NOTE(review): this truncated string is not interpolated into the prompt
 # below, which calls JSON3.write(thoughtDict) again, and the name
 # thoughtJsonStr is reassigned after the request is sent.
 thoughtJsonStr = _thoughtJsonStr[1:end-1]  # remove } at the end
 # @show thoughtJsonStr
 # The second return value is used as the highest existing "Thought" index.
 # Presumably 6 for the dict above; TODO confirm the helper's semantics and
 # whether it can return `nothing`, which would make the `+ 1` below fail.
 _, latestThoughtIndice = GeneralUtils.findHighestIndexKey(thoughtDict, "Thought")
 nextThoughtIndice = latestThoughtIndice + 1
 # Prompt under test. In order, it contains: the sommelier role and goal, the
 # seven numbered criteria, the three action types (winestock, chatbox,
 # recommendation), the expected JSON reply shape, three car-sales example
 # objects, "Let's begin!", the serialized thoughtDict, and finally an opening
 # "{Thought_<next index>" line.
 # Two values are interpolated: JSON3.write(thoughtDict) and nextThoughtIndice.
 # NOTE(review): the second example object has no comma between its
 # "Action_2" and "Observation_2" entries, so that example is not valid JSON.
 _prompt = 
 """
 You are a helpful sommelier working for a wine store. 
 Your goal is to reccommend the best wine from your inventory that match the user preferences.
 You must follow the following criteria:
 1) Get to know what occasion the user is buying wine for
 2) Get to know what food the user will have with wine
 3) Get to know how much the user willing to spend
 4) Get to know type of wine the user is looking for e.g. Red, White, Sparkling, Rose, Dessert, Fortified
 5) Get to know what characteristics of wine the user is looking for 
    e.g. tannin, sweetness, intensity, acidity
 6) Check your inventory for the best wine that match the user preference
 7) Recommend wine to the user
 You should only respond with interleaving Thought, Action, Observation steps. 
 Thought can reason about the current situation, and Action can be three types:
 1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
 2) chatbox[text], which you can use to interact with the user.
 3) recommendation[answer], which returns your wine reccommendation to the user. 
 You should only respond in JSON format as describe below:
 {
  "Thought": "your reasoning",
  "Action": {"name": "action to take", "input": "Action input"},
  "Observation": "result of the action"
 }
 Here are some examples:
 {
 "Question": "I would like to buy a sedan with 8 seats.",
 "Thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
 "Action_1": {"name": "inventory", "input": "sedan with 8 seats."},
 "Observation_1": "Several model has 8 seats. Available color are black, red green"
 }
 {
  "Thought_2": "I have to ask the user what color he likes.",
  "Action_2": {"name": "chatbox", "input": "Which color do you like?"}
  "Observation_2": "I'll take black."
 }
 {
  "Thought_3": "There is only one model that fits the user preference. It's Yiem model A",
  "Action_3": {"name": "recommendation", "input": "I recommend a Yiem model A"}
 }
 Let's begin!
 $(JSON3.write(thoughtDict))
 {Thought_$nextThoughtIndice
 """
 # Pass the raw text through the project's formatter with the "system" role.
 # Presumably this wraps it in Llama-3 instruct chat markup; TODO confirm.
 prompt =  YiemAgent.formatLLMtext_llama3instruct("system", _prompt)
 # Print the final prompt for manual inspection.
@show prompt
 # Message metadata for the request. The values are hard-coded for this
 # experiment: a fixed :msgId, :senderId and :timeStamp, the broker host and
 # port under :mqttServerInfo, the :sendTopic, and the sender/receiver names.
 # Every other field is left as `nothing`.
 # NOTE(review): :replyTopic is `nothing`; how the reply is routed back is not
 # visible here. Verify against GeneralUtils.sendReceiveMqttMsg.
 msgMeta = Dict(:requestResponse => nothing,
  :msgPurpose => nothing,
  :receiverId => nothing,
  :getPost => nothing,
  :msgId => "4c7111e0-c30e-44c3-8f85-1c8b3f03a8be",
  :acknowledgestatus => nothing,
  :replyToMsgId => nothing,
  :msgFormatVersion => nothing,
  :mqttServerInfo => Dict(:port => 1883, :broker => "mqtt.yiem.cc"),
  :sendTopic => "/loadbalancer/requestingservice",
  :receiverName => "text2textinstruct",
  :replyTopic => nothing,
  :senderName => "decisionMaker",
  :senderSelfnote => nothing,
  :senderId => "testingSessionID",
  :timeStamp => "2024-05-04T08:06:23.561"
  )
 # Envelope: the metadata plus a payload that carries the formatted prompt
 # under :text.
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
  :payload=> Dict(
    :text=> prompt,
  )
 )
 # Send the message and capture the reply. Presumably this blocks until a
 # response arrives; TODO confirm.
 _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
 # Extract the generated text from the reply. This reassigns thoughtJsonStr,
 # replacing the truncated JSON string computed earlier in the script.
 thoughtJsonStr = _response[:response][:text]