From db42a55e00a1f93e385de0d320f8013097168e52 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Sat, 27 Apr 2024 17:44:55 +0700 Subject: [PATCH] update --- src/interface.jl | 93 +++++++++++++++++++++++++++++++++++++++++++----- src/mcts.jl | 51 ++++++++++---------------- src/util.jl | 12 +++---- 3 files changed, 108 insertions(+), 48 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index 74b685c..ed19a5c 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -1,6 +1,6 @@ module interface -export addNewMessage, conversation, decisionMaker +export addNewMessage, conversation, decisionMaker, isterminal using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient using GeneralUtils @@ -63,25 +63,70 @@ using ..type, ..util, ..llmfunction, ..mcts function decisionMaker(a::T1, state::T2) where {T1<:agent, T2<:AbstractDict} customerinfo = """ - I will give you the following information about customer: - $(JSON3.write(state[:customerinfo])) + I will give you the following information about customer: + $(JSON3.write(state[:customerinfo])) """ storeinfo = """ - I will give you the following information about your store: - $(JSON3.write(state[:storeinfo])) + I will give you the following information about your store: + $(JSON3.write(state[:storeinfo])) """ + reflect = "" + # """ + # You have attempted to answer the following question before and failed. The following + # reflection(s) give a plan to avoid failing to answer the question in the same way you did + # previously. Use them to improve your strategy of correctly answering the given question. + # (trajectories) + # """ + + #[WORKING] _prompt = """ - You are a helpful sommelier working for a wine store. - You helps users by searching wine that match the user preferences from your inventory. + You are a helpful sommelier working for a wine store. + Your goal is to reccommend the best wine from your inventory that match the user preferences. - $customerinfo + $customerinfo - You must follow the + You must follow the following criteria: + 1) Get to know what occasion the user is buying wine for + 2) Get to know what food the user will have with wine + 3) Get to know how much the user willing to spend + 4) Get to know type of wine the user is looking for + e.g. Red, White, Sparkling, Rose, Dessert, Fortified + 5) Get to know what wine characteristics the user is looking for + e.g. tannin, sweetness, intensity, acidity + 6) Check your inventory for the best wine that match the user preference + + You should only respond with interleaving step-by-step Thought, Action, Observation steps. + Thought can reason about the current situation, and Action can be three types: + 1) Chatbox[text], which you can use to interact with the user. + 2) Winestock[query], which you can use to find wine in your inventory. + 3) Finish[answer], which returns your wine reccommendation to the user. + + You should only respond in JSON format as describe below: + { + "Thought_1": "reasoning", + "Thought_2": "reasoning", + ... + "Thought_n": "reasoning", + "Action_1": "action to take", + "Observation_1": "result of the action" + } + + Here are some examples: + { + "Question": "I would like to buy a sedan", + "Thought_1": "I have many car in my inventory suitable for several usage scenarios", + "Thought_2": "It would be better if I know what the user intend to do with his car", + "Thought_3": " + } + + $reflect + + $(JSON3.write(state[:thoughtHistory])) """ prompt = formatLLMtext_llama3instruct("system", _prompt) @@ -89,6 +134,7 @@ function decisionMaker(a::T1, state::T2) where {T1<:agent, T2<:AbstractDict} thought = iterativeprompting(a, prompt, syntaxcheck_json) + error("--> decisionMaker") return thought end @@ -148,6 +194,35 @@ function reflector() end +""" + + Arguments\n + ----- + + Return\n + ----- + + Example\n + ----- + ```jldoctest + julia> + ``` + + TODO\n + ----- + [] update docstring + [] implement the function + [] implement RAG to pull similar experience + + Signature\n + ----- +""" +function isterminal() + +end + + + """ Chat with llm. diff --git a/src/mcts.jl b/src/mcts.jl index 1f5e8a3..7a57a6c 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -264,36 +264,9 @@ end Signature\n ----- """ -isLeaf(node::MCTSNode)::Bool = isempty(node.children) +isleaf(node::MCTSNode)::Bool = isempty(node.children) -""" - - Arguments\n - ----- - - Return\n - ----- - - Example\n - ----- - ```jldoctest - julia> - ``` - - TODO\n - ----- - [] update docstring - [] implement the function - [] implement RAG to pull similar experience - - Signature\n - ----- -""" -function isTerminal() - -end - """ Arguments\n @@ -324,10 +297,12 @@ end # ------------------------------------------------------------------------------------------------ # # Create a complete example using the defined MCTS functions # # ------------------------------------------------------------------------------------------------ # -""" Search for best action +""" Search the best action to take for a given state and task Arguments\n ----- + a::agent + one of Yiem's agents initial state initial state decisionMaker::Function @@ -336,6 +311,8 @@ end assess the value of the state reflector::Function generate lesson from trajectory and reward + isterminal::Function + determine whether a given state is a terminal state n::Integer how many times action will be sampled from decisionMaker w::Float64 @@ -359,15 +336,23 @@ end Signature\n ----- """ -function runMCTS(a::T, initialState, decisionMaker::Function, stateValueEstimator::Function, - reflector::Function, n::Integer, maxDepth::Integer, - maxIterations::Integer, w::Float64) where {T<:agent} +function runMCTS( + a::T1, + initialState, + decisionMaker::Function, + stateValueEstimator::Function, + reflector::Function, + isterminal::Function, + n::Integer, + maxDepth::Integer, + maxIterations::Integer, w::Float64) where {T1<:agent} + statetype = typeof(initialState) root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}()) for _ in 1:maxIterations node = root - while !isLeaf(node) + while !isleaf(node) node = select(node, w) end diff --git a/src/util.jl b/src/util.jl index 972e85c..8b79911 100644 --- a/src/util.jl +++ b/src/util.jl @@ -307,8 +307,9 @@ function iterativeprompting(a::T, prompt::String, verification::Function) where ) ) - result = nothing success = nothing + result = nothing + critique = "" # iteration loop while true @@ -316,20 +317,19 @@ function iterativeprompting(a::T, prompt::String, verification::Function) where response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg) error("--> iterativeprompting") # check for correctness and get feedback - success, critique = verification(response) + success, _critique = verification(response) if success result = response break else # add critique to prompt + critique *= _critique * "\n" + replace!(prompt, "Critique: ..." => "Critique: $critique") end - - - end - return (success=sucess, result=response) + return (success=success, result=result) end