update
This commit is contained in:
@@ -103,52 +103,50 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
|
|||||||
You are a helpful sommelier working for a wine store.
|
You are a helpful sommelier working for a wine store.
|
||||||
Your goal is to reccommend the best wine from your inventory that match the user preferences.
|
Your goal is to reccommend the best wine from your inventory that match the user preferences.
|
||||||
|
|
||||||
$customerinfo
|
|
||||||
|
|
||||||
You must follow the following criteria:
|
You must follow the following criteria:
|
||||||
1) Get to know what occasion the user is buying wine for
|
1) Get to know what occasion the user is buying wine for
|
||||||
2) Get to know what food the user will have with wine
|
2) Get to know what food the user will have with wine
|
||||||
3) Get to know how much the user willing to spend
|
3) Get to know how much the user willing to spend
|
||||||
4) Get to know type of wine the user is looking for
|
4) Get to know type of wine the user is looking for e.g. Red, White, Sparkling, Rose, Dessert, Fortified
|
||||||
e.g. Red, White, Sparkling, Rose, Dessert, Fortified
|
5) Get to know what characteristics of wine the user is looking for
|
||||||
5) Get to know what wine characteristics the user is looking for
|
e.g. tannin, sweetness, intensity, acidity
|
||||||
e.g. tannin, sweetness, intensity, acidity
|
|
||||||
6) Check your inventory for the best wine that match the user preference
|
6) Check your inventory for the best wine that match the user preference
|
||||||
|
7) Recommend wine to the user
|
||||||
|
|
||||||
You should only respond with interleaving step-by-step Thought, Action, Observation steps.
|
You should only respond with interleaving Thought, Action, Observation steps.
|
||||||
Thought can reason about the current situation, and Action can be three types:
|
Thought can reason about the current situation, and Action can be three types:
|
||||||
1) winestock[query], which you can use to find wine in your inventory.
|
1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
|
||||||
2) chatbox[text], which you can use to interact with the user.
|
2) chatbox[text], which you can use to interact with the user.
|
||||||
3) finish[answer], which returns your wine reccommendation to the user.
|
3) recommendation[answer], which returns your wine reccommendation to the user.
|
||||||
|
|
||||||
You should only respond in JSON format as describe below:
|
You should only respond in JSON format as describe below:
|
||||||
{
|
{
|
||||||
"Thought_1": "reasoning 1",
|
"Thought": "your reasoning",
|
||||||
"Thought_2": "reasoning 2",
|
"Action": {"name": "action to take", "input": "Action input"},
|
||||||
...
|
"Observation": "result of the action"
|
||||||
"Thought_n": "reasoning n",
|
|
||||||
"Action_1": {"name": "action to take", "input": "Action input"},
|
|
||||||
"Observation_1": "result of the action"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Here are some examples:
|
Here are some examples:
|
||||||
{
|
{
|
||||||
"Question": "I'm looking for a sedan with an automatic driving feature.",
|
"Question": "I would like to buy a sedan with 8 seats.",
|
||||||
"Thought_1": "I have many types of sedans in my inventory, each with diverse features.",
|
"Thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
|
||||||
"Thought_2": "But there is only 1 car that has the feature customer wanted.",
|
"Action_1": {"name": "inventory", "input": "sedan with 8 seats."},
|
||||||
"Action_1": {"name": "finish", "input": "I recommend a Tesla model Y. It has your requested feature and much more."}
|
"Observation_1": "Several model has 8 seats. Available color are black, red green"
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
"Question": "I would like to buy a sedan with 8 seats.",
|
"Thought_2": "I have to ask the user what color he likes.",
|
||||||
"Thought_1": "I have one model that fits the user demand",
|
"Action_2": {"name": "chatbox", "input": "Which color do you like?"}
|
||||||
"Thought_2": "But I'm not sure that we have it in stock.",
|
"Observation_2": "I'll take black."
|
||||||
"Thought_3": "I need to check out inventory first.",
|
}
|
||||||
"Action_1": {"name": "inventory", "input": "Yiem model A"}
|
{
|
||||||
|
"Thought_3": "There is only one model that fits the user preference. It's Yiem model A",
|
||||||
|
"Action_3": {"name": "recommendation", "input": "I recommend a Yiem model A"}
|
||||||
}
|
}
|
||||||
|
|
||||||
$reflect
|
Let's begin!
|
||||||
|
|
||||||
$(JSON3.write(state[:thoughtHistory]))
|
$(JSON3.write(state[:thoughtHistory]))
|
||||||
|
{Thought
|
||||||
"""
|
"""
|
||||||
|
|
||||||
prompt = formatLLMtext_llama3instruct("system", _prompt)
|
prompt = formatLLMtext_llama3instruct("system", _prompt)
|
||||||
@@ -168,7 +166,7 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
|
|||||||
:text=> prompt,
|
:text=> prompt,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
@show outgoingMsg
|
||||||
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
|
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
|
||||||
thoughtJsonStr = _response[:response][:text]
|
thoughtJsonStr = _response[:response][:text]
|
||||||
thoughtDict = copy(JSON3.read(thoughtJsonStr))
|
thoughtDict = copy(JSON3.read(thoughtJsonStr))
|
||||||
|
|||||||
55
src/mcts.jl
55
src/mcts.jl
@@ -52,6 +52,7 @@ struct MCTSNode{T<:AbstractDict}
|
|||||||
state::T
|
state::T
|
||||||
visits::Integer
|
visits::Integer
|
||||||
progressValue::Number
|
progressValue::Number
|
||||||
|
reward::Number
|
||||||
parent::Union{MCTSNode, Nothing}
|
parent::Union{MCTSNode, Nothing}
|
||||||
children::Dict{String, MCTSNode}
|
children::Dict{String, MCTSNode}
|
||||||
end
|
end
|
||||||
@@ -114,10 +115,13 @@ end
|
|||||||
julia>
|
julia>
|
||||||
```
|
```
|
||||||
|
|
||||||
|
# TODO
|
||||||
|
- [] update docstring
|
||||||
|
|
||||||
# Signature
|
# Signature
|
||||||
"""
|
"""
|
||||||
function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
||||||
progressValueEstimator::Function; n::Integer=3) where {T1<:agent, T2<:AbstractDict}
|
progressValueEstimator::Function; n::Integer=3) where {T1<:agent}
|
||||||
|
|
||||||
# sampling action from decisionMaker
|
# sampling action from decisionMaker
|
||||||
for sample in 1:n
|
for sample in 1:n
|
||||||
@@ -127,12 +131,13 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
|||||||
newNodeKey, newstate = MCTStransition(a, node.state, thoughtDict) #[] Implement your transition function
|
newNodeKey, newstate = MCTStransition(a, node.state, thoughtDict) #[] Implement your transition function
|
||||||
|
|
||||||
# add progressValueEstimator
|
# add progressValueEstimator
|
||||||
_, progressValue = progressValueEstimator(a, newstate)
|
progressRationale, progressValue = progressValueEstimator(a, newstate)
|
||||||
|
|
||||||
#[WORKING] check for terminal state
|
#[WORKING] check for terminal state
|
||||||
|
|
||||||
|
|
||||||
if newNodeKey ∉ keys(node.children)
|
if newNodeKey ∉ keys(node.children)
|
||||||
node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, progressValue,
|
node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, progressValue, 0,
|
||||||
node, Dict{String, MCTSNode}())
|
node, Dict{String, MCTSNode}())
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -152,31 +157,19 @@ julia>
|
|||||||
# TODO
|
# TODO
|
||||||
- [] update docstring
|
- [] update docstring
|
||||||
- [WORKING] implement the function
|
- [WORKING] implement the function
|
||||||
- [] reward only comes at terminal state
|
- [] check for the terminal state (node.reward != 0); break if it is a terminal state
|
||||||
|
|
||||||
# Signature
|
# Signature
|
||||||
"""
|
"""
|
||||||
function simulate(a, node::MCTSNode, max_depth::Int; n=3)
|
function simulate(a, node::MCTSNode, decisionMaker, progressValueEstimator, max_depth::Int; n=3)
|
||||||
|
|
||||||
total_reward = 0.0
|
|
||||||
for _ in 1:max_depth
|
for _ in 1:max_depth
|
||||||
node = selectChildNode(node)
|
node = selectChildNode(node)
|
||||||
expand(a, node, decisionMaker, progressValueEstimator, n=n)
|
expand(a, node, decisionMaker, progressValueEstimator, n=n)
|
||||||
|
|
||||||
|
# if isterminal (use a for loop over the node's children to look for childNode.reward != 0)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# #[] Implement your action selection function based on highest stateValue
|
|
||||||
# action = select_action(state) # current state
|
|
||||||
# state, reward = transition(state, action) # Implement transition function to a new state
|
|
||||||
|
|
||||||
# #[] check for the terminal state, break if it is terminal state
|
|
||||||
# if isterminal
|
|
||||||
|
|
||||||
total_reward += reward
|
|
||||||
end
|
end
|
||||||
error("--> simulate")
|
error("--> simulate")
|
||||||
return total_reward
|
return total_reward
|
||||||
@@ -254,11 +247,12 @@ julia> thoughtDict = Dict(
|
|||||||
"""
|
"""
|
||||||
function MCTStransition(a::T1, state::T2,
|
function MCTStransition(a::T1, state::T2,
|
||||||
thoughtDict::T3)::Tuple{String, Dict{Symbol, Any}} where {T1<:agent, T2<:AbstractDict, T3<:AbstractDict}
|
thoughtDict::T3)::Tuple{String, Dict{Symbol, Any}} where {T1<:agent, T2<:AbstractDict, T3<:AbstractDict}
|
||||||
latestThoughtKey, _ = GeneralUtils.findHighestIndexKey(thoughtDict, "Thought")
|
println("")
|
||||||
latestActionKey, latestActionIndice = GeneralUtils.findHighestIndexKey(thoughtDict, "Action")
|
# latestThoughtKey, _ = GeneralUtils.findHighestIndexKey(thoughtDict, "Thought")
|
||||||
_action = thoughtDict[latestActionKey]
|
# latestActionKey, latestActionIndice = GeneralUtils.findHighestIndexKey(thoughtDict, "Action")
|
||||||
actionname = _action[:name]
|
# _action = thoughtDict[:Action]
|
||||||
actioninput = _action[:input]
|
actionname = thoughtDict[:Action][:name]
|
||||||
|
actioninput = thoughtDict[:Action][:input]
|
||||||
|
|
||||||
# map action and input() to llm function
|
# map action and input() to llm function
|
||||||
response =
|
response =
|
||||||
@@ -272,11 +266,16 @@ function MCTStransition(a::T1, state::T2,
|
|||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
_, latestThoughtIndice = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], "Thought")
|
||||||
|
nextIndice = latestThoughtIndice === nothing ? 1 : latestThoughtIndice + 1
|
||||||
|
latestThoughtKey = Symbol("Thought_$nextIndice")
|
||||||
|
latestActionKey = Symbol("Action_$nextIndice")
|
||||||
|
|
||||||
# add Thought, action, observation to thoughtHistory
|
# add Thought, action, observation to thoughtHistory
|
||||||
newstate = deepcopy(state)
|
newstate = deepcopy(state)
|
||||||
newstate[:thoughtHistory][latestThoughtKey] = thoughtDict[latestThoughtKey]
|
newstate[:thoughtHistory][latestThoughtKey] = thoughtDict[:Thought]
|
||||||
newstate[:thoughtHistory][latestActionKey] = thoughtDict[latestActionKey]
|
newstate[:thoughtHistory][latestActionKey] = thoughtDict[:Action]
|
||||||
latestObservationKey = Symbol("Observation_$(latestActionIndice)")
|
latestObservationKey = Symbol("Observation_$(nextIndice)")
|
||||||
newstate[:thoughtHistory][latestObservationKey] = response
|
newstate[:thoughtHistory][latestObservationKey] = response
|
||||||
|
|
||||||
newNodeKey = GeneralUtils.uuid4snakecase()
|
newNodeKey = GeneralUtils.uuid4snakecase()
|
||||||
@@ -398,7 +397,7 @@ function runMCTS(
|
|||||||
maxIterations::Integer,
|
maxIterations::Integer,
|
||||||
w::Float64) where {T1<:agent}
|
w::Float64) where {T1<:agent}
|
||||||
|
|
||||||
root = MCTSNode("root", initialState, 0, 0.0, nothing, Dict{String, MCTSNode}())
|
root = MCTSNode("root", initialState, 0, 0, 0, nothing, Dict{String, MCTSNode}())
|
||||||
|
|
||||||
for _ in 1:maxIterations
|
for _ in 1:maxIterations
|
||||||
node = root
|
node = root
|
||||||
@@ -410,7 +409,7 @@ function runMCTS(
|
|||||||
|
|
||||||
# from paper, just start simulation at this node. Not the node that newly expanded
|
# from paper, just start simulation at this node. Not the node that newly expanded
|
||||||
startsim_node = node
|
startsim_node = node
|
||||||
reward = simulate(a, startsim_node, maxDepth, n=n)
|
reward = simulate(a, startsim_node, decisionMaker, progressValueEstimator, maxDepth, n=n)
|
||||||
backpropagate(leaf_node, reward)
|
backpropagate(leaf_node, reward)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
159
test/prompttest.jl
Normal file
159
test/prompttest.jl
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
using Revise
|
||||||
|
using YiemAgent, GeneralUtils, JSON3, DataStructures
|
||||||
|
|
||||||
|
thoughtDict = OrderedDict(
|
||||||
|
:Question=> "Hello, I would like a get a bottle of wine",
|
||||||
|
:Thought_1=> "The customer wants to buy a bottle of wine, but we need more information about their preferences.",
|
||||||
|
:Action_1=> Dict(
|
||||||
|
:name=> "chatbox",
|
||||||
|
:input=> "What occasion are you buying the wine for?",
|
||||||
|
),
|
||||||
|
:Observation_1=> "We are having a wedding pary this weekend.",
|
||||||
|
|
||||||
|
:Thought_2=> "A wedding party is a great occasion to have a good bottle of wine.",
|
||||||
|
:Action_2=> Dict(
|
||||||
|
:name=> "chatbox",
|
||||||
|
:input=> "What type of food will you be serving with the wine?",
|
||||||
|
),
|
||||||
|
:Observation_2=> "I think it is Thai dishes",
|
||||||
|
|
||||||
|
:Thought_3=> "Now that I know the occasion and food, I need to ask about the budget.",
|
||||||
|
:Action_3=> Dict(
|
||||||
|
:name=> "chatbox",
|
||||||
|
:input=> "What is your budget for this wine?",
|
||||||
|
),
|
||||||
|
:Observation_3=> "50 bucks",
|
||||||
|
|
||||||
|
:Thought_4=> "With a budget of \$50, we have a wide range of options. Now that I know it's a wedding party and Thai dishes, I need to ask about the type of wine they prefer.",
|
||||||
|
:Action_4=> Dict(
|
||||||
|
:name=> "chatbox",
|
||||||
|
:input=> "What type of wine are you looking for? (Red, White, Sparkling, Rose, Dessert, Fortified)",
|
||||||
|
),
|
||||||
|
:Observation_4=> "Sparkling please.",
|
||||||
|
|
||||||
|
:Thought_5=> "Now that I know the occasion, food, budget and preferred type of wine, it's time to check our inventory for the best matching wine.",
|
||||||
|
:Action_5=> Dict(
|
||||||
|
:name=> "winestock",
|
||||||
|
:input=> "wine with budget \$50, Thai dishes, sparkling, wedding party",
|
||||||
|
),
|
||||||
|
:Observation_5=> "I found the following wine in stock {1 : Zena Crown Vista, 2 : Schrader Cabernet Sauvignon}",
|
||||||
|
|
||||||
|
:Thought_6=> "Now that I have all the information, it's time to recommend a wine that fits their preferences.",
|
||||||
|
:Action_6=> Dict(
|
||||||
|
:name=> "recommendation",
|
||||||
|
:input=> "I recommend Zena Crown Vista for its sparkling and affordable price.",
|
||||||
|
),
|
||||||
|
:Observation_6=> "I don't like it. Do you have another option?",
|
||||||
|
)
|
||||||
|
|
||||||
|
_thoughtJsonStr = JSON3.write(thoughtDict)
|
||||||
|
thoughtJsonStr = _thoughtJsonStr[1:end-1] # remove } at the end
|
||||||
|
# @show thoughtJsonStr
|
||||||
|
|
||||||
|
_, latestThoughtIndice = GeneralUtils.findHighestIndexKey(thoughtDict, "Thought")
|
||||||
|
nextThoughtIndice = latestThoughtIndice + 1
|
||||||
|
|
||||||
|
_prompt =
|
||||||
|
"""
|
||||||
|
You are a helpful sommelier working for a wine store.
|
||||||
|
Your goal is to reccommend the best wine from your inventory that match the user preferences.
|
||||||
|
|
||||||
|
You must follow the following criteria:
|
||||||
|
1) Get to know what occasion the user is buying wine for
|
||||||
|
2) Get to know what food the user will have with wine
|
||||||
|
3) Get to know how much the user willing to spend
|
||||||
|
4) Get to know type of wine the user is looking for e.g. Red, White, Sparkling, Rose, Dessert, Fortified
|
||||||
|
5) Get to know what characteristics of wine the user is looking for
|
||||||
|
e.g. tannin, sweetness, intensity, acidity
|
||||||
|
6) Check your inventory for the best wine that match the user preference
|
||||||
|
7) Recommend wine to the user
|
||||||
|
|
||||||
|
You should only respond with interleaving Thought, Action, Observation steps.
|
||||||
|
Thought can reason about the current situation, and Action can be three types:
|
||||||
|
1) winestock[query], which you can use to find wine in your inventory. The more input data the better.
|
||||||
|
2) chatbox[text], which you can use to interact with the user.
|
||||||
|
3) recommendation[answer], which returns your wine reccommendation to the user.
|
||||||
|
|
||||||
|
You should only respond in JSON format as describe below:
|
||||||
|
{
|
||||||
|
"Thought": "your reasoning",
|
||||||
|
"Action": {"name": "action to take", "input": "Action input"},
|
||||||
|
"Observation": "result of the action"
|
||||||
|
}
|
||||||
|
|
||||||
|
Here are some examples:
|
||||||
|
{
|
||||||
|
"Question": "I would like to buy a sedan with 8 seats.",
|
||||||
|
"Thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
|
||||||
|
"Action_1": {"name": "inventory", "input": "sedan with 8 seats."},
|
||||||
|
"Observation_1": "Several model has 8 seats. Available color are black, red green"
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"Thought_2": "I have to ask the user what color he likes.",
|
||||||
|
"Action_2": {"name": "chatbox", "input": "Which color do you like?"}
|
||||||
|
"Observation_2": "I'll take black."
|
||||||
|
}
|
||||||
|
{
|
||||||
|
"Thought_3": "There is only one model that fits the user preference. It's Yiem model A",
|
||||||
|
"Action_3": {"name": "recommendation", "input": "I recommend a Yiem model A"}
|
||||||
|
}
|
||||||
|
|
||||||
|
Let's begin!
|
||||||
|
|
||||||
|
$(JSON3.write(thoughtDict))
|
||||||
|
{Thought_$nextThoughtIndice
|
||||||
|
"""
|
||||||
|
|
||||||
|
prompt = YiemAgent.formatLLMtext_llama3instruct("system", _prompt)
|
||||||
|
@show prompt
|
||||||
|
msgMeta = Dict(:requestResponse => nothing,
|
||||||
|
:msgPurpose => nothing,
|
||||||
|
:receiverId => nothing,
|
||||||
|
:getPost => nothing,
|
||||||
|
:msgId => "4c7111e0-c30e-44c3-8f85-1c8b3f03a8be",
|
||||||
|
:acknowledgestatus => nothing,
|
||||||
|
:replyToMsgId => nothing,
|
||||||
|
:msgFormatVersion => nothing,
|
||||||
|
:mqttServerInfo => Dict(:port => 1883, :broker => "mqtt.yiem.cc"),
|
||||||
|
:sendTopic => "/loadbalancer/requestingservice",
|
||||||
|
:receiverName => "text2textinstruct",
|
||||||
|
:replyTopic => nothing,
|
||||||
|
:senderName => "decisionMaker",
|
||||||
|
:senderSelfnote => nothing,
|
||||||
|
:senderId => "testingSessionID",
|
||||||
|
:timeStamp => "2024-05-04T08:06:23.561"
|
||||||
|
)
|
||||||
|
|
||||||
|
outgoingMsg = Dict(
|
||||||
|
:msgMeta=> msgMeta,
|
||||||
|
:payload=> Dict(
|
||||||
|
:text=> prompt,
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
|
||||||
|
thoughtJsonStr = _response[:response][:text]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user