update
This commit is contained in:
@@ -97,6 +97,18 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
|
||||
# (trajectories)
|
||||
# """
|
||||
|
||||
|
||||
"""
|
||||
{
|
||||
"Question": "I would like to buy a sedan.",
|
||||
"Thought_1": "I have many cars in my inventory suitable for several usage scenarios.",
|
||||
"Thought_2": "It would be better if I knew what the user intends to do with his car.",
|
||||
"Thought_3": "I will ask the user what is the intended usecase",
|
||||
"Action_1": {"name": "chatbox", "input": "What will you use it for?"}
|
||||
}
|
||||
"""
|
||||
|
||||
|
||||
_prompt =
|
||||
"""
|
||||
You are a helpful sommelier working for a wine store.
|
||||
@@ -131,19 +143,19 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
|
||||
}
|
||||
|
||||
Here are some examples:
|
||||
{
|
||||
"Question": "I would like to buy a sedan.",
|
||||
"Thought_1": "I have many cars in my inventory suitable for several usage scenarios.",
|
||||
"Thought_2": "It would be better if I knew what the user intends to do with his car.",
|
||||
"Thought_3": "I will ask the user what is the intended usecase",
|
||||
"Action_1": {"name": "chatbox", "input": "What will you use it for?"}
|
||||
}
|
||||
{
|
||||
"Question": "I'm looking for a sedan with an automatic driving feature.",
|
||||
"Thought_1": "I have many types of sedans in my inventory, each with diverse features.",
|
||||
"Thought_2": "But there is only 1 car that has the feature customer wanted.",
|
||||
"Action_1": {"name": "finish", "input": "I recommend a Tesla model Y. It has your requested feature and much more."}
|
||||
}
|
||||
{
|
||||
"Question": "I would like to buy a sedan with 8 seats.",
|
||||
"Thought_1": "I have one model that fits the user demand",
|
||||
"Thought_2": "But I'm not sure that we have it in stock.",
|
||||
"Thought_3": "I need to check out inventory first.",
|
||||
"Action_1": {"name": "inventory", "input": "Yiem model A"}
|
||||
}
|
||||
|
||||
$reflect
|
||||
|
||||
|
||||
13
src/mcts.jl
13
src/mcts.jl
@@ -48,7 +48,7 @@ struct MCTSNode{T<:AbstractDict}
|
||||
state::T
|
||||
visits::Integer
|
||||
stateValue::AbstractFloat
|
||||
children::Dict{T, MCTSNode}
|
||||
children::Dict{String, MCTSNode}
|
||||
end
|
||||
|
||||
""" Select a node based on UCT score
|
||||
@@ -121,8 +121,7 @@ function expand(a::T1, node::MCTSNode, state::T2, decisionMaker::Function,
|
||||
newStatekey, newstate = MCTStransition(a, node.state, thoughtDict) #[] Implement your transition function
|
||||
|
||||
if newStatekey ∉ keys(node.children)# BUG should be "key of the newstate" here not newstate itself
|
||||
statetype = typeof(state)
|
||||
node.children[newStatekey] = MCTSNode(newstate, 0, 0.0, Dict{statetype, MCTSNode}())
|
||||
node.children[newStatekey] = MCTSNode(newstate, 0, 0.0, Dict{String, MCTSNode}())
|
||||
end
|
||||
|
||||
# add stateValueEstimator
|
||||
@@ -262,7 +261,7 @@ function MCTStransition(a::T1, state::T2,
|
||||
latestObservationKey = Symbol("Observation_$(latestActionIndice)")
|
||||
newstate[:thoughtHistory][latestObservationKey] = response
|
||||
|
||||
newStatekey = Symbol(GeneralUtils.uuid4snakecase())
|
||||
newStatekey = GeneralUtils.uuid4snakecase()
|
||||
|
||||
return newStatekey, newstate
|
||||
end
|
||||
@@ -370,8 +369,7 @@ function runMCTS(
|
||||
maxIterations::Integer,
|
||||
w::Float64) where {T1<:agent}
|
||||
|
||||
statetype = typeof(initialState)
|
||||
root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}())
|
||||
root = MCTSNode(initialState, 0, 0.0, Dict{String, MCTSNode}())
|
||||
|
||||
for _ in 1:maxIterations
|
||||
node = root
|
||||
@@ -381,7 +379,8 @@ function runMCTS(
|
||||
|
||||
expand(a, node, node.state, decisionMaker, stateValueEstimator, n=n)
|
||||
|
||||
leaf_node = node.children[node.state] # mark leaf node
|
||||
# Following the paper, start the simulation at this node, not at the newly expanded node.
|
||||
leaf_node = node
|
||||
reward = simulate(leaf_node.state, maxDepth)
|
||||
backpropagate(leaf_node, reward)
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user