diff --git a/src/interface.jl b/src/interface.jl index cbb81c7..3e29722 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -97,6 +97,18 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2 # (trajectories) # """ + + """ + { + "Question": "I would like to buy a sedan.", + "Thought_1": "I have many cars in my inventory suitable for several usage scenarios.", + "Thought_2": "It would be better if I knew what the user intends to do with his car.", + "Thought_3": "I will ask the user what is the intended usecase", + "Action_1": {"name": "chatbox", "input": "What will you use it for?"} + } + """ + + _prompt = """ You are a helpful sommelier working for a wine store. @@ -131,19 +143,19 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2 } Here are some examples: - { - "Question": "I would like to buy a sedan.", - "Thought_1": "I have many cars in my inventory suitable for several usage scenarios.", - "Thought_2": "It would be better if I knew what the user intends to do with his car.", - "Thought_3": "I will ask the user what is the intended usecase", - "Action_1": {"name": "chatbox", "input": "What will you use it for?"} - } { "Question": "I'm looking for a sedan with an automatic driving feature.", "Thought_1": "I have many types of sedans in my inventory, each with diverse features.", "Thought_2": "But there is only 1 car that has the feature customer wanted.", "Action_1": {"name": "finish", "input": "I recommend a Tesla model Y. 
It has your requested feature and much more."} } + { + "Question": "I would like to buy a sedan with 8 seats.", + "Thought_1": "I have one model that fits the user demand", + "Thought_2": "But I'm not sure that we have it in stock.", + "Thought_3": "I need to check out inventory first.", + "Action_1": {"name": "inventory", "input": "Yiem model A"} + } $reflect diff --git a/src/mcts.jl b/src/mcts.jl index 2bc88aa..aa89129 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -48,7 +48,7 @@ struct MCTSNode{T<:AbstractDict} state::T visits::Integer stateValue::AbstractFloat - children::Dict{T, MCTSNode} + children::Dict{String, MCTSNode} end """ Select a node based on UCT score @@ -121,8 +121,7 @@ function expand(a::T1, node::MCTSNode, state::T2, decisionMaker::Function, newStatekey, newstate = MCTStransition(a, node.state, thoughtDict) #[] Implement your transition function if newStatekey ∉ keys(node.children)# BUG should be "key of the newstate" here not newstate itself - statetype = typeof(state) - node.children[newStatekey] = MCTSNode(newstate, 0, 0.0, Dict{statetype, MCTSNode}()) + node.children[newStatekey] = MCTSNode(newstate, 0, 0.0, Dict{String, MCTSNode}()) end # add stateValueEstimator @@ -262,7 +261,7 @@ function MCTStransition(a::T1, state::T2, latestObservationKey = Symbol("Observation_$(latestActionIndice)") newstate[:thoughtHistory][latestObservationKey] = response - newStatekey = Symbol(GeneralUtils.uuid4snakecase()) + newStatekey = GeneralUtils.uuid4snakecase() return newStatekey, newstate end @@ -370,8 +369,7 @@ function runMCTS( maxIterations::Integer, w::Float64) where {T1<:agent} - statetype = typeof(initialState) - root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}()) + root = MCTSNode(initialState, 0, 0.0, Dict{String, MCTSNode}()) for _ in 1:maxIterations node = root @@ -381,7 +379,8 @@ function runMCTS( expand(a, node, node.state, decisionMaker, stateValueEstimator, n=n) - leaf_node = node.children[node.state] # mark leaf node + # from 
paper, just start the simulation at this node, not at the newly expanded node + leaf_node = node reward = simulate(leaf_node.state, maxDepth) backpropagate(leaf_node, reward) end