update
This commit is contained in:
@@ -442,7 +442,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
|
||||
)
|
||||
)
|
||||
bestplan = runMCTS(a, initialState, decisionMaker, progressValueEstimator, reflector,
|
||||
isterminal, 2, 3, 2, 1.0)
|
||||
isterminal, 2, 3, 3, 1.0)
|
||||
error("---> bestplan")
|
||||
|
||||
# actor loop(bestplan)
|
||||
|
||||
@@ -101,7 +101,7 @@ function virtualWineCustomerReccommendbox(a::T1, input::T2)::String where {T1<:a
|
||||
)
|
||||
)
|
||||
@show outgoingMsg
|
||||
result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
|
||||
result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
|
||||
response = result[:response][:text]
|
||||
|
||||
return response
|
||||
@@ -157,7 +157,7 @@ function virtualWineCustomerChatbox(a::T1, input::T2)::String where {T1<:agent,
|
||||
)
|
||||
)
|
||||
@show outgoingMsg
|
||||
result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
|
||||
result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
|
||||
response = result[:response][:text]
|
||||
|
||||
return response
|
||||
@@ -291,7 +291,7 @@ function jsoncorrection(a::T1, input::T2,
|
||||
)
|
||||
)
|
||||
)
|
||||
result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
|
||||
result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
|
||||
incorrectjson = result[:response][:text]
|
||||
end
|
||||
else
|
||||
|
||||
11
src/mcts.jl
11
src/mcts.jl
@@ -348,8 +348,9 @@ function selectChildNode(node::MCTSNode)::MCTSNode
|
||||
|
||||
# loop through the node's children dictionary to find the highest progress value
|
||||
for (k, childNode) in node.children
|
||||
thisNodeProgressValue = childNode.statevalue + childNode.reward
|
||||
if childNode.statevalue > highestProgressValue
|
||||
highestProgressValue = childNode.statevalue + childNode.reward
|
||||
highestProgressValue = thisNodeProgressValue
|
||||
nodekey = childNode.nodekey
|
||||
end
|
||||
end
|
||||
@@ -443,10 +444,10 @@ function runMCTS(
|
||||
|
||||
expand(a, node, decisionMaker, progressValueEstimator, isterminal, n=n)
|
||||
|
||||
leaf_node = UCTselect(node, w)
|
||||
simTrajectoryReward = simulate(a, leaf_node, decisionMaker, progressValueEstimator,
|
||||
isterminal, maxDepth, n=n)
|
||||
backpropagate(leaf_node, simTrajectoryReward)
|
||||
leafNode = UCTselect(node, w)
|
||||
simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator,
|
||||
isterminal, maxDepth, n=n)
|
||||
backpropagate(leafNode, simTrajectoryReward)
|
||||
end
|
||||
|
||||
best_child_state = argmax([child.total_reward / child.visits for child in values(root.children)])
|
||||
|
||||
Reference in New Issue
Block a user