update
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
module interface
|
||||
|
||||
export addNewMessage, conversation, decisionMaker, progressValueEstimator, reflector
|
||||
export addNewMessage, conversation, decisionMaker, evaluator, reflector
|
||||
# isterminal,
|
||||
|
||||
using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient, PrettyPrinting
|
||||
@@ -264,7 +264,7 @@ julia>
|
||||
|
||||
# Signature
|
||||
"""
|
||||
function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
|
||||
function evaluator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
|
||||
|
||||
_prompt =
|
||||
"""
|
||||
@@ -279,7 +279,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
|
||||
analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories
|
||||
can be correct if the thoughts and actions so far are correct, even if the answer is not found
|
||||
yet. Do not generate additional thoughts or actions. Then ending with the correctness score s
|
||||
where s is an integer from 1 to 10.
|
||||
where s is an integer from 0 to 10.
|
||||
|
||||
You should only respond in JSON format as describe below:
|
||||
{"evaluation": "your evaluation", "score": "your evaluation score"}
|
||||
@@ -295,7 +295,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
|
||||
}
|
||||
{"evaluation": "This trajectory is correct as it is reasonable to check an inventory for info provided in the question.
|
||||
It is also better to have simple searches corresponding to a single entity, making this the best action.",
|
||||
"score": 10
|
||||
"score": 7
|
||||
}
|
||||
|
||||
{
|
||||
@@ -309,7 +309,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
|
||||
}
|
||||
{"evaluation": "This trajectory is incorrect as my search term should be related to a 4-colors pen with a pencil in it,
|
||||
not a pen and a pencil seperately. A better search term should have been a 4-colors pen with a pencil, all-in-one.",
|
||||
"score": 2
|
||||
"score": 3
|
||||
}
|
||||
|
||||
Let's begin!:
|
||||
@@ -329,7 +329,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
|
||||
|
||||
msgMeta = GeneralUtils.generate_msgMeta(
|
||||
a.config[:externalservice][:text2textinstruct][:mqtttopic],
|
||||
senderName= "progressValueEstimator",
|
||||
senderName= "evaluator",
|
||||
senderId= a.id,
|
||||
receiverName= "text2textinstruct",
|
||||
mqttBroker= a.config[:mqttServerInfo][:broker],
|
||||
@@ -374,7 +374,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
|
||||
println("")
|
||||
end
|
||||
end
|
||||
error("progressValueEstimator failed to generate an evaluation")
|
||||
error("evaluator failed to generate an evaluation")
|
||||
end
|
||||
|
||||
|
||||
@@ -640,7 +640,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
|
||||
:question=> userinput[:text],
|
||||
)
|
||||
)
|
||||
bestplan = runMCTS(a, initialState, decisionMaker, progressValueEstimator, reflector,
|
||||
bestplan = runMCTS(a, initialState, decisionMaker, evaluator, reflector,
|
||||
2, 3, 4, 1.0)
|
||||
error("---> bestplan")
|
||||
|
||||
|
||||
46
src/mcts.jl
46
src/mcts.jl
@@ -99,27 +99,6 @@ function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}
|
||||
|
||||
return selectedNode
|
||||
end
|
||||
# function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}
|
||||
# max_uct = -Inf
|
||||
# selectedNode = nothing
|
||||
|
||||
# for (childState, childNode) in node.children
|
||||
# weightedterm =
|
||||
# if node.visits == 0 || childNode.visits == 0 # node.visits == 0 makes sqrt() error
|
||||
# 0
|
||||
# else
|
||||
# w * sqrt(log(node.visits) / childNode.visits)
|
||||
# end
|
||||
# uctValue = childNode.statevalue + weightedterm
|
||||
|
||||
# if uctValue > max_uct
|
||||
# max_uct = uctValue
|
||||
# selectedNode = childNode
|
||||
# end
|
||||
# end
|
||||
|
||||
# return selectedNode
|
||||
# end
|
||||
|
||||
|
||||
""" Expand selected node
|
||||
@@ -133,7 +112,7 @@ end
|
||||
a state of a game. Can be a Dict or something else.
|
||||
- `decisionMaker::Function`
|
||||
a function that outputs a Thought and an Action
|
||||
- `progressValueEstimator::Function`
|
||||
- `evaluator::Function`
|
||||
a function that outputs the trajectory progress score
|
||||
|
||||
# Return
|
||||
@@ -147,15 +126,13 @@ julia>
|
||||
[] update docstring
|
||||
[] the try loop should be limited to 3 attempts; if it still does not succeed, skip
|
||||
[] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to that of an existing child state, in which case the two can be assumed to be the same state semantically.
|
||||
[WORKING] store feedback -> state -> agent.
|
||||
But 1). how should i store state in agent?
|
||||
2). how should I retrieve and use feedback?
|
||||
[x] store feedback -> state -> agent.
|
||||
|
||||
|
||||
# Signature
|
||||
"""
|
||||
function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
||||
progressValueEstimator::Function, reflector::Function; n::Integer=3) where {T1<:agent}
|
||||
evaluator::Function, reflector::Function; n::Integer=3) where {T1<:agent}
|
||||
|
||||
nthSample = 0
|
||||
while true
|
||||
@@ -168,8 +145,8 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
||||
newNodeKey, newstate, reward, isterminalstate =
|
||||
MCTStransition(a, node.state, thoughtDict)
|
||||
|
||||
# add progressValueEstimator
|
||||
stateevaluation, progressvalue = progressValueEstimator(a, newstate)
|
||||
# add evaluator
|
||||
stateevaluation, progressvalue = evaluator(a, newstate)
|
||||
|
||||
if reward < 0
|
||||
pprint(newstate[:thoughtHistory])
|
||||
@@ -221,7 +198,7 @@ julia>
|
||||
|
||||
# Signature
|
||||
"""
|
||||
function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEstimator::Function,
|
||||
function simulate(a::T, node::MCTSNode, decisionMaker::Function, evaluator::Function,
|
||||
reflector::Function; maxDepth::Integer=3, n::Integer=3)::Number where {T<:agent}
|
||||
|
||||
simTrajectoryReward = 0.0
|
||||
@@ -231,7 +208,7 @@ function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEs
|
||||
if node.isterminal
|
||||
break
|
||||
else
|
||||
expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
|
||||
expand(a, node, decisionMaker, evaluator, reflector; n=n)
|
||||
node = selectChildNode(node)
|
||||
end
|
||||
end
|
||||
@@ -266,7 +243,6 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T;
|
||||
simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
|
||||
node = node.parent
|
||||
end
|
||||
#XXX should I discount reward for fullTrajectoryReward calculation?
|
||||
end
|
||||
|
||||
|
||||
@@ -451,7 +427,7 @@ isroot(node::MCTSNode)::Bool = node.nodekey == "root" ? true : false
|
||||
initial state
|
||||
- `decisionMaker::Function`
|
||||
decide what action to take
|
||||
- `progressValueEstimator::Function`
|
||||
- `evaluator::Function`
|
||||
assess the value of the state
|
||||
- `reflector::Function`
|
||||
generate lesson from trajectory and reward
|
||||
@@ -483,7 +459,7 @@ function runMCTS(
|
||||
a::T1,
|
||||
initialState,
|
||||
decisionMaker::Function,
|
||||
progressValueEstimator::Function,
|
||||
evaluator::Function,
|
||||
reflector::Function,
|
||||
n::Integer,
|
||||
maxDepth::Integer,
|
||||
@@ -505,9 +481,9 @@ function runMCTS(
|
||||
# do nothing then go directly to backpropagation
|
||||
backpropagate(leafNode, node.reward)
|
||||
else
|
||||
expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
|
||||
expand(a, node, decisionMaker, evaluator, reflector; n=n)
|
||||
leafNode = selectChildNode(node)
|
||||
simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator,
|
||||
simTrajectoryReward = simulate(a, leafNode, decisionMaker, evaluator,
|
||||
reflector; maxDepth=maxDepth, n=n)
|
||||
backpropagate(leafNode, simTrajectoryReward)
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user