This commit is contained in:
narawat lamaiin
2024-05-12 13:30:04 +07:00
parent 898fa46af0
commit 8431258f1c
2 changed files with 19 additions and 43 deletions

View File

@@ -1,6 +1,6 @@
module interface
export addNewMessage, conversation, decisionMaker, progressValueEstimator, reflector
export addNewMessage, conversation, decisionMaker, evaluator, reflector
# isterminal,
using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient, PrettyPrinting
@@ -264,7 +264,7 @@ julia>
# Signature
"""
function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
function evaluator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
_prompt =
"""
@@ -279,7 +279,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories
can be correct if the thoughts and actions so far are correct, even if the answer is not found
yet. Do not generate additional thoughts or actions. Then ending with the correctness score s
where s is an integer from 1 to 10.
where s is an integer from 0 to 10.
You should only respond in JSON format as described below:
{"evaluation": "your evaluation", "score": "your evaluation score"}
@@ -295,7 +295,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
}
{"evaluation": "This trajectory is correct as it is reasonable to check an inventory for info provided in the question.
It is also better to have simple searches corresponding to a single entity, making this the best action.",
"score": 10
"score": 7
}
{
@@ -309,7 +309,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
}
{"evaluation": "This trajectory is incorrect as my search term should be related to a 4-colors pen with a pencil in it,
not a pen and a pencil separately. A better search term should have been a 4-colors pen with a pencil, all-in-one.
"score": 2
"score": 3
}
Let's begin!:
@@ -329,7 +329,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
msgMeta = GeneralUtils.generate_msgMeta(
a.config[:externalservice][:text2textinstruct][:mqtttopic],
senderName= "progressValueEstimator",
senderName= "evaluator",
senderId= a.id,
receiverName= "text2textinstruct",
mqttBroker= a.config[:mqttServerInfo][:broker],
@@ -374,7 +374,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
println("")
end
end
error("progressValueEstimator failed to generate an evaluation")
error("evaluator failed to generate an evaluation")
end
@@ -640,7 +640,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
:question=> userinput[:text],
)
)
bestplan = runMCTS(a, initialState, decisionMaker, progressValueEstimator, reflector,
bestplan = runMCTS(a, initialState, decisionMaker, evaluator, reflector,
2, 3, 4, 1.0)
error("---> bestplan")

View File

@@ -99,27 +99,6 @@ function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}
return selectedNode
end
# function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}
# max_uct = -Inf
# selectedNode = nothing
# for (childState, childNode) in node.children
# weightedterm =
# if node.visits == 0 || childNode.visits == 0 # node.visits == 0 makes sqrt() error
# 0
# else
# w * sqrt(log(node.visits) / childNode.visits)
# end
# uctValue = childNode.statevalue + weightedterm
# if uctValue > max_uct
# max_uct = uctValue
# selectedNode = childNode
# end
# end
# return selectedNode
# end
""" Expand selected node
@@ -133,7 +112,7 @@ end
a state of a game. Can be a Dict or something else.
- `decisionMaker::Function`
a function that output Thought and Action
- `progressValueEstimator::Function`
- `evaluator::Function`
a function that output trajectory progress score
# Return
@@ -147,15 +126,13 @@ julia>
[] update docstring
[] try loop should limit to 3 times. if not succeed, skip
[] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to one of the existing child states, in which case they can be assumed to be semantically the same state.
[WORKING] store feedback -> state -> agent.
But 1). how should I store state in the agent?
        2). how should I retrieve and use the feedback?
[x] store feedback -> state -> agent.
# Signature
"""
function expand(a::T1, node::MCTSNode, decisionMaker::Function,
progressValueEstimator::Function, reflector::Function; n::Integer=3) where {T1<:agent}
evaluator::Function, reflector::Function; n::Integer=3) where {T1<:agent}
nthSample = 0
while true
@@ -168,8 +145,8 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
newNodeKey, newstate, reward, isterminalstate =
MCTStransition(a, node.state, thoughtDict)
# add progressValueEstimator
stateevaluation, progressvalue = progressValueEstimator(a, newstate)
# add evaluator
stateevaluation, progressvalue = evaluator(a, newstate)
if reward < 0
pprint(newstate[:thoughtHistory])
@@ -221,7 +198,7 @@ julia>
# Signature
"""
function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEstimator::Function,
function simulate(a::T, node::MCTSNode, decisionMaker::Function, evaluator::Function,
reflector::Function; maxDepth::Integer=3, n::Integer=3)::Number where {T<:agent}
simTrajectoryReward = 0.0
@@ -231,7 +208,7 @@ function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEs
if node.isterminal
break
else
expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
expand(a, node, decisionMaker, evaluator, reflector; n=n)
node = selectChildNode(node)
end
end
@@ -266,7 +243,6 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T;
simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
node = node.parent
end
#XXX should I discount reward for fullTrajectoryReward calculation?
end
@@ -451,7 +427,7 @@ isroot(node::MCTSNode)::Bool = node.nodekey == "root" ? true : false
initial state
- `decisionMaker::Function`
decide what action to take
- `progressValueEstimator::Function`
- `evaluator::Function`
assess the value of the state
- `reflector::Function`
generate lesson from trajectory and reward
@@ -483,7 +459,7 @@ function runMCTS(
a::T1,
initialState,
decisionMaker::Function,
progressValueEstimator::Function,
evaluator::Function,
reflector::Function,
n::Integer,
maxDepth::Integer,
@@ -505,9 +481,9 @@ function runMCTS(
# do nothing then go directly to backpropagation
backpropagate(leafNode, node.reward)
else
expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
expand(a, node, decisionMaker, evaluator, reflector; n=n)
leafNode = selectChildNode(node)
simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator,
simTrajectoryReward = simulate(a, leafNode, decisionMaker, evaluator,
reflector; maxDepth=maxDepth, n=n)
backpropagate(leafNode, simTrajectoryReward)
end