diff --git a/src/interface.jl b/src/interface.jl index cebd781..b280b8c 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -1,6 +1,6 @@ module interface -export addNewMessage, conversation, decisionMaker, progressValueEstimator, reflector +export addNewMessage, conversation, decisionMaker, evaluator, reflector # isterminal, using JSON3, DataStructures, Dates, UUIDs, HTTP, Random, MQTTClient, PrettyPrinting @@ -264,7 +264,7 @@ julia> # Signature """ -function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict} +function evaluator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict} _prompt = """ @@ -279,7 +279,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories can be correct if the thoughts and actions so far are correct, even if the answer is not found yet. Do not generate additional thoughts or actions. Then ending with the correctness score s - where s is an integer from 1 to 10. + where s is an integer from 0 to 10. You should only respond in JSON format as describe below: {"evaluation": "your evaluation", "score": "your evaluation score"} @@ -295,7 +295,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where } {"evaluation": "This trajectory is correct as it is reasonable to check an inventory for info provided in the question. It is also better to have simple searches corresponding to a single entity, making this the best action.", - "score": 10 + "score": 7 } { @@ -309,7 +309,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where } {"evaluation": "This trajectory is incorrect as my search term should be related to a 4-colors pen with a pencil in it, not a pen and a pencil seperately. A better search term should have been a 4-colors pen with a pencil, all-in-one.", - "score": 2 + "score": 3 } Let's begin!: @@ -329,7 +329,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where msgMeta = GeneralUtils.generate_msgMeta( a.config[:externalservice][:text2textinstruct][:mqtttopic], - senderName= "progressValueEstimator", + senderName= "evaluator", senderId= a.id, receiverName= "text2textinstruct", mqttBroker= a.config[:mqttServerInfo][:broker], @@ -374,7 +374,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where println("") end end - error("progressValueEstimator failed to generate an evaluation") + error("evaluator failed to generate an evaluation") end @@ -640,7 +640,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent} :question=> userinput[:text], ) ) - bestplan = runMCTS(a, initialState, decisionMaker, progressValueEstimator, reflector, + bestplan = runMCTS(a, initialState, decisionMaker, evaluator, reflector, 2, 3, 4, 1.0) error("---> bestplan") diff --git a/src/mcts.jl b/src/mcts.jl index d3edab1..30d120f 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -99,27 +99,6 @@ function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat} return selectedNode end -# function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat} -# max_uct = -Inf -# selectedNode = nothing - -# for (childState, childNode) in node.children -# weightedterm = -# if node.visits == 0 || childNode.visits == 0 # node.visits == 0 makes sqrt() error -# 0 -# else -# w * sqrt(log(node.visits) / childNode.visits) -# end -# uctValue = childNode.statevalue + weightedterm - -# if uctValue > max_uct -# max_uct = uctValue -# selectedNode = childNode -# end -# end - -# return selectedNode -# end """ Expand selected node @@ -133,7 +112,7 @@ end a state of a game. Can be a Dict or something else. - `decisionMaker::Function` a function that output Thought and Action - - `progressValueEstimator::Function` + - `evaluator::Function` a function that output trajectory progress score # Return @@ -147,15 +126,13 @@ julia> [] update docstring [] try loop should limit to 3 times. if not succeed, skip [] newNodeKey ∉ keys(node.children). New state may have semantic vector close enought to one of existing child state. Which can be assume that they are the same state semantically-wise. - [WORKING] store feedback -> state -> agent. - But 1). how should i store state in agent? - 2). how should I retrieve and use feedback? + [x] store feedback -> state -> agent. # Signature """ function expand(a::T1, node::MCTSNode, decisionMaker::Function, - progressValueEstimator::Function, reflector::Function; n::Integer=3) where {T1<:agent} + evaluator::Function, reflector::Function; n::Integer=3) where {T1<:agent} nthSample = 0 while true @@ -168,8 +145,8 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function, newNodeKey, newstate, reward, isterminalstate = MCTStransition(a, node.state, thoughtDict) - # add progressValueEstimator - stateevaluation, progressvalue = progressValueEstimator(a, newstate) + # add evaluator + stateevaluation, progressvalue = evaluator(a, newstate) if reward < 0 pprint(newstate[:thoughtHistory]) @@ -221,7 +198,7 @@ julia> # Signature """ -function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEstimator::Function, +function simulate(a::T, node::MCTSNode, decisionMaker::Function, evaluator::Function, reflector::Function; maxDepth::Integer=3, n::Integer=3)::Number where {T<:agent} simTrajectoryReward = 0.0 @@ -231,7 +208,7 @@ function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEs if node.isterminal break else - expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n) + expand(a, node, decisionMaker, evaluator, reflector; n=n) node = selectChildNode(node) end end @@ -266,7 +243,6 @@ function backpropagate(node::MCTSNode, simTrajectoryReward::T; simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain node = node.parent end - #XXX should I discount reward for fullTrajectoryReward calculation? end @@ -451,7 +427,7 @@ isroot(node::MCTSNode)::Bool = node.nodekey == "root" ? true : false initial state - `decisionMaker::Function` decide what action to take - - `progressValueEstimator::Function` + - `evaluator::Function` assess the value of the state - `reflector::Function` generate lesson from trajectory and reward @@ -483,7 +459,7 @@ function runMCTS( a::T1, initialState, decisionMaker::Function, - progressValueEstimator::Function, + evaluator::Function, reflector::Function, n::Integer, maxDepth::Integer, @@ -505,9 +481,9 @@ function runMCTS( # do nothing then go directly to backpropagation backpropagate(leafNode, node.reward) else - expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n) + expand(a, node, decisionMaker, evaluator, reflector; n=n) leafNode = selectChildNode(node) - simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator, + simTrajectoryReward = simulate(a, leafNode, decisionMaker, evaluator, reflector; maxDepth=maxDepth, n=n) backpropagate(leafNode, simTrajectoryReward) end