From 68c0e7631a747d4b27f704f199c6506943f2d264 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Tue, 7 May 2024 21:24:51 +0700 Subject: [PATCH] update --- src/interface.jl | 1 + src/mcts.jl | 9 ++++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/interface.jl b/src/interface.jl index 82a1eae..554c0b2 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -440,6 +440,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent} :select=> nothing, :reward=> 0, :isterminal=> false, + :feedback=> nothing, :thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ... # :recap=>, :question=> userinput[:text], diff --git a/src/mcts.jl b/src/mcts.jl index bec27b0..cb9ef9b 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -126,7 +126,10 @@ julia> [] update docstring [] try loop should limit to 3 times. if not succeed, skip [] newNodeKey ∉ keys(node.children). New state may have semantic vector close enought to one of existing child state. Which can be assume that they are the same state semantically-wise. - [WORKING] add reflector() + [WORKING] store feedback -> state -> agent. + But 1). how should I store state in the agent? + 2). how should I retrieve and use feedback? + # Signature """ @@ -144,6 +147,10 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function, # add progressValueEstimator stateevaluation, statevalue = progressValueEstimator(a, newstate) + if reward < 0 + newstate.feedback = stateevaluation + end + if newNodeKey ∉ keys(node.children) node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, stateevaluation, statevalue, reward, isterminalstate, node, Dict{String, MCTSNode}())