update
This commit is contained in:
@@ -440,6 +440,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
|
||||
:select=> nothing,
|
||||
:reward=> 0,
|
||||
:isterminal=> false,
|
||||
:feedback=> nothing,
|
||||
:thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ...
|
||||
# :recap=>,
|
||||
:question=> userinput[:text],
|
||||
|
||||
@@ -126,7 +126,10 @@ julia>
|
||||
[] update docstring
|
||||
[] the try loop should be limited to 3 attempts; if it still does not succeed, skip
|
||||
[] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to that of an existing child state, in which case the two can be assumed to be the same state semantically.
|
||||
[WORKING] add reflector()
|
||||
[WORKING] store feedback -> state -> agent.
|
||||
But 1). how should I store state in the agent?
|
||||
2). how should I retrieve and use feedback?
|
||||
|
||||
|
||||
# Signature
|
||||
"""
|
||||
@@ -144,6 +147,10 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
||||
# add progressValueEstimator
|
||||
stateevaluation, statevalue = progressValueEstimator(a, newstate)
|
||||
|
||||
if reward < 0
|
||||
newstate.feedback = stateevaluation
|
||||
end
|
||||
|
||||
if newNodeKey ∉ keys(node.children)
|
||||
node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, stateevaluation, statevalue,
|
||||
reward, isterminalstate, node, Dict{String, MCTSNode}())
|
||||
|
||||
Reference in New Issue
Block a user