update
This commit is contained in:
@@ -440,6 +440,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
|
|||||||
:select=> nothing,
|
:select=> nothing,
|
||||||
:reward=> 0,
|
:reward=> 0,
|
||||||
:isterminal=> false,
|
:isterminal=> false,
|
||||||
|
:feedback=> nothing,
|
||||||
:thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ...
|
:thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ...
|
||||||
# :recap=>,
|
# :recap=>,
|
||||||
:question=> userinput[:text],
|
:question=> userinput[:text],
|
||||||
|
|||||||
@@ -126,7 +126,10 @@ julia>
|
|||||||
[] update docstring
|
[] update docstring
|
||||||
[] the try loop should be limited to 3 attempts; if it still does not succeed, skip
|
[] the try loop should be limited to 3 attempts; if it still does not succeed, skip
|
||||||
[] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to that of an existing child state, in which case the two can be assumed to be the same state semantically.
|
[] newNodeKey ∉ keys(node.children). A new state may have a semantic vector close enough to that of an existing child state, in which case the two can be assumed to be the same state semantically.
|
||||||
[WORKING] add reflector()
|
[WORKING] store feedback -> state -> agent.
|
||||||
|
But 1). how should I store the state in the agent?
|
||||||
|
2). how should I retrieve and use feedback?
|
||||||
|
|
||||||
|
|
||||||
# Signature
|
# Signature
|
||||||
"""
|
"""
|
||||||
@@ -144,6 +147,10 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
|
|||||||
# add progressValueEstimator
|
# add progressValueEstimator
|
||||||
stateevaluation, statevalue = progressValueEstimator(a, newstate)
|
stateevaluation, statevalue = progressValueEstimator(a, newstate)
|
||||||
|
|
||||||
|
if reward < 0
|
||||||
|
newstate.feedback = stateevaluation
|
||||||
|
end
|
||||||
|
|
||||||
if newNodeKey ∉ keys(node.children)
|
if newNodeKey ∉ keys(node.children)
|
||||||
node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, stateevaluation, statevalue,
|
node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, stateevaluation, statevalue,
|
||||||
reward, isterminalstate, node, Dict{String, MCTSNode}())
|
reward, isterminalstate, node, Dict{String, MCTSNode}())
|
||||||
|
|||||||
Reference in New Issue
Block a user