update
This commit is contained in:
@@ -108,9 +108,12 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
|
||||
|
||||
|
||||
else #[WORKING] new thinking
|
||||
|
||||
|
||||
initialState = 0
|
||||
initialState = Dict(
|
||||
:info=> Dict(), # keyword info
|
||||
:thought=> nothing,
|
||||
:action=> nothing,
|
||||
:observation=> nothing,
|
||||
)
|
||||
bestplan = runMCTS(initialState, decisionMaker, stateValueEstimator, reflector,
|
||||
3, 10, 1000, 1.0)
|
||||
error("---> bestplan")
|
||||
|
||||
93
src/mcts.jl
93
src/mcts.jl
@@ -5,17 +5,22 @@
|
||||
|
||||
module mcts
|
||||
|
||||
export runMCTS
|
||||
export MCTSNode, runMCTS, decisionMaker, stateValueEstimator, reflector
|
||||
|
||||
using Dates, UUIDs, DataStructures, JSON3, Random
|
||||
using GeneralUtils
|
||||
|
||||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||||
|
||||
"""
|
||||
""" a node for MCTS search tree
|
||||
|
||||
Arguments\n
|
||||
-----
|
||||
state::T
|
||||
Represent a state of a game. Can be a Dict or something else.
|
||||
visits::Integer
|
||||
number of time the game visits this state
|
||||
stateValue::Float64
|
||||
|
||||
Return\n
|
||||
-----
|
||||
@@ -29,15 +34,15 @@ using GeneralUtils
|
||||
TODO\n
|
||||
-----
|
||||
[] update docstring
|
||||
[] implement the function
|
||||
[DONE] implement the function
|
||||
|
||||
Signature\n
|
||||
-----
|
||||
"""
|
||||
struct MCTSNode{T}
|
||||
state::T
|
||||
visits::Int
|
||||
stateValue::Float64
|
||||
visits::Integer
|
||||
stateValue::AbstractFloat
|
||||
children::Dict{T, MCTSNode}
|
||||
end
|
||||
|
||||
@@ -107,20 +112,9 @@ end
|
||||
function expand(node::MCTSNode, state::T, decisionMaker::Function, stateValueEstimator::Function;
|
||||
n::Integer=3) where {T<:Any}
|
||||
|
||||
actions = []
|
||||
|
||||
# sampling action from decisionMaker
|
||||
# for nth in 1:n
|
||||
|
||||
|
||||
# end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
for action in actions
|
||||
newState = transition(node.state, action) # Implement your transition function
|
||||
for sample in 1:n
|
||||
newState = transition(node.state, action) #[] Implement your transition function
|
||||
if newState ∉ keys(node.children)
|
||||
node.children[newState] = MCTSNode(newState, 0, 0.0, Dict{T, MCTSNode}())
|
||||
end
|
||||
@@ -145,6 +139,7 @@ end
|
||||
-----
|
||||
[] update docstring
|
||||
[] implement the function
|
||||
[] reward only comes at terminal state
|
||||
|
||||
Signature\n
|
||||
-----
|
||||
@@ -152,8 +147,12 @@ end
|
||||
function simulate(state::T, max_depth::Int) where {T<:Any}
|
||||
total_reward = 0.0
|
||||
for _ in 1:max_depth
|
||||
action = select_action(state) # Implement your action selection function
|
||||
state, reward = transition(state, action) # Implement your transition function
|
||||
#[] Implement your action selection function based on highest stateValue
|
||||
action = select_action(state) # current state
|
||||
state, reward = transition(state, action) # Implement transition function to a new state
|
||||
|
||||
#[] check for the terminal state
|
||||
|
||||
total_reward += reward
|
||||
end
|
||||
return total_reward
|
||||
@@ -183,6 +182,8 @@ end
|
||||
"""
|
||||
function backpropagate(node::MCTSNode, reward::Float64)
|
||||
node.visits += 1
|
||||
|
||||
# [] there is no total_reward in the paper, buy they use stateValue
|
||||
node.total_reward += reward
|
||||
if !isempty(node.children)
|
||||
best_child = argmax([child.total_reward / child.visits for child in values(node.children)])
|
||||
@@ -216,25 +217,27 @@ function transition(state, action)
|
||||
|
||||
end
|
||||
|
||||
""" Check whether a node is a leaf node
|
||||
""" Check whether a node is a leaf node of a tree
|
||||
|
||||
Arguments\n
|
||||
-----
|
||||
node::MCTSNode
|
||||
node of a tree
|
||||
|
||||
Return\n
|
||||
-----
|
||||
a task represent an agent
|
||||
result::Bool
|
||||
true if the node is a leaf node of a tree otherwise false
|
||||
|
||||
Example\n
|
||||
-----
|
||||
```jldoctest
|
||||
julia>
|
||||
julia> using
|
||||
```
|
||||
|
||||
TODO\n
|
||||
-----
|
||||
[] update docstring
|
||||
[DONE] implement isLeaf()
|
||||
|
||||
Signature\n
|
||||
-----
|
||||
@@ -320,6 +323,34 @@ function reflector()
|
||||
|
||||
end
|
||||
|
||||
"""
|
||||
|
||||
Arguments\n
|
||||
-----
|
||||
|
||||
Return\n
|
||||
-----
|
||||
|
||||
Example\n
|
||||
-----
|
||||
```jldoctest
|
||||
julia>
|
||||
```
|
||||
|
||||
TODO\n
|
||||
-----
|
||||
[] update docstring
|
||||
[] implement the function
|
||||
[] implement RAG to pull similar experience
|
||||
|
||||
Signature\n
|
||||
-----
|
||||
"""
|
||||
function isTerminal()
|
||||
|
||||
end
|
||||
|
||||
|
||||
# ------------------------------------------------------------------------------------------------ #
|
||||
# Create a complete example using the defined MCTS functions #
|
||||
# ------------------------------------------------------------------------------------------------ #
|
||||
@@ -342,6 +373,8 @@ end
|
||||
|
||||
Return\n
|
||||
-----
|
||||
plan::Vector{Dict}
|
||||
best plan
|
||||
|
||||
Example\n
|
||||
-----
|
||||
@@ -357,20 +390,20 @@ end
|
||||
-----
|
||||
"""
|
||||
function runMCTS(initialState, decisionMaker::Function, stateValueEstimator::Function,
|
||||
reflector::Function, totalActionSampled::Integer, maxDepth::Integer,
|
||||
reflector::Function, n::Integer, maxDepth::Integer,
|
||||
maxIterations::Integer, w::Float64)
|
||||
root = MCTSNode(initialState, 0, 0.0, Dict())
|
||||
|
||||
statetype = typeof(initialState)
|
||||
root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}())
|
||||
error("---> runMCTS")
|
||||
for _ in 1:maxIterations
|
||||
node = root
|
||||
while !isLeaf(node)
|
||||
node = select(node, w)
|
||||
end
|
||||
|
||||
expand(node, node.state, decisionMaker, stateValueEstimator,
|
||||
n=n)
|
||||
expand(node, node.state, decisionMaker, stateValueEstimator, n=n)
|
||||
|
||||
leaf_node = node.children[node.state]
|
||||
leaf_node = node.children[node.state] # mark leaf node
|
||||
reward = simulate(leaf_node.state, maxDepth)
|
||||
backpropagate(leaf_node, reward)
|
||||
end
|
||||
|
||||
@@ -63,7 +63,7 @@ tools=Dict( # update input format
|
||||
tools=tools,
|
||||
)
|
||||
|
||||
response = YiemAgent.conversation(a, Dict(:text=> "newtopic", ) )
|
||||
response = YiemAgent.conversation(a, Dict(:text=> "hello", ) )
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user