From 856308555f8951ad7c67d5c1e3e207adefe44212 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Thu, 22 Aug 2024 10:59:26 +0700 Subject: [PATCH] update --- src/mcts.jl | 113 +++++++++++++++++++++++++--------------------------- 1 file changed, 55 insertions(+), 58 deletions(-) diff --git a/src/mcts.jl b/src/mcts.jl index d03bb78..1ea2191 100644 --- a/src/mcts.jl +++ b/src/mcts.jl @@ -196,72 +196,69 @@ end # Return - None -# TODO - - [WORKING] implement multithreads - # Signature """ -function expand(node::MCTSNode, transition::Function, transitionargs::NamedTuple; - totalsample::Integer=3) - - # not use Any[] because I want to preserve result order - results = Vector{Any}(undef, totalsample) - - @sync for i in 1:totalsample - @spawn begin - result = transition(deepcopy(node.state), deepcopy(transitionargs)) - results[i] = result - end - end - - for result in results - newNodeKey::AbstractString = result[:newNodeKey] - newstate::AbstractDict = result[:newstate] - progressvalue::Integer = result[:progressvalue] - - """ - [] newNodeKey ∉ keys(node.children). - New state may have semantic vector close enought to - one of existing child state. Which can be assume that they are the same state - semantically-wise i.e. De javu. This could be used to recall lessons for this - similar situation to improve decisionMaker and evaluator. - """ - if newNodeKey ∉ keys(node.children) - node.children[newNodeKey] = - MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward], - newstate[:isterminal], node, Dict{String, MCTSNode}()) - end - end -end -# function expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple; +# function expand(node::MCTSNode, transition::Function, transitionargs::NamedTuple; # totalsample::Integer=3) + +# # not use Any[] because I want to preserve result order +# results = Vector{Any}(undef, totalsample) -# nthSample = 0 -# while true -# nthSample += 1 -# if nthSample <= totalsample -# result = transition(node.state, transitionargs) -# newNodeKey::AbstractString = result[:newNodeKey] -# newstate::AbstractDict = result[:newstate] -# progressvalue::Integer = result[:progressvalue] - -# """ -# [] newNodeKey ∉ keys(node.children). -# New state may have semantic vector close enought to -# one of existing child state. Which can be assume that they are the same state -# semantically-wise i.e. De javu. This could be used to recall lessons for this -# similar situation to improve decisionMaker and evaluator. -# """ -# if newNodeKey ∉ keys(node.children) -# node.children[newNodeKey] = -# MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward], -# newstate[:isterminal], node, Dict{String, MCTSNode}()) +# @sync for i in 1:totalsample +# @spawn begin +# result = transition(deepcopy(node.state), deepcopy(transitionargs)) +# results[i] = result # end -# else -# break +# end + +# for result in results +# newNodeKey::AbstractString = result[:newNodeKey] +# newstate::AbstractDict = result[:newstate] +# progressvalue::Integer = result[:progressvalue] + +# """ +# [] newNodeKey ∉ keys(node.children). +# New state may have semantic vector close enought to +# one of existing child state. Which can be assume that they are the same state +# semantically-wise i.e. De javu. This could be used to recall lessons for this +# similar situation to improve decisionMaker and evaluator. +# """ +# if newNodeKey ∉ keys(node.children) +# node.children[newNodeKey] = +# MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward], +# newstate[:isterminal], node, Dict{String, MCTSNode}()) # end # end # end +function expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple; + totalsample::Integer=3) + + nthSample = 0 + while true + nthSample += 1 + if nthSample <= totalsample + result = transition(node.state, transitionargs) + newNodeKey::AbstractString = result[:newNodeKey] + newstate::AbstractDict = result[:newstate] + progressvalue::Integer = result[:progressvalue] + + """ + [] newNodeKey ∉ keys(node.children). + New state may have semantic vector close enought to + one of existing child state. Which can be assume that they are the same state + semantically-wise i.e. De javu. This could be used to recall lessons for this + similar situation to improve decisionMaker and evaluator. + """ + if newNodeKey ∉ keys(node.children) + node.children[newNodeKey] = + MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward], + newstate[:isterminal], node, Dict{String, MCTSNode}()) + end + else + break + end + end +end """ Simulate interactions between agent and environment