diff --git a/src/mcts.jl b/src/mcts.jl
index ad2dd1a..24a944e 100644
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -2,7 +2,7 @@ module mcts
 export selectBestNextNode, selectBestTrajectoryNode, backpropagate, isleaf, isroot,
     selectChildNode, expand, simulate, makeNewState
 
-
+using Base.Threads
 using GeneralUtils
 using ..type
 
@@ -196,37 +196,36 @@ end
 # Return
     - None
 
+# TODO
+    - [DONE] implement multithreads (parallel sampling via Threads.@spawn)
+
 # Signature
 """
-function expand(node::MCTSNode,transition::Function, transitionargs::NamedTuple;
+function expand(node::MCTSNode, transition::Function, transitionargs::NamedTuple;
     totalsample::Integer=3)
-
-    nthSample = 0
-    while true
-        nthSample += 1
-        if nthSample <= totalsample
-            result = transition(node.state, transitionargs)
-            newNodeKey::AbstractString = result[:newNodeKey]
-            newstate::AbstractDict = result[:newstate]
-            progressvalue::Integer = result[:progressvalue]
-
-            """
-            [] newNodeKey ∉ keys(node.children).
-            New state may have semantic vector close enought to
-            one of existing child state. Which can be assume that they are the same state
-            semantically-wise i.e. De javu. This could be used to recall lessons for this
-            similar situation to improve decisionMaker and evaluator.
-            """
-            if newNodeKey ∉ keys(node.children)
-                node.children[newNodeKey] =
-                    MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward],
-                             newstate[:isterminal], node, Dict{String, MCTSNode}())
-            end
-        else
-            break
-        end
-    end
-end
+    # Draw `totalsample` candidate transitions concurrently. Each task receives
+    # deep copies of the shared inputs so parallel transitions cannot mutate
+    # each other's (or the tree's) state.
+    tasks = [@spawn transition(deepcopy(node.state), deepcopy(transitionargs))
+             for _ in 1:totalsample]
+
+    # `fetch` blocks until a task finishes and rethrows any error raised in it;
+    # children are then inserted sequentially, so no lock is needed on `node`.
+    for result in fetch.(tasks)
+        newNodeKey::AbstractString = result[:newNodeKey]
+        newstate::AbstractDict = result[:newstate]
+        progressvalue::Integer = result[:progressvalue]
+
+        # NOTE: a new state may have a semantic vector close enough to an existing
+        # child's state to be treated as the same state (deja vu). That similarity
+        # could be used to recall lessons and improve decisionMaker and evaluator.
+        if newNodeKey ∉ keys(node.children)
+            node.children[newNodeKey] =
+                MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward],
+                         newstate[:isterminal], node, Dict{String, MCTSNode}())
+        end
+    end
+end
 
 """
 Simulate interactions between agent and environment