# Repository web-view metadata (preserved from the original paste):
# Files: YiemAgent/src/mcts.jl
# narawat lamaiin aacae344c2 update
# 2024-04-26 21:36:32 +07:00
# 495 lines, 8.3 KiB, Julia

""" To implement a Monte Carlo Tree Search (MCTS) algorithm in Julia with the UCT (Upper Confidence
Bound for Trees) selection function, you can follow the steps below: Define the necessary types
and functions for the MCTS algorithm:
"""
module mcts
export MCTSNode, runMCTS
using Dates, UUIDs, DataStructures, JSON3, Random
using GeneralUtils
using ..type
# ---------------------------------------------- 100 --------------------------------------------- #
""" a node for MCTS search tree
Arguments\n
-----
state::T
a state of a game. Can be a Dict or something else.
For example:
state = Dict(
:info=> Dict(), # keyword info
:thoughtHistory=> Dict(
:question=> _,
:thought_1=> _,
:action_1=> _,
:observation_1=> _,
:thought_2=> _,
...
)
)
visits::Integer
number of time the game visits this state
stateValue::Float64
state value
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[DONE] implement the function
Signature\n
-----
"""
struct MCTSNode{T<:AbstractDict}
state::T
visits::Integer
stateValue::AbstractFloat
children::Dict{T, MCTSNode}
end
""" Select a node based on UCT score
Arguments\n
-----
node::MCTSNode
mcts node
w::Float64
exploration weight
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[DONE] check childNode.total_reward w/ LATS paper. Which value total_reward representing
Signature\n
-----
"""
function select(node::MCTSNode, w::Float64)
max_uct = -Inf
selectedNode = nothing
for (childState, childNode) in node.children
uctValue = childNode.stateValue +
w * sqrt(log(node.visits) / childNode.visits)
if uctValue > max_uct
max_uct = uctValue
selectedNode = childNode
end
end
return selectedNode
end
""" Expand selected node
Arguments\n
-----
node::MCTSNode
MCTS node
state::T
a state of a game. Can be a Dict or something else.
decisionMaker::Function
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[x] implement the function
Signature\n
-----
"""
function expand(a::T1, node::MCTSNode, state::T2, decisionMaker::Function, stateValueEstimator::Function;
n::Integer=3) where {T1<:agent, T2<:AbstractDict}
# sampling action from decisionMaker
for sample in 1:n
thought = decisionMaker(a, state)
error("--> expand")
newState = transition(node.state, action) #[] Implement your transition function
if newState keys(node.children)
node.children[newState] = MCTSNode(newState, 0, 0.0, Dict{T, MCTSNode}())
end
end
end
"""
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[] implement the function
[] reward only comes at terminal state
Signature\n
-----
"""
function simulate(state::T, max_depth::Int) where {T<:AbstractDict}
total_reward = 0.0
for _ in 1:max_depth
#[] Implement your action selection function based on highest stateValue
action = select_action(state) # current state
state, reward = transition(state, action) # Implement transition function to a new state
#[] check for the terminal state
total_reward += reward
end
return total_reward
end
"""
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[] implement the function
Signature\n
-----
"""
function backpropagate(node::MCTSNode, reward::Float64)
node.visits += 1
# [] there is no total_reward in the paper, buy they use stateValue
node.total_reward += reward
if !isempty(node.children)
best_child = argmax([child.total_reward / child.visits for child in values(node.children)])
backpropagate(node.children[best_child], -reward)
end
end
"""
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[] implement the function
Signature\n
-----
"""
function transition(state, action)
end
""" Check whether a node is a leaf node of a tree
Arguments\n
-----
node::MCTSNode
node of a tree
Return\n
-----
result::Bool
true if the node is a leaf node of a tree otherwise false
Example\n
-----
```jldoctest
julia> using
```
TODO\n
-----
[] update docstring
Signature\n
-----
"""
isLeaf(node::MCTSNode)::Bool = isempty(node.children)
"""
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[WORKING] implement the function
Signature\n
-----
"""
function iterativeprompting(a::T, prompt::String, verification::Function) where {T<:agent}
msgMeta = GeneralUtils.generate_msgMeta(
a.config[:thirdPartyService][:text2textinstruct],
senderName= "iterativeprompting",
senderId= a.id,
receiverName= "text2textinstruct",
)
outgoingMsg = Dict(
:msgMeta,
:payload=> Dict(
:text=> prompt,
)
)
# iteration loop
while true
# send prompt to LLM
response = GeneralUtils.sendReceiveMqttMsg()
# check for correctness and get feedback
if correct
break
else
# get LLM critique
# add critique to prompt
end
end
return
end
""" Check JSON format correctness, provide
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[WORKING] implement the function
Signature\n
-----
"""
function syntaxcheck_json(jsonstring::String)::NamedTuple
success, result, errormsg, st = GeneralUtils.showstracktrace(JSON3.read, jsonstring)
if !success # gives feedback
else
end
return (success, result, critique)
end
"""
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[] implement the function
[] implement RAG to pull similar experience
Signature\n
-----
"""
function isTerminal()
end
"""
Arguments\n
-----
Return\n
-----
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
[] implement the function
Signature\n
-----
"""
function executeLLMFunction()
end
# ------------------------------------------------------------------------------------------------ #
# Create a complete example using the defined MCTS functions #
# ------------------------------------------------------------------------------------------------ #
""" Search for best action
Arguments\n
-----
initial state
initial state
decisionMaker::Function
decide what action to take
stateValueEstimator::Function
assess the value of the state
reflector::Function
generate lesson from trajectory and reward
n::Integer
how many times action will be sampled from decisionMaker
w::Float64
exploration weight
Return\n
-----
plan::Vector{Dict}
best plan
Example\n
-----
```jldoctest
julia>
```
TODO\n
-----
[] update docstring
Signature\n
-----
"""
function runMCTS(a::T, initialState, decisionMaker::Function, stateValueEstimator::Function,
reflector::Function, n::Integer, maxDepth::Integer,
maxIterations::Integer, w::Float64) where {T<:agent}
statetype = typeof(initialState)
root = MCTSNode(initialState, 0, 0.0, Dict{statetype, MCTSNode}())
for _ in 1:maxIterations
node = root
while !isLeaf(node)
node = select(node, w)
end
expand(a, node, node.state, decisionMaker, stateValueEstimator, n=n)
error("---> runMCTS")
leaf_node = node.children[node.state] # mark leaf node
reward = simulate(leaf_node.state, maxDepth)
backpropagate(leaf_node, reward)
end
best_child_state = argmax([child.total_reward / child.visits for child in values(root.children)])
return best_child_state
end
end