This commit is contained in:
narawat lamaiin
2024-05-07 06:30:24 +07:00
parent 8cc5606ae8
commit 43e7ba3991
4 changed files with 59 additions and 44 deletions

View File

@@ -318,46 +318,46 @@ function reflector()
end
""" Determine whether the state is a terminal state
# """ Determine whether the state is a terminal state
# Arguments
- `state::T`
a game state
# # Arguments
# - `state::T`
# a game state
# Return
- `(isterminalstate, reward)::Tuple{Bool, <:Number}`
# # Return
# - `(isterminalstate, reward)::Tuple{Bool, <:Number}`
# Example
```jldoctest
julia>
```
# # Example
# ```jldoctest
# julia>
# ```
# TODO
[PENDING] add Reflect()
# # TODO
# [PENDING] add Reflect()
# Signature
"""
function isterminal(state::T)::Tuple{Bool, <:Number} where {T<:AbstractDict}
latestObservationKey, _ = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], "observation")
latestObservation = state[:thoughtHistory][latestObservationKey]
# # Signature
# """
# function isterminal(state::T)::Tuple{Bool, <:Number} where {T<:AbstractDict}
# latestObservationKey, _ = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], "observation")
# latestObservation = state[:thoughtHistory][latestObservationKey]
if latestObservation !== nothing
# if latestObservation !== nothing
# the terminal condition is met when the user selects a wine by putting <<winename>> in the latest observation
if occursin("<<", latestObservation) && occursin(">>", latestObservation)
isterminalstate = true
reward = 1
else
isterminalstate = false
reward = 0
end
else
isterminalstate = false
reward = 0
end
# # the terminal condition is met when the user selects a wine by putting <<winename>> in the latest observation
# if occursin("<<", latestObservation) && occursin(">>", latestObservation)
# isterminalstate = true
# reward = 1
# else
# isterminalstate = false
# reward = 0
# end
# else
# isterminalstate = false
# reward = 0
# end
return (isterminalstate, reward)
end
# return (isterminalstate, reward)
# end
""" Chat with llm.
@@ -436,7 +436,10 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
# deepcopy the info to prevent modifying the info unintentionally during MCTS planning
:customerinfo=> deepcopy(a.keywordinfo[:customerinfo]),
:storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
:storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
:select=> nothing,
:reward=> 0,
:isterminal=> false,
:thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ...
:question=> userinput[:text],
)