update
This commit is contained in:
@@ -318,46 +318,46 @@ function reflector()
|
||||
end
|
||||
|
||||
|
||||
""" Determine whether the state is a terminal state
|
||||
# """ Determine whether the state is a terminal state
|
||||
|
||||
# Arguments
|
||||
- `state::T`
|
||||
a game state
|
||||
# # Arguments
|
||||
# - `state::T`
|
||||
# a game state
|
||||
|
||||
# Return
|
||||
- `(isterminalstate, reward)::Tuple{Bool, <:Number}`
|
||||
# # Return
|
||||
# - `(isterminalstate, reward)::Tuple{Bool, <:Number}`
|
||||
|
||||
# Example
|
||||
```jldoctest
|
||||
julia>
|
||||
```
|
||||
# # Example
|
||||
# ```jldoctest
|
||||
# julia>
|
||||
# ```
|
||||
|
||||
# TODO
|
||||
[PENDING] add Reflect()
|
||||
# # TODO
|
||||
# [PENDING] add Reflect()
|
||||
|
||||
# Signature
|
||||
"""
|
||||
function isterminal(state::T)::Tuple{Bool, <:Number} where {T<:AbstractDict}
|
||||
latestObservationKey, _ = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], "observation")
|
||||
latestObservation = state[:thoughtHistory][latestObservationKey]
|
||||
# # Signature
|
||||
# """
|
||||
# function isterminal(state::T)::Tuple{Bool, <:Number} where {T<:AbstractDict}
|
||||
# latestObservationKey, _ = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], "observation")
|
||||
# latestObservation = state[:thoughtHistory][latestObservationKey]
|
||||
|
||||
if latestObservation !== nothing
|
||||
# if latestObservation !== nothing
|
||||
|
||||
# terminal condition: the user selects a wine by putting <<winename>> in the latest observation
|
||||
if occursin("<<", latestObservation) && occursin(">>", latestObservation)
|
||||
isterminalstate = true
|
||||
reward = 1
|
||||
else
|
||||
isterminalstate = false
|
||||
reward = 0
|
||||
end
|
||||
else
|
||||
isterminalstate = false
|
||||
reward = 0
|
||||
end
|
||||
# # terminal condition: the user selects a wine by putting <<winename>> in the latest observation
|
||||
# if occursin("<<", latestObservation) && occursin(">>", latestObservation)
|
||||
# isterminalstate = true
|
||||
# reward = 1
|
||||
# else
|
||||
# isterminalstate = false
|
||||
# reward = 0
|
||||
# end
|
||||
# else
|
||||
# isterminalstate = false
|
||||
# reward = 0
|
||||
# end
|
||||
|
||||
return (isterminalstate, reward)
|
||||
end
|
||||
# return (isterminalstate, reward)
|
||||
# end
|
||||
|
||||
|
||||
""" Chat with llm.
|
||||
@@ -436,7 +436,10 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
|
||||
|
||||
# deepcopy the info to prevent modifying the info unintentionally during MCTS planning
|
||||
:customerinfo=> deepcopy(a.keywordinfo[:customerinfo]),
|
||||
:storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
|
||||
:storeinfo=> deepcopy(a.keywordinfo[:storeinfo]),
|
||||
:select=> nothing,
|
||||
:reward=> 0,
|
||||
:isterminal=> false,
|
||||
:thoughtHistory=> OrderedDict{Symbol, Any}( # contains question, thought_1, action_1, observation_1, thought_2, ...
|
||||
:question=> userinput[:text],
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user