update
This commit is contained in:
404
src/interface.jl
404
src/interface.jl
@@ -339,6 +339,286 @@ julia>
|
|||||||
|
|
||||||
# Signature
|
# Signature
|
||||||
"""
|
"""
|
||||||
|
# function evaluator(state::T1, text2textInstructLLM::Function;
|
||||||
|
# insertSQLVectorDB::Union{Function, Nothing}=nothing
|
||||||
|
# ) where {T1<:AbstractDict}
|
||||||
|
|
||||||
|
# # systemmsg =
|
||||||
|
# # """
|
||||||
|
# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
|
||||||
|
|
||||||
|
# # Definitions:
|
||||||
|
# # "question" is the user's question.
|
||||||
|
# # "thought" is step-by-step reasoning about the current situation.
|
||||||
|
# # "plan" is what to do to complete the task from the current situation.
|
||||||
|
# # "action" is the taken action which can be one of the following functions:
|
||||||
|
# # 1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column.
|
||||||
|
# # 2) GETDATA[instruction], which you can use to get the data from the database.
|
||||||
|
# # 3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question.
|
||||||
|
# # "observation" is result of the action in JSON format.
|
||||||
|
|
||||||
|
# # At each round of conversation, the user will give you:
|
||||||
|
# # Context: ...
|
||||||
|
# # Trajectories: ...
|
||||||
|
|
||||||
|
# # You should then respond to the user with:
|
||||||
|
# # - Original_question: Repeat the original question.
|
||||||
|
# # - Evaluation (you must evaluate all of the following points):
|
||||||
|
# # 1) Analyze the trajectories of a solution to answer the user's original question.
|
||||||
|
# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and
|
||||||
|
# # analysis in detail. Focus on the latest thought, action, and observation.
|
||||||
|
# # Incomplete trajectories can be correct if the thoughts and actions so far are correct,
|
||||||
|
# # even if the answer is not found yet. Do not generate additional thoughts or actions.
|
||||||
|
# # 2) How the observation addresses the original question?
|
||||||
|
# # 3) Provide suggestion (if applicable).
|
||||||
|
# # - Score: Correctness score s where s is an integer from 0 to 10.
|
||||||
|
# # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
|
||||||
|
# # 1) The accepted observation should directly answer the question.
|
||||||
|
# # 2) The possible responses are either 'Yes' or 'No.'
|
||||||
|
|
||||||
|
# # You should only respond in JSON format as described below:
|
||||||
|
# # {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...}
|
||||||
|
|
||||||
|
# # Here are correct trajectory examples:
|
||||||
|
# # user:
|
||||||
|
# # {
|
||||||
|
# # "question": "I'm looking for a sedan with an automatic driving feature.",
|
||||||
|
# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
|
||||||
|
# # "thought_2": "I should check our inventory first to see if we have the one our customer wants.",
|
||||||
|
# # "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"},
|
||||||
|
# # "observation_1": "Yiem Model A, Conez Model B"
|
||||||
|
# # }
|
||||||
|
# # assistant:
|
||||||
|
# # {
|
||||||
|
# # "original_question": "the user is looking for a sedan with an automatic driving feature.",
|
||||||
|
# # "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. Thus, it is accepted as the answer.",
|
||||||
|
# # "score": 10,
|
||||||
|
# # "accepted_as_answer": "Yes"
|
||||||
|
# # }
|
||||||
|
|
||||||
|
# # user:
|
||||||
|
# # {
|
||||||
|
# # "question": "How many cars that fitted with a stereo we have?",
|
||||||
|
# # "thought_1": "I have many types of car in my inventory, each with diverse features.",
|
||||||
|
# # "thought_3": "I should check our inventory.",
|
||||||
|
# # "action_1": {"name": "inventory", "input": "vehicle with a stereo"},
|
||||||
|
# # "observation_1": "2015 Conez truck."
|
||||||
|
# # }
|
||||||
|
# # assistant:
|
||||||
|
# # {
|
||||||
|
# # "evaluation": “This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.”,
|
||||||
|
# # "score": 5,
|
||||||
|
# # "accepted_as_answer": "No"
|
||||||
|
# # }
|
||||||
|
|
||||||
|
# # Here are incorrect trajectory examples:
|
||||||
|
# # user:
|
||||||
|
# # {
|
||||||
|
# # "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?",
|
||||||
|
# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
|
||||||
|
# # "thought_2": "I will use SEARCHINTERNET function to search for the car.",
|
||||||
|
# # "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature.},
|
||||||
|
# # "observation_1": "Teza Model A, Teza Model B"
|
||||||
|
# # }
|
||||||
|
# # assistant:
|
||||||
|
# # {
|
||||||
|
# # "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.",
|
||||||
|
# # "score": 0,
|
||||||
|
# # "accepted_as_answer": "No"
|
||||||
|
# # }
|
||||||
|
|
||||||
|
# # Let's begin!
|
||||||
|
# # """
|
||||||
|
|
||||||
|
# # systemmsg =
|
||||||
|
# # """
|
||||||
|
# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
|
||||||
|
|
||||||
|
# # Definitions:
|
||||||
|
# # "question" is the user's question.
|
||||||
|
# # "thought" is step-by-step reasoning about the current situation.
|
||||||
|
# # "plan" is what to do to complete the task from the current situation.
|
||||||
|
# # “action_name” is the name of the action taken, which can be one of the following functions:
|
||||||
|
# # 1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English.
|
||||||
|
# # 2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must includes "budget", "type of wine", "characteristics of wine" and "food pairing".
|
||||||
|
# # "action_input" is the input to the action
|
||||||
|
# # "observation" is result of the action.
|
||||||
|
|
||||||
|
# # At each round of conversation, the user will give you:
|
||||||
|
# # Context: ...
|
||||||
|
# # Trajectories: ...
|
||||||
|
|
||||||
|
# # You should then respond to the user with:
|
||||||
|
# # - original_question: Repeat the original question.
|
||||||
|
# # - evaluation (you must evaluate all of the following points in a single paragraph):
|
||||||
|
# # 1) Analyze the trajectories of a solution to answer the user's original question.
|
||||||
|
# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and
|
||||||
|
# # analysis in detail. Focus on the latest thought, action, and observation.
|
||||||
|
# # Incomplete trajectories can be correct if the thoughts and actions so far are correct,
|
||||||
|
# # even if the answer is not found yet. Do not generate additional thoughts or actions.
|
||||||
|
# # 2) How the observation addresses the question exactly?
|
||||||
|
# # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
|
||||||
|
# # 1) if the observation's content directly answers the question then just accept it as the answer. Oherwise, it is not. The possible responses are either 'Yes' or 'No.'
|
||||||
|
# # - score: Correctness score s where s is a single integer between 0 to 9.
|
||||||
|
# # 1) 0 means the trajectories are incorrect.
|
||||||
|
# # 2) 9 means the trajectories are correct, and the observation's content directly answers the question.
|
||||||
|
# # - suggestion: if accepted_as_answer is "No", provide suggestion.
|
||||||
|
|
||||||
|
# # You should only respond in format as described below:
|
||||||
|
# # original_question: ...
|
||||||
|
# # evaluation: ...
|
||||||
|
# # accepted_as_answer: ...
|
||||||
|
# # score: ...
|
||||||
|
# # suggestion: ...
|
||||||
|
|
||||||
|
# # Let's begin!
|
||||||
|
# # """
|
||||||
|
|
||||||
|
# systemmsg =
|
||||||
|
# """
|
||||||
|
# You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions.
|
||||||
|
|
||||||
|
# Definitions:
|
||||||
|
# "question" is the user's question
|
||||||
|
# "understanding" is agent's understanding about the current situation
|
||||||
|
# "reasoning" is agent's step-by-step reasoning about the current situation
|
||||||
|
# "plan" is agent's plan to complete the task from the current situation
|
||||||
|
# "action_name" is the name of the action taken, which can be one of the following functions:
|
||||||
|
# - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
|
||||||
|
# For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
|
||||||
|
# Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
|
||||||
|
# "action_input" is the input to the action
|
||||||
|
# "observation" is result of the preceding immediate action
|
||||||
|
|
||||||
|
# At each round of conversation, the user will give you:
|
||||||
|
# Trajectory: ...
|
||||||
|
# Error note: error note from your previous attempt
|
||||||
|
|
||||||
|
# You must follow the following guidelines:
|
||||||
|
# - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
|
||||||
|
|
||||||
|
# You should then respond to the user with:
|
||||||
|
# 1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
|
||||||
|
# - Evaluate the correctness of each section and the overall trajectory based on the given question.
|
||||||
|
# - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
|
||||||
|
# - Incomplete trajectory are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached.
|
||||||
|
# - Do not generate additional thoughts or actions.
|
||||||
|
# 2) Answer_evaluation:
|
||||||
|
# - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question
|
||||||
|
# - State your rationale
|
||||||
|
# 3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
|
||||||
|
# Bad example (The observation didn't answers the question):
|
||||||
|
# question: Find cars with 4 wheels.
|
||||||
|
# observation: There are an apple in the table.
|
||||||
|
# Good example (The observation answers the question):
|
||||||
|
# question: Find cars with a stereo.
|
||||||
|
# observation: There are 1 cars in the table. 1) brand: Toyota, model: yaris, color: black.
|
||||||
|
# 4) Score: Correctness score s where s is a single integer between 0 to 9.
|
||||||
|
# Score guideline:
|
||||||
|
# - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed
|
||||||
|
# - 4 indicates that the trajectory are correct but the observation is incorrect or failed
|
||||||
|
# - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
|
||||||
|
# - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
|
||||||
|
# 5) Suggestion: if accepted_as_answer is "No", provide suggestion.
|
||||||
|
|
||||||
|
# You should only respond in format as described below:
|
||||||
|
# Trajectory_evaluation: ...
|
||||||
|
# Answer_evaluation: ...
|
||||||
|
# Accepted_as_answer: ...
|
||||||
|
# Score: ...
|
||||||
|
# Suggestion: ...
|
||||||
|
|
||||||
|
# Let's begin!
|
||||||
|
# """
|
||||||
|
|
||||||
|
# thoughthistory = ""
|
||||||
|
# for (k, v) in state[:thoughtHistory]
|
||||||
|
# thoughthistory *= "$k: $v\n"
|
||||||
|
# end
|
||||||
|
|
||||||
|
# errornote = ""
|
||||||
|
|
||||||
|
# for attempt in 1:5
|
||||||
|
# usermsg =
|
||||||
|
# """
|
||||||
|
# Trajectory: $thoughthistory
|
||||||
|
# Error note: $errornote
|
||||||
|
# """
|
||||||
|
|
||||||
|
# _prompt =
|
||||||
|
# [
|
||||||
|
# Dict(:name=> "system", :text=> systemmsg),
|
||||||
|
# Dict(:name=> "user", :text=> usermsg)
|
||||||
|
# ]
|
||||||
|
|
||||||
|
# # put in model format
|
||||||
|
# prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
|
||||||
|
# prompt *=
|
||||||
|
# """
|
||||||
|
# <|start_header_id|>assistant<|end_header_id|>
|
||||||
|
# """
|
||||||
|
|
||||||
|
# header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
|
||||||
|
|
||||||
|
# try
|
||||||
|
# response = text2textInstructLLM(prompt)
|
||||||
|
# # make sure every header is in the response
|
||||||
|
# for i in header
|
||||||
|
# detected = GeneralUtils.detect_keyword(i, response)
|
||||||
|
# if detected === nothing
|
||||||
|
# error("Keyword $i not found in response")
|
||||||
|
# end
|
||||||
|
# end
|
||||||
|
|
||||||
|
# responsedict = GeneralUtils.textToDict(response,
|
||||||
|
# header;
|
||||||
|
# rightmarker=":", symbolkey=true, lowercasekey=true)
|
||||||
|
|
||||||
|
# # check if dict has all required value
|
||||||
|
# trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
|
||||||
|
# answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
|
||||||
|
# # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
|
||||||
|
# responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
|
||||||
|
# responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
|
||||||
|
# score::Integer = responsedict[:score]
|
||||||
|
# accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
|
||||||
|
# suggestion::AbstractString = responsedict[:suggestion]
|
||||||
|
|
||||||
|
# if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
|
||||||
|
# error("generated accepted_as_answer has wrong format")
|
||||||
|
# end
|
||||||
|
|
||||||
|
# # add to state here instead of in transition() because the latter causes julia extension crash (a bug in julia extension)
|
||||||
|
# state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
|
||||||
|
# state[:evaluationscore] = responsedict[:score]
|
||||||
|
# state[:accepted_as_answer] = responsedict[:accepted_as_answer]
|
||||||
|
# state[:suggestion] = responsedict[:suggestion]
|
||||||
|
|
||||||
|
# # mark as terminal state when the answer is achieved
|
||||||
|
# if accepted_as_answer == "Yes"
|
||||||
|
|
||||||
|
# # mark the state as terminal state because the evaluation says so.
|
||||||
|
# state[:isterminal] = true
|
||||||
|
|
||||||
|
# # evaluation score as reward because different answers hold different value for the user.
|
||||||
|
# state[:reward] = responsedict[:score]
|
||||||
|
# end
|
||||||
|
# println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
|
||||||
|
# pprintln(Dict(responsedict))
|
||||||
|
|
||||||
|
# return responsedict[:score]
|
||||||
|
# catch e
|
||||||
|
# io = IOBuffer()
|
||||||
|
# showerror(io, e)
|
||||||
|
# errorMsg = String(take!(io))
|
||||||
|
# st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
|
||||||
|
# println("")
|
||||||
|
# println("Attempt $attempt. Error occurred: $errorMsg\n$st")
|
||||||
|
# println("")
|
||||||
|
# end
|
||||||
|
# end
|
||||||
|
# error("evaluator failed to generate an evaluation")
|
||||||
|
# end
|
||||||
function evaluator(state::T1, text2textInstructLLM::Function;
|
function evaluator(state::T1, text2textInstructLLM::Function;
|
||||||
insertSQLVectorDB::Union{Function, Nothing}=nothing
|
insertSQLVectorDB::Union{Function, Nothing}=nothing
|
||||||
) where {T1<:AbstractDict}
|
) where {T1<:AbstractDict}
|
||||||
@@ -490,14 +770,16 @@ function evaluator(state::T1, text2textInstructLLM::Function;
|
|||||||
"action_input" is the input to the action
|
"action_input" is the input to the action
|
||||||
"observation" is result of the preceding immediate action
|
"observation" is result of the preceding immediate action
|
||||||
|
|
||||||
At each round of conversation, the user will give you:
|
<At each round of conversation, the user will give you>
|
||||||
Context: ...
|
|
||||||
Trajectory: ...
|
Trajectory: ...
|
||||||
|
Error note: error note from your previous attempt
|
||||||
|
</At each round of conversation, the user will give you>
|
||||||
|
|
||||||
You must follow the following guidelines:
|
<You must follow the following guidelines>
|
||||||
- When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
|
- When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
|
||||||
|
</You must follow the following guidelines>
|
||||||
You should then respond to the user with:
|
|
||||||
|
<You should then respond to the user with>
|
||||||
1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
|
1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
|
||||||
- Evaluate the correctness of each section and the overall trajectory based on the given question.
|
- Evaluate the correctness of each section and the overall trajectory based on the given question.
|
||||||
- Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
|
- Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
|
||||||
@@ -520,13 +802,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
|
|||||||
- 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
|
- 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
|
||||||
- 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
|
- 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
|
||||||
5) Suggestion: if accepted_as_answer is "No", provide suggestion.
|
5) Suggestion: if accepted_as_answer is "No", provide suggestion.
|
||||||
|
</You should then respond to the user with>
|
||||||
|
|
||||||
You should only respond in format as described below:
|
<You should only respond in format as described below>
|
||||||
Trajectory_evaluation: ...
|
Trajectory_evaluation: ...
|
||||||
Answer_evaluation: ...
|
Answer_evaluation: ...
|
||||||
Accepted_as_answer: ...
|
Accepted_as_answer: ...
|
||||||
Score: ...
|
Score: ...
|
||||||
Suggestion: ...
|
Suggestion: ...
|
||||||
|
</You should only respond in format as described below>
|
||||||
|
|
||||||
Let's begin!
|
Let's begin!
|
||||||
"""
|
"""
|
||||||
@@ -536,10 +820,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
|
|||||||
thoughthistory *= "$k: $v\n"
|
thoughthistory *= "$k: $v\n"
|
||||||
end
|
end
|
||||||
|
|
||||||
for attempt in 1:5
|
errornote = ""
|
||||||
|
|
||||||
|
for attempt in 1:10
|
||||||
|
errorFlag = false
|
||||||
|
|
||||||
usermsg =
|
usermsg =
|
||||||
"""
|
"""
|
||||||
Trajectory: $thoughthistory
|
Trajectory: $thoughthistory
|
||||||
|
Error note: $errornote
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_prompt =
|
_prompt =
|
||||||
@@ -555,59 +844,62 @@ function evaluator(state::T1, text2textInstructLLM::Function;
|
|||||||
<|start_header_id|>assistant<|end_header_id|>
|
<|start_header_id|>assistant<|end_header_id|>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
try
|
header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
|
||||||
response = text2textInstructLLM(prompt)
|
|
||||||
responsedict = GeneralUtils.textToDict(response,
|
|
||||||
["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"];
|
|
||||||
rightmarker=":", symbolkey=true, lowercasekey=true)
|
|
||||||
|
|
||||||
# check if dict has all required value
|
response = text2textInstructLLM(prompt)
|
||||||
trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
|
# make sure every header is in the response
|
||||||
answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
|
for i in header
|
||||||
# responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
|
detected = GeneralUtils.detect_keyword(i, response)
|
||||||
responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
|
if detected === nothing
|
||||||
responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
|
errornote = "Keyword $i not found in response"
|
||||||
score::Integer = responsedict[:score]
|
errorFlag = true
|
||||||
accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
|
|
||||||
suggestion::AbstractString = responsedict[:suggestion]
|
|
||||||
|
|
||||||
if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
|
|
||||||
error("generated accepted_as_answer has wrong format")
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
|
|
||||||
state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
|
|
||||||
state[:evaluationscore] = responsedict[:score]
|
|
||||||
state[:accepted_as_answer] = responsedict[:accepted_as_answer]
|
|
||||||
state[:suggestion] = responsedict[:suggestion]
|
|
||||||
|
|
||||||
# mark as terminal state when the answer is achieved
|
|
||||||
if accepted_as_answer == "Yes"
|
|
||||||
|
|
||||||
# mark the state as terminal state because the evaluation say so.
|
|
||||||
state[:isterminal] = true
|
|
||||||
|
|
||||||
# evaluation score as reward because different answers hold different value for the user.
|
|
||||||
state[:reward] = responsedict[:score]
|
|
||||||
end
|
|
||||||
println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
|
|
||||||
pprintln(Dict(responsedict))
|
|
||||||
|
|
||||||
return responsedict[:score]
|
|
||||||
catch e
|
|
||||||
io = IOBuffer()
|
|
||||||
showerror(io, e)
|
|
||||||
errorMsg = String(take!(io))
|
|
||||||
st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
|
|
||||||
println("")
|
|
||||||
println("Attempt $attempt. Error occurred: $errorMsg\n$st")
|
|
||||||
println("")
|
|
||||||
end
|
end
|
||||||
|
if errorFlag
|
||||||
|
continue # skip to the next iteration
|
||||||
|
end
|
||||||
|
|
||||||
|
responsedict = GeneralUtils.textToDict(response,
|
||||||
|
header;
|
||||||
|
rightmarker=":", symbolkey=true, lowercasekey=true)
|
||||||
|
|
||||||
|
# check that the dict has all required values
|
||||||
|
trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
|
||||||
|
answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
|
||||||
|
# responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
|
||||||
|
responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
|
||||||
|
responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
|
||||||
|
score::Integer = responsedict[:score]
|
||||||
|
accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
|
||||||
|
suggestion::AbstractString = responsedict[:suggestion]
|
||||||
|
|
||||||
|
if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
|
||||||
|
error("generated accepted_as_answer has wrong format")
|
||||||
|
end
|
||||||
|
|
||||||
|
# add to state here instead of in transition() because the latter causes julia extension crash (a bug in julia extension)
|
||||||
|
state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
|
||||||
|
state[:evaluationscore] = responsedict[:score]
|
||||||
|
state[:accepted_as_answer] = responsedict[:accepted_as_answer]
|
||||||
|
state[:suggestion] = responsedict[:suggestion]
|
||||||
|
|
||||||
|
# mark as terminal state when the answer is achieved
|
||||||
|
if accepted_as_answer == "Yes"
|
||||||
|
|
||||||
|
# mark the state as terminal state because the evaluation says so.
|
||||||
|
state[:isterminal] = true
|
||||||
|
|
||||||
|
# evaluation score as reward because different answers hold different value for the user.
|
||||||
|
state[:reward] = responsedict[:score]
|
||||||
|
end
|
||||||
|
println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
|
||||||
|
pprintln(Dict(responsedict))
|
||||||
|
|
||||||
|
return responsedict[:score]
|
||||||
end
|
end
|
||||||
error("evaluator failed to generate an evaluation")
|
error("evaluator failed to generate an evaluation")
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Arguments
|
# Arguments
|
||||||
@@ -979,9 +1271,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
|
|||||||
earlystop(state) = state[:reward] >= 8 ? true : false
|
earlystop(state) = state[:reward] >= 8 ? true : false
|
||||||
|
|
||||||
_, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
|
_, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
|
||||||
horizontalSampleExpansionPhase=2,
|
horizontalSampleExpansionPhase=5,
|
||||||
horizontalSampleSimulationPhase=1,
|
horizontalSampleSimulationPhase=2,
|
||||||
maxSimulationDepth=3, maxiterations=2,
|
maxSimulationDepth=10, maxiterations=2,
|
||||||
explorationweight=1.0,
|
explorationweight=1.0,
|
||||||
earlystop=earlystop,
|
earlystop=earlystop,
|
||||||
saveSimulatedNode=true)
|
saveSimulatedNode=true)
|
||||||
|
|||||||
@@ -35,7 +35,7 @@ function text2textInstructLLM(prompt::String)
|
|||||||
msgPurpose="inference",
|
msgPurpose="inference",
|
||||||
senderName="yiemagent",
|
senderName="yiemagent",
|
||||||
senderId=sessionId,
|
senderId=sessionId,
|
||||||
receiverName="text2textinstruct_medium",
|
receiverName="text2textinstruct_small",
|
||||||
mqttBrokerAddress=config[:mqttServerInfo][:broker],
|
mqttBrokerAddress=config[:mqttServerInfo][:broker],
|
||||||
mqttBrokerPort=config[:mqttServerInfo][:port],
|
mqttBrokerPort=config[:mqttServerInfo][:port],
|
||||||
)
|
)
|
||||||
@@ -64,7 +64,7 @@ function getEmbedding(text::T) where {T<:AbstractString}
|
|||||||
msgPurpose="embedding",
|
msgPurpose="embedding",
|
||||||
senderName="yiemagent",
|
senderName="yiemagent",
|
||||||
senderId=sessionId,
|
senderId=sessionId,
|
||||||
receiverName="text2textinstruct_medium",
|
receiverName="text2textinstruct_small",
|
||||||
mqttBrokerAddress=config[:mqttServerInfo][:broker],
|
mqttBrokerAddress=config[:mqttServerInfo][:broker],
|
||||||
mqttBrokerPort=config[:mqttServerInfo][:port],
|
mqttBrokerPort=config[:mqttServerInfo][:port],
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user