This commit is contained in:
2025-03-09 18:36:57 +07:00
parent 7dd6b56e4c
commit 4f1917e01b
2 changed files with 350 additions and 58 deletions

View File

@@ -339,6 +339,286 @@ julia>
# Signature # Signature
""" """
# function evaluator(state::T1, text2textInstructLLM::Function;
# insertSQLVectorDB::Union{Function, Nothing}=nothing
# ) where {T1<:AbstractDict}
# # systemmsg =
# # """
# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
# # Definitions:
# # "question" is the user's question.
# # "thought" is step-by-step reasoning about the current situation.
# # "plan" is what to do to complete the task from the current situation.
# # "action" is the taken action which can be one of the following functions:
# # 1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column.
# # 2) GETDATA[instruction], which you can use to get the data from the database.
# # 3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question.
# # "observation" is result of the action in JSON format.
# # At each round of conversation, the user will give you:
# # Context: ...
# # Trajectories: ...
# # You should then respond to the user with:
# # - Original_question: Repeat the original question.
# # - Evaluation (you must evaluate all of the following points):
# # 1) Analyze the trajectories of a solution to answer the user's original question.
# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and
# # analysis in detail. Focus on the latest thought, action, and observation.
# # Incomplete trajectories can be correct if the thoughts and actions so far are correct,
# # even if the answer is not found yet. Do not generate additional thoughts or actions.
# # 2) How the observation addresses the original question?
# # 3) Provide suggestion (if applicable).
# # - Score: Correctness score s where s is an integer from 0 to 10.
# # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
# # 1) The accepted observation should directly answer the question.
# # 2) The possible responses are either 'Yes' or 'No.'
# # You should only respond in JSON format as described below:
# # {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...}
# # Here are correct trajectory examples:
# # user:
# # {
# # "question": "I'm looking for a sedan with an automatic driving feature.",
# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
# # "thought_2": "I should check our inventory first to see if we have the one our customer wants.",
# # "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"},
# # "observation_1": "Yiem Model A, Conez Model B"
# # }
# # assistant:
# # {
# # "original_question": "the user is looking for a sedan with an automatic driving feature.",
# # "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. Thus, it is accepted as the answer.",
# # "score": 10,
# # "accepted_as_answer": "Yes"
# # }
# # user:
# # {
# # "question": "How many cars that fitted with a stereo we have?",
# # "thought_1": "I have many types of car in my inventory, each with diverse features.",
# # "thought_2": "I should check our inventory.",
# # "action_1": {"name": "inventory", "input": "vehicle with a stereo"},
# # "observation_1": "2015 Conez truck."
# # }
# # assistant:
# # {
# # "evaluation": "This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.",
# # "score": 5,
# # "accepted_as_answer": "No"
# # }
# # Here are incorrect trajectory examples:
# # user:
# # {
# # "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?",
# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
# # "thought_2": "I will use SEARCHINTERNET function to search for the car.",
# # "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature."},
# # "observation_1": "Teza Model A, Teza Model B"
# # }
# # assistant:
# # {
# # "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.",
# # "score": 0,
# # "accepted_as_answer": "No"
# # }
# # Let's begin!
# # """
# # systemmsg =
# # """
# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
# # Definitions:
# # "question" is the user's question.
# # "thought" is step-by-step reasoning about the current situation.
# # "plan" is what to do to complete the task from the current situation.
# # "action_name" is the name of the action taken, which can be one of the following functions:
# # 1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English.
# # 2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must includes "budget", "type of wine", "characteristics of wine" and "food pairing".
# # "action_input" is the input to the action
# # "observation" is result of the action.
# # At each round of conversation, the user will give you:
# # Context: ...
# # Trajectories: ...
# # You should then respond to the user with:
# # - original_question: Repeat the original question.
# # - evaluation (you must evaluate all of the following points in a single paragraph):
# # 1) Analyze the trajectories of a solution to answer the user's original question.
# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and
# # analysis in detail. Focus on the latest thought, action, and observation.
# # Incomplete trajectories can be correct if the thoughts and actions so far are correct,
# # even if the answer is not found yet. Do not generate additional thoughts or actions.
# # 2) How the observation addresses the question exactly?
# # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
# # 1) If the observation's content directly answers the question, then just accept it as the answer. Otherwise, it is not. The possible responses are either 'Yes' or 'No'.
# # - score: Correctness score s where s is a single integer between 0 and 9.
# # 1) 0 means the trajectories are incorrect.
# # 2) 9 means the trajectories are correct, and the observation's content directly answers the question.
# # - suggestion: if accepted_as_answer is "No", provide suggestion.
# # You should only respond in format as described below:
# # original_question: ...
# # evaluation: ...
# # accepted_as_answer: ...
# # score: ...
# # suggestion: ...
# # Let's begin!
# # """
# systemmsg =
# """
# You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions.
# Definitions:
# "question" is the user's question
# "understanding" is agent's understanding about the current situation
# "reasoning" is agent's step-by-step reasoning about the current situation
# "plan" is agent's plan to complete the task from the current situation
# "action_name" is the name of the action taken, which can be one of the following functions:
# - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
# For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
# Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
# "action_input" is the input to the action
# "observation" is result of the preceding immediate action
# At each round of conversation, the user will give you:
# Trajectory: ...
# Error note: error note from your previous attempt
# You must follow the following guidelines:
# - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
# You should then respond to the user with:
# 1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
# - Evaluate the correctness of each section and the overall trajectory based on the given question.
# - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
# - Incomplete trajectories are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached.
# - Do not generate additional thoughts or actions.
# 2) Answer_evaluation:
# - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details address the question
# - State your rationale
# 3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
# Bad example (The observation didn't answer the question):
# question: Find cars with 4 wheels.
# observation: There is an apple in the table.
# Good example (The observation answers the question):
# question: Find cars with a stereo.
# observation: There is 1 car in the table. 1) brand: Toyota, model: yaris, color: black.
# 4) Score: Correctness score s where s is a single integer between 0 and 9.
# Score guideline:
# - 0 indicates that the trajectory is incorrect, failed, or has errors, and the observation is incorrect or failed
# - 4 indicates that the trajectory is correct but the observation is incorrect or failed
# - 8 indicates that the trajectory is correct and the observation's content directly answers the question.
# - 9 indicates a perfect performance. The trajectory is correct and the observation's content directly answers the question, surpassing your expectations.
# 5) Suggestion: if accepted_as_answer is "No", provide suggestion.
# You should only respond in format as described below:
# Trajectory_evaluation: ...
# Answer_evaluation: ...
# Accepted_as_answer: ...
# Score: ...
# Suggestion: ...
# Let's begin!
# """
# thoughthistory = ""
# for (k, v) in state[:thoughtHistory]
# thoughthistory *= "$k: $v\n"
# end
# errornote = ""
# for attempt in 1:5
# usermsg =
# """
# Trajectory: $thoughthistory
# Error note: $errornote
# """
# _prompt =
# [
# Dict(:name=> "system", :text=> systemmsg),
# Dict(:name=> "user", :text=> usermsg)
# ]
# # put in model format
# prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
# prompt *=
# """
# <|start_header_id|>assistant<|end_header_id|>
# """
# header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
# try
# response = text2textInstructLLM(prompt)
# # make sure every header is in the response
# for i in header
# detected = GeneralUtils.detect_keyword(i, response)
# if detected === nothing
# error("Keyword $i not found in response")
# end
# end
# responsedict = GeneralUtils.textToDict(response,
# header;
# rightmarker=":", symbolkey=true, lowercasekey=true)
# # check if dict has all required value
# trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
# answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
# # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
# responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
# responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
# score::Integer = responsedict[:score]
# accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
# suggestion::AbstractString = responsedict[:suggestion]
# if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
# error("generated accepted_as_answer has wrong format")
# end
# # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
# state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
# state[:evaluationscore] = responsedict[:score]
# state[:accepted_as_answer] = responsedict[:accepted_as_answer]
# state[:suggestion] = responsedict[:suggestion]
# # mark as terminal state when the answer is achieved
# if accepted_as_answer == "Yes"
# # mark the state as terminal state because the evaluation say so.
# state[:isterminal] = true
# # evaluation score as reward because different answers hold different value for the user.
# state[:reward] = responsedict[:score]
# end
# println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
# pprintln(Dict(responsedict))
# return responsedict[:score]
# catch e
# io = IOBuffer()
# showerror(io, e)
# errorMsg = String(take!(io))
# st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
# println("")
# println("Attempt $attempt. Error occurred: $errorMsg\n$st")
# println("")
# end
# end
# error("evaluator failed to generate an evaluation")
# end
function evaluator(state::T1, text2textInstructLLM::Function; function evaluator(state::T1, text2textInstructLLM::Function;
insertSQLVectorDB::Union{Function, Nothing}=nothing insertSQLVectorDB::Union{Function, Nothing}=nothing
) where {T1<:AbstractDict} ) where {T1<:AbstractDict}
@@ -490,14 +770,16 @@ function evaluator(state::T1, text2textInstructLLM::Function;
"action_input" is the input to the action "action_input" is the input to the action
"observation" is result of the preceding immediate action "observation" is result of the preceding immediate action
At each round of conversation, the user will give you: <At each round of conversation, the user will give you>
Context: ...
Trajectory: ... Trajectory: ...
Error note: error note from your previous attempt
</At each round of conversation, the user will give you>
You must follow the following guidelines: <You must follow the following guidelines>
- When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer. - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
</You must follow the following guidelines>
You should then respond to the user with: <You should then respond to the user with>
1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question. 1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
- Evaluate the correctness of each section and the overall trajectory based on the given question. - Evaluate the correctness of each section and the overall trajectory based on the given question.
- Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation. - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
@@ -520,13 +802,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
- 8 indicates that both the trajectory are correct, and the observation's content directly answers the question. - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
- 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations. - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
5) Suggestion: if accepted_as_answer is "No", provide suggestion. 5) Suggestion: if accepted_as_answer is "No", provide suggestion.
</You should then respond to the user with>
You should only respond in format as described below: <You should only respond in format as described below>
Trajectory_evaluation: ... Trajectory_evaluation: ...
Answer_evaluation: ... Answer_evaluation: ...
Accepted_as_answer: ... Accepted_as_answer: ...
Score: ... Score: ...
Suggestion: ... Suggestion: ...
</You should only respond in format as described below>
Let's begin! Let's begin!
""" """
@@ -536,10 +820,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
thoughthistory *= "$k: $v\n" thoughthistory *= "$k: $v\n"
end end
for attempt in 1:5 errornote = ""
for attempt in 1:10
errorFlag = false
usermsg = usermsg =
""" """
Trajectory: $thoughthistory Trajectory: $thoughthistory
Error note: $errornote
""" """
_prompt = _prompt =
@@ -555,10 +844,23 @@ function evaluator(state::T1, text2textInstructLLM::Function;
<|start_header_id|>assistant<|end_header_id|> <|start_header_id|>assistant<|end_header_id|>
""" """
try header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
response = text2textInstructLLM(prompt) response = text2textInstructLLM(prompt)
# make sure every header is in the response
for i in header
detected = GeneralUtils.detect_keyword(i, response)
if detected === nothing
errornote = "Keyword $i not found in response"
errorFlag = true
end
end
if errorFlag
continue # skip to the next iteration
end
responsedict = GeneralUtils.textToDict(response, responsedict = GeneralUtils.textToDict(response,
["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]; header;
rightmarker=":", symbolkey=true, lowercasekey=true) rightmarker=":", symbolkey=true, lowercasekey=true)
# check if dict has all required value # check if dict has all required value
@@ -594,20 +896,10 @@ function evaluator(state::T1, text2textInstructLLM::Function;
pprintln(Dict(responsedict)) pprintln(Dict(responsedict))
return responsedict[:score] return responsedict[:score]
catch e
io = IOBuffer()
showerror(io, e)
errorMsg = String(take!(io))
st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
println("")
println("Attempt $attempt. Error occurred: $errorMsg\n$st")
println("")
end
end end
error("evaluator failed to generate an evaluation") error("evaluator failed to generate an evaluation")
end end
""" """
# Arguments # Arguments
@@ -979,9 +1271,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
earlystop(state) = state[:reward] >= 8 ? true : false earlystop(state) = state[:reward] >= 8 ? true : false
_, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs; _, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
horizontalSampleExpansionPhase=2, horizontalSampleExpansionPhase=5,
horizontalSampleSimulationPhase=1, horizontalSampleSimulationPhase=2,
maxSimulationDepth=3, maxiterations=2, maxSimulationDepth=10, maxiterations=2,
explorationweight=1.0, explorationweight=1.0,
earlystop=earlystop, earlystop=earlystop,
saveSimulatedNode=true) saveSimulatedNode=true)

View File

@@ -35,7 +35,7 @@ function text2textInstructLLM(prompt::String)
msgPurpose="inference", msgPurpose="inference",
senderName="yiemagent", senderName="yiemagent",
senderId=sessionId, senderId=sessionId,
receiverName="text2textinstruct_medium", receiverName="text2textinstruct_small",
mqttBrokerAddress=config[:mqttServerInfo][:broker], mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port], mqttBrokerPort=config[:mqttServerInfo][:port],
) )
@@ -64,7 +64,7 @@ function getEmbedding(text::T) where {T<:AbstractString}
msgPurpose="embedding", msgPurpose="embedding",
senderName="yiemagent", senderName="yiemagent",
senderId=sessionId, senderId=sessionId,
receiverName="text2textinstruct_medium", receiverName="text2textinstruct_small",
mqttBrokerAddress=config[:mqttServerInfo][:broker], mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port], mqttBrokerPort=config[:mqttServerInfo][:port],
) )