diff --git a/src/interface.jl b/src/interface.jl
index 56baccc..0624a74 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -339,6 +339,286 @@ julia>
# Signature
"""
+# function evaluator(state::T1, text2textInstructLLM::Function;
+# insertSQLVectorDB::Union{Function, Nothing}=nothing
+# ) where {T1<:AbstractDict}
+
+# # systemmsg =
+# # """
+# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
+
+# # Definitions:
+# # "question" is the user's question.
+# # "thought" is step-by-step reasoning about the current situation.
+# # "plan" is what to do to complete the task from the current situation.
+# # "action" is the taken action which can be one of the following functions:
+# # 1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column.
+# # 2) GETDATA[instruction], which you can use to get the data from the database.
+# # 3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question.
+# # "observation" is result of the action in JSON format.
+
+# # At each round of conversation, the user will give you:
+# # Context: ...
+# # Trajectories: ...
+
+# # You should then respond to the user with:
+# # - Original_question: Repeat the original question.
+# # - Evaluation (you must evaluate all of the following points):
+# # 1) Analyze the trajectories of a solution to answer the user's original question.
+# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and
+# # analysis in detail. Focus on the latest thought, action, and observation.
+# # Incomplete trajectories can be correct if the thoughts and actions so far are correct,
+# # even if the answer is not found yet. Do not generate additional thoughts or actions.
+# # 2) How the observation addresses the original question?
+# # 3) Provide suggestion (if applicable).
+# # - Score: Correctness score s where s is an integer from 0 to 10.
+# # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
+# # 1) The accepted observation should directly answer the question.
+# # 2) The possible responses are either 'Yes' or 'No.'
+
+# # You should only respond in JSON format as described below:
+# # {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...}
+
+# # Here are correct trajectory examples:
+# # user:
+# # {
+# # "question": "I'm looking for a sedan with an automatic driving feature.",
+# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
+# # "thought_2": "I should check our inventory first to see if we have the one our customer wants.",
+# # "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"},
+# # "observation_1": "Yiem Model A, Conez Model B"
+# # }
+# # assistant:
+# # {
+# # "original_question": "the user is looking for a sedan with an automatic driving feature.",
+# # "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. Thus, it is accepted as the answer.",
+# # "score": 10,
+# # "accepted_as_answer": "Yes"
+# # }
+
+# # user:
+# # {
+# # "question": "How many cars that fitted with a stereo we have?",
+# # "thought_1": "I have many types of car in my inventory, each with diverse features.",
+# # "thought_3": "I should check our inventory.",
+# # "action_1": {"name": "inventory", "input": "vehicle with a stereo"},
+# # "observation_1": "2015 Conez truck."
+# # }
+# # assistant:
+# # {
+# # "evaluation": "This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.",
+# # "score": 5,
+# # "accepted_as_answer": "No"
+# # }
+
+# # Here are incorrect trajectory examples:
+# # user:
+# # {
+# # "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?",
+# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
+# # "thought_2": "I will use SEARCHINTERNET function to search for the car.",
+# # "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature."},
+# # "observation_1": "Teza Model A, Teza Model B"
+# # }
+# # assistant:
+# # {
+# # "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.",
+# # "score": 0,
+# # "accepted_as_answer": "No"
+# # }
+
+# # Let's begin!
+# # """
+
+# # systemmsg =
+# # """
+# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
+
+# # Definitions:
+# # "question" is the user's question.
+# # "thought" is step-by-step reasoning about the current situation.
+# # "plan" is what to do to complete the task from the current situation.
+# # "action_name" is the name of the action taken, which can be one of the following functions:
+# # 1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English.
+# # 2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must includes "budget", "type of wine", "characteristics of wine" and "food pairing".
+# # "action_input" is the input to the action
+# # "observation" is result of the action.
+
+# # At each round of conversation, the user will give you:
+# # Context: ...
+# # Trajectories: ...
+
+# # You should then respond to the user with:
+# # - original_question: Repeat the original question.
+# # - evaluation (you must evaluate all of the following points in a single paragraph):
+# # 1) Analyze the trajectories of a solution to answer the user's original question.
+# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and
+# # analysis in detail. Focus on the latest thought, action, and observation.
+# # Incomplete trajectories can be correct if the thoughts and actions so far are correct,
+# # even if the answer is not found yet. Do not generate additional thoughts or actions.
+# # 2) How the observation addresses the question exactly?
+# # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
+# # 1) if the observation's content directly answers the question then just accept it as the answer. Otherwise, it is not. The possible responses are either 'Yes' or 'No.'
+# # - score: Correctness score s where s is a single integer between 0 to 9.
+# # 1) 0 means the trajectories are incorrect.
+# # 2) 9 means the trajectories are correct, and the observation's content directly answers the question.
+# # - suggestion: if accepted_as_answer is "No", provide suggestion.
+
+# # You should only respond in format as described below:
+# # original_question: ...
+# # evaluation: ...
+# # accepted_as_answer: ...
+# # score: ...
+# # suggestion: ...
+
+# # Let's begin!
+# # """
+
+# systemmsg =
+# """
+# You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions.
+
+# Definitions:
+# "question" is the user's question
+# "understanding" is agent's understanding about the current situation
+# "reasoning" is agent's step-by-step reasoning about the current situation
+# "plan" is agent's plan to complete the task from the current situation
+# "action_name" is the name of the action taken, which can be one of the following functions:
+# - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
+# For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
+# Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
+# "action_input" is the input to the action
+# "observation" is result of the preceding immediate action
+
+# At each round of conversation, the user will give you:
+# Trajectory: ...
+# Error note: error note from your previous attempt
+
+# You must follow the following guidelines:
+# - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
+
+# You should then respond to the user with:
+# 1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
+# - Evaluate the correctness of each section and the overall trajectory based on the given question.
+# - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
+# - Incomplete trajectory are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached.
+# - Do not generate additional thoughts or actions.
+# 2) Answer_evaluation:
+# - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question
+# - State your rationale
+# 3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
+# Bad example (The observation didn't answers the question):
+# question: Find cars with 4 wheels.
+# observation: There are an apple in the table.
+# Good example (The observation answers the question):
+# question: Find cars with a stereo.
+# observation: There are 1 cars in the table. 1) brand: Toyota, model: yaris, color: black.
+# 4) Score: Correctness score s where s is a single integer between 0 to 9.
+# Score guideline:
+# - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed
+# - 4 indicates that the trajectory are correct but the observation is incorrect or failed
+# - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
+# - 9 indicates a perfect performance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
+# 5) Suggestion: if accepted_as_answer is "No", provide suggestion.
+
+# You should only respond in format as described below:
+# Trajectory_evaluation: ...
+# Answer_evaluation: ...
+# Accepted_as_answer: ...
+# Score: ...
+# Suggestion: ...
+
+# Let's begin!
+# """
+
+# thoughthistory = ""
+# for (k, v) in state[:thoughtHistory]
+# thoughthistory *= "$k: $v\n"
+# end
+
+# errornote = ""
+
+# for attempt in 1:5
+# usermsg =
+# """
+# Trajectory: $thoughthistory
+# Error note: $errornote
+# """
+
+# _prompt =
+# [
+# Dict(:name=> "system", :text=> systemmsg),
+# Dict(:name=> "user", :text=> usermsg)
+# ]
+
+# # put in model format
+# prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
+# prompt *=
+# """
+# <|start_header_id|>assistant<|end_header_id|>
+# """
+
+# header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
+
+# try
+# response = text2textInstructLLM(prompt)
+# # make sure every header is in the response
+# for i in header
+# detected = GeneralUtils.detect_keyword(i, response)
+# if detected === nothing
+# error("Keyword $i not found in response")
+# end
+# end
+
+# responsedict = GeneralUtils.textToDict(response,
+# header;
+# rightmarker=":", symbolkey=true, lowercasekey=true)
+
+# # check if dict has all required value
+# trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
+# answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
+# # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
+# responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
+# responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
+# score::Integer = responsedict[:score]
+# accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
+# suggestion::AbstractString = responsedict[:suggestion]
+
+# if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
+# error("generated accepted_as_answer has wrong format")
+# end
+
+# # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
+# state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
+# state[:evaluationscore] = responsedict[:score]
+# state[:accepted_as_answer] = responsedict[:accepted_as_answer]
+# state[:suggestion] = responsedict[:suggestion]
+
+# # mark as terminal state when the answer is achieved
+# if accepted_as_answer == "Yes"
+
+# # mark the state as terminal state because the evaluation say so.
+# state[:isterminal] = true
+
+# # evaluation score as reward because different answers hold different value for the user.
+# state[:reward] = responsedict[:score]
+# end
+# println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
+# pprintln(Dict(responsedict))
+
+# return responsedict[:score]
+# catch e
+# io = IOBuffer()
+# showerror(io, e)
+# errorMsg = String(take!(io))
+# st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
+# println("")
+# println("Attempt $attempt. Error occurred: $errorMsg\n$st")
+# println("")
+# end
+# end
+# error("evaluator failed to generate an evaluation")
+# end
function evaluator(state::T1, text2textInstructLLM::Function;
insertSQLVectorDB::Union{Function, Nothing}=nothing
) where {T1<:AbstractDict}
@@ -490,14 +770,16 @@ function evaluator(state::T1, text2textInstructLLM::Function;
"action_input" is the input to the action
"observation" is result of the preceding immediate action
- At each round of conversation, the user will give you:
- Context: ...
+
Trajectory: ...
+ Error note: error note from your previous attempt
+
- You must follow the following guidelines:
+
- When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
-
- You should then respond to the user with:
+
+
+
1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
- Evaluate the correctness of each section and the overall trajectory based on the given question.
- Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
@@ -520,13 +802,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
- 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
- 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
5) Suggestion: if accepted_as_answer is "No", provide suggestion.
+
- You should only respond in format as described below:
+
Trajectory_evaluation: ...
Answer_evaluation: ...
Accepted_as_answer: ...
Score: ...
Suggestion: ...
+
Let's begin!
"""
@@ -536,10 +820,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
thoughthistory *= "$k: $v\n"
end
- for attempt in 1:5
+ errornote = ""
+
+ for attempt in 1:10
+ errorFlag = false
+
usermsg =
"""
Trajectory: $thoughthistory
+ Error note: $errornote
"""
_prompt =
@@ -555,59 +844,62 @@ function evaluator(state::T1, text2textInstructLLM::Function;
<|start_header_id|>assistant<|end_header_id|>
"""
- try
- response = text2textInstructLLM(prompt)
- responsedict = GeneralUtils.textToDict(response,
- ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"];
- rightmarker=":", symbolkey=true, lowercasekey=true)
+ header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
- # check if dict has all required value
- trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
- answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
- # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
- responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
- responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
- score::Integer = responsedict[:score]
- accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
- suggestion::AbstractString = responsedict[:suggestion]
-
- if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
- error("generated accepted_as_answer has wrong format")
+ response = text2textInstructLLM(prompt)
+ # make sure every header is in the response
+ for i in header
+ detected = GeneralUtils.detect_keyword(i, response)
+ if detected === nothing
+ errornote = "Keyword $i not found in response"
+ errorFlag = true
end
-
- # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
- state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
- state[:evaluationscore] = responsedict[:score]
- state[:accepted_as_answer] = responsedict[:accepted_as_answer]
- state[:suggestion] = responsedict[:suggestion]
-
- # mark as terminal state when the answer is achieved
- if accepted_as_answer == "Yes"
-
- # mark the state as terminal state because the evaluation say so.
- state[:isterminal] = true
-
- # evaluation score as reward because different answers hold different value for the user.
- state[:reward] = responsedict[:score]
- end
- println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
- pprintln(Dict(responsedict))
-
- return responsedict[:score]
- catch e
- io = IOBuffer()
- showerror(io, e)
- errorMsg = String(take!(io))
- st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
- println("")
- println("Attempt $attempt. Error occurred: $errorMsg\n$st")
- println("")
end
+ if errorFlag
+ continue # skip to the next iteration
+ end
+
+ responsedict = GeneralUtils.textToDict(response,
+ header;
+ rightmarker=":", symbolkey=true, lowercasekey=true)
+
+ # check if dict has all required value
+ trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
+ answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
+ # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
+ responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
+ responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
+ score::Integer = responsedict[:score]
+ accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
+ suggestion::AbstractString = responsedict[:suggestion]
+
+ if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
+ error("generated accepted_as_answer has wrong format")
+ end
+
+ # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
+ state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
+ state[:evaluationscore] = responsedict[:score]
+ state[:accepted_as_answer] = responsedict[:accepted_as_answer]
+ state[:suggestion] = responsedict[:suggestion]
+
+ # mark as terminal state when the answer is achieved
+ if accepted_as_answer == "Yes"
+
+ # mark the state as terminal state because the evaluation say so.
+ state[:isterminal] = true
+
+ # evaluation score as reward because different answers hold different value for the user.
+ state[:reward] = responsedict[:score]
+ end
+ println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
+ pprintln(Dict(responsedict))
+
+ return responsedict[:score]
end
error("evaluator failed to generate an evaluation")
end
-
"""
# Arguments
@@ -979,9 +1271,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
earlystop(state) = state[:reward] >= 8 ? true : false
_, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
- horizontalSampleExpansionPhase=2,
- horizontalSampleSimulationPhase=1,
- maxSimulationDepth=3, maxiterations=2,
+ horizontalSampleExpansionPhase=5,
+ horizontalSampleSimulationPhase=2,
+ maxSimulationDepth=10, maxiterations=2,
explorationweight=1.0,
earlystop=earlystop,
saveSimulatedNode=true)
diff --git a/test/runtest.jl b/test/runtest.jl
index 4b1c6f3..f19a753 100644
--- a/test/runtest.jl
+++ b/test/runtest.jl
@@ -35,7 +35,7 @@ function text2textInstructLLM(prompt::String)
msgPurpose="inference",
senderName="yiemagent",
senderId=sessionId,
- receiverName="text2textinstruct_medium",
+ receiverName="text2textinstruct_small",
mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port],
)
@@ -64,7 +64,7 @@ function getEmbedding(text::T) where {T<:AbstractString}
msgPurpose="embedding",
senderName="yiemagent",
senderId=sessionId,
- receiverName="text2textinstruct_medium",
+ receiverName="text2textinstruct_small",
mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port],
)