From 4f1917e01ba098c52b1e00469a12d31a2d1665e2 Mon Sep 17 00:00:00 2001 From: tonaerospace Date: Sun, 9 Mar 2025 18:36:57 +0700 Subject: [PATCH] update --- src/interface.jl | 404 ++++++++++++++++++++++++++++++++++++++++------- test/runtest.jl | 4 +- 2 files changed, 350 insertions(+), 58 deletions(-) diff --git a/src/interface.jl b/src/interface.jl index 56baccc..0624a74 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -339,6 +339,286 @@ julia> # Signature """ +# function evaluator(state::T1, text2textInstructLLM::Function; +# insertSQLVectorDB::Union{Function, Nothing}=nothing +# ) where {T1<:AbstractDict} + +# # systemmsg = +# # """ +# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions. + +# # Definitions: +# # "question" is the user's question. +# # "thought" is step-by-step reasoning about the current situation. +# # "plan" is what to do to complete the task from the current situation. +# # "action" is the taken action which can be one of the following functions: +# # 1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. +# # 2) GETDATA[instruction], which you can use to get the data from the database. +# # 3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question. +# # "observation" is result of the action in JSON format. + +# # At each round of conversation, the user will give you: +# # Context: ... +# # Trajectories: ... + +# # You should then respond to the user with: +# # - Original_question: Repeat the original question. +# # - Evaluation (you must evaluate all of the following points): +# # 1) Analyze the trajectories of a solution to answer the user's original question. +# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and +# # analysis in detail. Focus on the latest thought, action, and observation. 
+# # Incomplete trajectories can be correct if the thoughts and actions so far are correct, +# # even if the answer is not found yet. Do not generate additional thoughts or actions. +# # 2) How the observation addresses the original question? +# # 3) Provide suggestion (if applicable). +# # - Score: Correctness score s where s is an integer from 0 to 10. +# # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question. +# # 1) The accepted observation should directly answer the question. +# # 2) The possible responses are either 'Yes' or 'No.' + +# # You should only respond in JSON format as described below: +# # {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...} + +# # Here are correct trajectory examples: +# # user: +# # { +# # "question": "I'm looking for a sedan with an automatic driving feature.", +# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.", +# # "thought_2": "I should check our inventory first to see if we have the one our customer wants.", +# # "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"}, +# # "observation_1": "Yiem Model A, Conez Model B" +# # } +# # assistant: +# # { +# # "original_question": "the user is looking for a sedan with an automatic driving feature.", +# # "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. 
Thus, it is accepted as the answer.", +# # "score": 10, +# # "accepted_as_answer": "Yes" +# # } + +# # user: +# # { +# # "question": "How many cars that fitted with a stereo we have?", +# # "thought_1": "I have many types of car in my inventory, each with diverse features.", +# # "thought_3": "I should check our inventory.", +# # "action_1": {"name": "inventory", "input": "vehicle with a stereo"}, +# # "observation_1": "2015 Conez truck." +# # } +# # assistant: +# # { +# # "evaluation": "This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.", +# # "score": 5, +# # "accepted_as_answer": "No" +# # } + +# # Here are incorrect trajectory examples: +# # user: +# # { +# # "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?", +# # "thought_1": "I have many types of sedans in my inventory, each with diverse features.", +# # "thought_2": "I will use SEARCHINTERNET function to search for the car.", +# # "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature."}, +# # "observation_1": "Teza Model A, Teza Model B" +# # } +# # assistant: +# # { +# # "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.", +# # "score": 0, +# # "accepted_as_answer": "No" +# # } + +# # Let's begin! +# # """ + +# # systemmsg = +# # """ +# # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions. + +# # Definitions: +# # "question" is the user's question. 
+# # "thought" is step-by-step reasoning about the current situation. +# # "plan" is what to do to complete the task from the current situation. +# # "action_name" is the name of the action taken, which can be one of the following functions: +# # 1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English. +# # 2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must include "budget", "type of wine", "characteristics of wine" and "food pairing". +# # "action_input" is the input to the action +# # "observation" is result of the action. + +# # At each round of conversation, the user will give you: +# # Context: ... +# # Trajectories: ... + +# # You should then respond to the user with: +# # - original_question: Repeat the original question. +# # - evaluation (you must evaluate all of the following points in a single paragraph): +# # 1) Analyze the trajectories of a solution to answer the user's original question. +# # Given a question and a trajectory, evaluate its correctness and provide your reasoning and +# # analysis in detail. Focus on the latest thought, action, and observation. +# # Incomplete trajectories can be correct if the thoughts and actions so far are correct, +# # even if the answer is not found yet. Do not generate additional thoughts or actions. +# # 2) How the observation addresses the question exactly? +# # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question. +# # 1) if the observation's content directly answers the question then just accept it as the answer. Otherwise, it is not. The possible responses are either 'Yes' or 'No.' +# # - score: Correctness score s where s is a single integer between 0 to 9. +# # 1) 0 means the trajectories are incorrect. +# # 2) 9 means the trajectories are correct, and the observation's content directly answers the question. 
+# # - suggestion: if accepted_as_answer is "No", provide suggestion. + +# # You should only respond in format as described below: +# # original_question: ... +# # evaluation: ... +# # accepted_as_answer: ... +# # score: ... +# # suggestion: ... + +# # Let's begin! +# # """ + +# systemmsg = +# """ +# You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions. + +# Definitions: +# "question" is the user's question +# "understanding" is agent's understanding about the current situation +# "reasoning" is agent's step-by-step reasoning about the current situation +# "plan" is agent's plan to complete the task from the current situation +# "action_name" is the name of the action taken, which can be one of the following functions: +# - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database. +# For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator. +# Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'. +# "action_input" is the input to the action +# "observation" is result of the preceding immediate action + +# At each round of conversation, the user will give you: +# Trajectory: ... +# Error note: error note from your previous attempt + +# You must follow the following guidelines: +# - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer. + +# You should then respond to the user with: +# 1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question. +# - Evaluate the correctness of each section and the overall trajectory based on the given question. +# - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation. 
+# - Incomplete trajectory are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached. +# - Do not generate additional thoughts or actions. +# 2) Answer_evaluation: +# - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question +# - State your rationale +# 3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No" +# Bad example (The observation didn't answers the question): +# question: Find cars with 4 wheels. +# observation: There are an apple in the table. +# Good example (The observation answers the question): +# question: Find cars with a stereo. +# observation: There are 1 cars in the table. 1) brand: Toyota, model: yaris, color: black. +# 4) Score: Correctness score s where s is a single integer between 0 to 9. +# Score guideline: +# - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed +# - 4 indicates that the trajectory are correct but the observation is incorrect or failed +# - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question. +# - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations. +# 5) Suggestion: if accepted_as_answer is "No", provide suggestion. + +# You should only respond in format as described below: +# Trajectory_evaluation: ... +# Answer_evaluation: ... +# Accepted_as_answer: ... +# Score: ... +# Suggestion: ... + +# Let's begin! 
+# """ + +# thoughthistory = "" +# for (k, v) in state[:thoughtHistory] +# thoughthistory *= "$k: $v\n" +# end + +# errornote = "" + +# for attempt in 1:5 +# usermsg = +# """ +# Trajectory: $thoughthistory +# Error note: $errornote +# """ + +# _prompt = +# [ +# Dict(:name=> "system", :text=> systemmsg), +# Dict(:name=> "user", :text=> usermsg) +# ] + +# # put in model format +# prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct") +# prompt *= +# """ +# <|start_header_id|>assistant<|end_header_id|> +# """ + +# header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"] + +# try +# response = text2textInstructLLM(prompt) +# # make sure every header is in the response +# for i in header +# detected = GeneralUtils.detect_keyword(i, response) +# if detected === nothing +# error("Keyword $i not found in response") +# end +# end + +# responsedict = GeneralUtils.textToDict(response, +# header; +# rightmarker=":", symbolkey=true, lowercasekey=true) + +# # check if dict has all required value +# trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation] +# answerevaluation_text::AbstractString = responsedict[:answer_evaluation] +# # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any. +# responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number. 
+# responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5 +# score::Integer = responsedict[:score] +# accepted_as_answer::AbstractString = responsedict[:accepted_as_answer] +# suggestion::AbstractString = responsedict[:suggestion] + +# if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt +# error("generated accepted_as_answer has wrong format") +# end + +# # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension) +# state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" +# state[:evaluationscore] = responsedict[:score] +# state[:accepted_as_answer] = responsedict[:accepted_as_answer] +# state[:suggestion] = responsedict[:suggestion] + +# # mark as terminal state when the answer is achieved +# if accepted_as_answer == "Yes" + +# # mark the state as terminal state because the evaluation say so. +# state[:isterminal] = true + +# # evaluation score as reward because different answers hold different value for the user. +# state[:reward] = responsedict[:score] +# end +# println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__) +# pprintln(Dict(responsedict)) + +# return responsedict[:score] +# catch e +# io = IOBuffer() +# showerror(io, e) +# errorMsg = String(take!(io)) +# st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace())) +# println("") +# println("Attempt $attempt. 
Error occurred: $errorMsg\n$st") +# println("") +# end +# end +# error("evaluator failed to generate an evaluation") +# end function evaluator(state::T1, text2textInstructLLM::Function; insertSQLVectorDB::Union{Function, Nothing}=nothing ) where {T1<:AbstractDict} @@ -490,14 +770,16 @@ function evaluator(state::T1, text2textInstructLLM::Function; "action_input" is the input to the action "observation" is result of the preceding immediate action - At each round of conversation, the user will give you: - Context: ... + Trajectory: ... + Error note: error note from your previous attempt + - You must follow the following guidelines: + - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer. - - You should then respond to the user with: + + + 1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question. - Evaluate the correctness of each section and the overall trajectory based on the given question. - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation. @@ -520,13 +802,15 @@ function evaluator(state::T1, text2textInstructLLM::Function; - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question. - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations. 5) Suggestion: if accepted_as_answer is "No", provide suggestion. + - You should only respond in format as described below: + Trajectory_evaluation: ... Answer_evaluation: ... Accepted_as_answer: ... Score: ... Suggestion: ... + Let's begin! 
""" @@ -536,10 +820,15 @@ function evaluator(state::T1, text2textInstructLLM::Function; thoughthistory *= "$k: $v\n" end - for attempt in 1:5 + errornote = "" + + for attempt in 1:10 + errorFlag = false + usermsg = """ Trajectory: $thoughthistory + Error note: $errornote """ _prompt = @@ -555,59 +844,62 @@ function evaluator(state::T1, text2textInstructLLM::Function; <|start_header_id|>assistant<|end_header_id|> """ - try - response = text2textInstructLLM(prompt) - responsedict = GeneralUtils.textToDict(response, - ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]; - rightmarker=":", symbolkey=true, lowercasekey=true) + header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"] - # check if dict has all required value - trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation] - answerevaluation_text::AbstractString = responsedict[:answer_evaluation] - # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any. - responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number. 
- responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5 - score::Integer = responsedict[:score] - accepted_as_answer::AbstractString = responsedict[:accepted_as_answer] - suggestion::AbstractString = responsedict[:suggestion] - - if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt - error("generated accepted_as_answer has wrong format") + response = text2textInstructLLM(prompt) + # make sure every header is in the response + for i in header + detected = GeneralUtils.detect_keyword(i, response) + if detected === nothing + errornote = "Keyword $i not found in response" + errorFlag = true end - - # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension) - state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" - state[:evaluationscore] = responsedict[:score] - state[:accepted_as_answer] = responsedict[:accepted_as_answer] - state[:suggestion] = responsedict[:suggestion] - - # mark as terminal state when the answer is achieved - if accepted_as_answer == "Yes" - - # mark the state as terminal state because the evaluation say so. - state[:isterminal] = true - - # evaluation score as reward because different answers hold different value for the user. - state[:reward] = responsedict[:score] - end - println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__) - pprintln(Dict(responsedict)) - - return responsedict[:score] - catch e - io = IOBuffer() - showerror(io, e) - errorMsg = String(take!(io)) - st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace())) - println("") - println("Attempt $attempt. 
Error occurred: $errorMsg\n$st") - println("") end + if errorFlag + continue # skip to the next iteration + end + + responsedict = GeneralUtils.textToDict(response, + header; + rightmarker=":", symbolkey=true, lowercasekey=true) + + # check if dict has all required value + trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation] + answerevaluation_text::AbstractString = responsedict[:answer_evaluation] + # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any. + responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number. + responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5 + score::Integer = responsedict[:score] + accepted_as_answer::AbstractString = responsedict[:accepted_as_answer] + suggestion::AbstractString = responsedict[:suggestion] + + if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt + error("generated accepted_as_answer has wrong format") + end + + # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension) + state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" + state[:evaluationscore] = responsedict[:score] + state[:accepted_as_answer] = responsedict[:accepted_as_answer] + state[:suggestion] = responsedict[:suggestion] + + # mark as terminal state when the answer is achieved + if accepted_as_answer == "Yes" + + # mark the state as terminal state because the evaluation say so. + state[:isterminal] = true + + # evaluation score as reward because different answers hold different value for the user. 
+ state[:reward] = responsedict[:score] + end + println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__) + pprintln(Dict(responsedict)) + + return responsedict[:score] end error("evaluator failed to generate an evaluation") end - """ # Arguments @@ -979,9 +1271,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function; earlystop(state) = state[:reward] >= 8 ? true : false _, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs; - horizontalSampleExpansionPhase=2, - horizontalSampleSimulationPhase=1, - maxSimulationDepth=3, maxiterations=2, + horizontalSampleExpansionPhase=5, + horizontalSampleSimulationPhase=2, + maxSimulationDepth=10, maxiterations=2, explorationweight=1.0, earlystop=earlystop, saveSimulatedNode=true) diff --git a/test/runtest.jl b/test/runtest.jl index 4b1c6f3..f19a753 100644 --- a/test/runtest.jl +++ b/test/runtest.jl @@ -35,7 +35,7 @@ function text2textInstructLLM(prompt::String) msgPurpose="inference", senderName="yiemagent", senderId=sessionId, - receiverName="text2textinstruct_medium", + receiverName="text2textinstruct_small", mqttBrokerAddress=config[:mqttServerInfo][:broker], mqttBrokerPort=config[:mqttServerInfo][:port], ) @@ -64,7 +64,7 @@ function getEmbedding(text::T) where {T<:AbstractString} msgPurpose="embedding", senderName="yiemagent", senderId=sessionId, - receiverName="text2textinstruct_medium", + receiverName="text2textinstruct_small", mqttBrokerAddress=config[:mqttServerInfo][:broker], mqttBrokerPort=config[:mqttServerInfo][:port], )