update

2025-03-14 12:32:09 +07:00
parent a22f9c52d2
commit 200a1d3e23
3 changed files with 71 additions and 500 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -233,6 +233,10 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
        response
      end

+    # Sometimes the LLM wraps a header in Markdown emphasis (e.g. **Comprehension**:), which is not expected, so strip the asterisks
+    response = replace(response, "**"=>"")
+    response = replace(response, "***"=>"")
+
    # some time LLM output Plan_1: so we need to detect and replace topic numbering
    regex = r"_[0-1000]+:"
    matches = collect(eachmatch(regex, response))
@@ -250,9 +254,9 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
    dictkey = ["comprehension", "plan", "action_name", "action_input"]

    # detect if there are more than 1 key per categories
-    count = GeneralUtils.countGivenWords(response, header)
+    wordcount = GeneralUtils.countGivenWords(response, header)
    duplicateKeywordFlag = false
-    for (i, v) in enumerate(count)
+    for (i, v) in enumerate(wordcount)
      keyword = header[i]
      keywordNumber = v
      if keywordNumber > 1
@@ -355,420 +359,8 @@ julia>

 # Signature
 """
-# function evaluator(state::T1, text2textInstructLLM::Function;
-#   insertSQLVectorDB::Union{Function, Nothing}=nothing
-#   ) where {T1<:AbstractDict}
-  
-#   # systemmsg = 
-#   # """
-#   # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
-
-#   # Definitions:
-#   # "question" is the user's question.
-#   # "thought" is step-by-step reasoning about the current situation.
-#   # "plan" is what to do to complete the task from the current situation.
-#   # "action" is the taken action which can be one of the following functions:
-#   #   1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column.
-#   #   2) GETDATA[instruction], which you can use to get the data from the database.
-#   #   3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question.
-#   # "observation" is result of the action in JSON format.
-
-#   # At each round of conversation, the user will give you:
-#   # Context: ...
-#   # Trajectories: ...
-    
-#   # You should then respond to the user with:
-#   # - Original_question: Repeat the original question.
-#   # - Evaluation (you must evaluate all of the following points):
-#   #     1) Analyze the trajectories of a solution to answer the user's original question. 
-#   #       Given a question and a trajectory, evaluate its correctness and provide your reasoning and
-#   #       analysis in detail. Focus on the latest thought, action, and observation. 
-#   #       Incomplete trajectories can be correct if the thoughts and actions so far are correct, 
-#   #       even if the answer is not found yet. Do not generate additional thoughts or actions.
-#   #     2) How the observation addresses the original question?
-#   #     3) Provide suggestion (if applicable).
-#   # - Score: Correctness score s where s is an integer from 0 to 10.
-#   # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
-#   #     1) The accepted observation should directly answer the question.
-#   #     2) The possible responses are either 'Yes' or 'No.' 
-
-#   # You should only respond in JSON format as described below:
-#   #   {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...}
-
-#   # Here are correct trajectory examples:
-#   # user:
-#   # {
-#   #   "question": "I'm looking for a sedan with an automatic driving feature.",
-#   #   "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
-#   #   "thought_2": "I should check our inventory first to see if we have the one our customer wants.", 
-#   #   "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"},
-#   #   "observation_1": "Yiem Model A, Conez Model B"
-#   # }
-#   # assistant:
-#   # {
-#   #   "original_question": "the user is looking for a sedan with an automatic driving feature.",
-#   #   "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. Thus, it is accepted as the answer.",
-#   #   "score": 10,
-#   #   "accepted_as_answer": "Yes"
-#   # }
-
-#   # user:
-#   # {
-#   #   "question": "How many cars that fitted with a stereo we have?",
-#   #   "thought_1": "I have many types of car in my inventory, each with diverse features.",
-#   #   "thought_3": "I should check our inventory.", 
-#   #   "action_1": {"name": "inventory", "input": "vehicle with a stereo"},
-#   #   "observation_1": "2015 Conez truck."
-#   # }
-#   # assistant:
-#   # {
-#   #   "evaluation": “This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.”,
-#   #   "score": 5,
-#   #   "accepted_as_answer": "No"
-#   # }
-
-#   # Here are incorrect trajectory examples:
-#   # user:
-#   # {
-#   #   "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?",
-#   #   "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
-#   #   "thought_2": "I will use SEARCHINTERNET function to search for the car.", 
-#   #   "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature.},
-#   #   "observation_1": "Teza Model A, Teza Model B"
-#   # }
-#   # assistant:
-#   # {
-#   #   "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.",
-#   #   "score": 0,
-#   #   "accepted_as_answer": "No"
-#   # }
-
-#   # Let's begin!
-#   # """
-  
-#   # systemmsg = 
-#   # """
-#   # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
-
-#   # Definitions:
-#   # "question" is the user's question.
-#   # "thought" is step-by-step reasoning about the current situation.
-#   # "plan" is what to do to complete the task from the current situation.
-#   # “action_name” is the name of the action taken, which can be one of the following functions:
-#   #     1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English. 
-#   #     2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must includes "budget", "type of wine", "characteristics of wine" and "food pairing".
-#   # "action_input" is the input to the action
-#   # "observation" is result of the action.
-
-#   # At each round of conversation, the user will give you:
-#   # Context: ...
-#   # Trajectories: ...
-    
-#   # You should then respond to the user with:
-#   # - original_question: Repeat the original question.
-#   # - evaluation (you must evaluate all of the following points in a single paragraph):
-#   #     1) Analyze the trajectories of a solution to answer the user's original question. 
-#   #       Given a question and a trajectory, evaluate its correctness and provide your reasoning and
-#   #       analysis in detail. Focus on the latest thought, action, and observation. 
-#   #       Incomplete trajectories can be correct if the thoughts and actions so far are correct, 
-#   #       even if the answer is not found yet. Do not generate additional thoughts or actions.
-#   #     2) How the observation addresses the question exactly?
-#   # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
-#   #     1) if the observation's content directly answers the question then just accept it as the answer. Oherwise, it is not. The possible responses are either 'Yes' or 'No.'
-#   # - score: Correctness score s where s is a single integer between 0 to 9. 
-#   #     1) 0 means the trajectories are incorrect.
-#   #     2) 9 means the trajectories are correct, and the observation's content directly answers the question.
-#   # - suggestion: if accepted_as_answer is "No", provide suggestion.
-
-#   # You should only respond in format as described below:
-#   # original_question: ...
-#   # evaluation: ...
-#   # accepted_as_answer: ...
-#   # score: ...
-#   # suggestion: ...
-
-#   # Let's begin!
-#   # """
-
-#   systemmsg = 
-#   """
-#   You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions.
-
-#   Definitions:
-#   "question" is the user's question
-#   "understanding" is agent's understanding about the current situation
-#   "reasoning" is agent's step-by-step reasoning about the current situation
-#   "plan" is agent's plan to complete the task from the current situation
-#   "action_name" is the name of the action taken, which can be one of the following functions:
-#       - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
-#           For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
-#           Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
-#   "action_input" is the input to the action
-#   "observation" is result of the preceding immediate action
-
-#   At each round of conversation, the user will give you:
-#   Trajectory: ...
-#   Error note: error note from your previous attempt
-
-#   You must follow the following guidelines:
-#   - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
-    
-#   You should then respond to the user with:
-#   1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
-#     - Evaluate the correctness of each section and the overall trajectory based on the given question.
-#     - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
-#     - Incomplete trajectory are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached.
-#     - Do not generate additional thoughts or actions.
-#   2) Answer_evaluation: 
-#     - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question
-#     - State your rationale
-#   3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
-#     Bad example (The observation didn't answers the question):
-#       question: Find cars with 4 wheels.
-#       observation: There are an apple in the table.
-#     Good example (The observation answers the question): 
-#       question: Find cars with a stereo.
-#       observation: There are 1 cars in the table. 1) brand: Toyota, model: yaris, color: black.
-#   4) Score: Correctness score s where s is a single integer between 0 to 9. 
-#       Score guideline:
-#       - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed
-#       - 4 indicates that the trajectory are correct but the observation is incorrect or failed
-#       - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
-#       - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
-#   5) Suggestion: if accepted_as_answer is "No", provide suggestion.
-
-#   You should only respond in format as described below:
-#   Trajectory_evaluation: ...
-#   Answer_evaluation: ...
-#   Accepted_as_answer: ...
-#   Score: ...
-#   Suggestion: ...
-
-#   Let's begin!
-#   """
-
-#   thoughthistory = ""
-#   for (k, v) in state[:thoughtHistory]
-#     thoughthistory *= "$k: $v\n" 
-#   end
-
-#   errornote = ""
-
-#   for attempt in 1:5
-#     usermsg =
-#     """
-#     Trajectory: $thoughthistory
-#     Error note: $errornote
-#     """
-
-#     _prompt = 
-#     [
-#       Dict(:name=> "system", :text=> systemmsg),
-#       Dict(:name=> "user", :text=> usermsg)
-#     ]
-
-#     # put in model format
-#     prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
-#     prompt *=
-#       """
-#       <|start_header_id|>assistant<|end_header_id|>
-#       """
-
-#     header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
-
-#     try
-#       response = text2textInstructLLM(prompt)
-#       # make sure every header is in the response
-#       for i in header
-#         detected = GeneralUtils.detect_keyword(i, response)
-#         if detected === nothing
-#           error("Keyword $i not found in response")
-#         end
-#       end
-
-#       responsedict = GeneralUtils.textToDict(response,
-#         header; 
-#         rightmarker=":", symbolkey=true, lowercasekey=true)
-
-#       # check if dict has all required value
-#       trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
-#       answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
-#       # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "")  # remove (...) if there is any.
-#       responsedict[:score] = responsedict[:score][1]  # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
-#       responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
-#       score::Integer = responsedict[:score]
-#       accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
-#       suggestion::AbstractString = responsedict[:suggestion]
-
-#       if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
-#         error("generated accepted_as_answer has wrong format")
-#       end
-
-#       # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
-#       state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" 
-#       state[:evaluationscore] = responsedict[:score] 
-#       state[:accepted_as_answer] = responsedict[:accepted_as_answer]
-#       state[:suggestion] = responsedict[:suggestion]
-
-#       # mark as terminal state when the answer is achieved
-#       if accepted_as_answer == "Yes"
-
-#         # mark the state as terminal state because the evaluation say so.
-#         state[:isterminal] = true
-
-#         # evaluation score as reward because different answers hold different value for the user.
-#         state[:reward] = responsedict[:score]
-#       end
-#       println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
-#       pprintln(Dict(responsedict))
-
-#       return responsedict[:score]
-#     catch e
-#       io = IOBuffer()
-#       showerror(io, e)
-#       errorMsg = String(take!(io))
-#       st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
-#       println("")
-#       println("Attempt $attempt. Error occurred: $errorMsg\n$st")
-#       println("")
-#     end
-#   end
-#   error("evaluator failed to generate an evaluation")
-# end
-function evaluator(state::T1, text2textInstructLLM::Function;
-  insertSQLVectorDB::Union{Function, Nothing}=nothing
+function evaluator(state::T1, text2textInstructLLM::Function
  ) where {T1<:AbstractDict}
-  
-  # systemmsg = 
-  # """
-  # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
-
-  # Definitions:
-  # "question" is the user's question.
-  # "thought" is step-by-step reasoning about the current situation.
-  # "plan" is what to do to complete the task from the current situation.
-  # "action" is the taken action which can be one of the following functions:
-  #   1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column.
-  #   2) GETDATA[instruction], which you can use to get the data from the database.
-  #   3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question.
-  # "observation" is result of the action in JSON format.
-
-  # At each round of conversation, the user will give you:
-  # Context: ...
-  # Trajectories: ...
-    
-  # You should then respond to the user with:
-  # - Original_question: Repeat the original question.
-  # - Evaluation (you must evaluate all of the following points):
-  #     1) Analyze the trajectories of a solution to answer the user's original question. 
-  #       Given a question and a trajectory, evaluate its correctness and provide your reasoning and
-  #       analysis in detail. Focus on the latest thought, action, and observation. 
-  #       Incomplete trajectories can be correct if the thoughts and actions so far are correct, 
-  #       even if the answer is not found yet. Do not generate additional thoughts or actions.
-  #     2) How the observation addresses the original question?
-  #     3) Provide suggestion (if applicable).
-  # - Score: Correctness score s where s is an integer from 0 to 10.
-  # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
-  #     1) The accepted observation should directly answer the question.
-  #     2) The possible responses are either 'Yes' or 'No.' 
-
-  # You should only respond in JSON format as described below:
-  #   {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...}
-
-  # Here are correct trajectory examples:
-  # user:
-  # {
-  #   "question": "I'm looking for a sedan with an automatic driving feature.",
-  #   "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
-  #   "thought_2": "I should check our inventory first to see if we have the one our customer wants.", 
-  #   "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"},
-  #   "observation_1": "Yiem Model A, Conez Model B"
-  # }
-  # assistant:
-  # {
-  #   "original_question": "the user is looking for a sedan with an automatic driving feature.",
-  #   "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. Thus, it is accepted as the answer.",
-  #   "score": 10,
-  #   "accepted_as_answer": "Yes"
-  # }
-
-  # user:
-  # {
-  #   "question": "How many cars that fitted with a stereo we have?",
-  #   "thought_1": "I have many types of car in my inventory, each with diverse features.",
-  #   "thought_3": "I should check our inventory.", 
-  #   "action_1": {"name": "inventory", "input": "vehicle with a stereo"},
-  #   "observation_1": "2015 Conez truck."
-  # }
-  # assistant:
-  # {
-  #   "evaluation": “This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.”,
-  #   "score": 5,
-  #   "accepted_as_answer": "No"
-  # }
-
-  # Here are incorrect trajectory examples:
-  # user:
-  # {
-  #   "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?",
-  #   "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
-  #   "thought_2": "I will use SEARCHINTERNET function to search for the car.", 
-  #   "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature.},
-  #   "observation_1": "Teza Model A, Teza Model B"
-  # }
-  # assistant:
-  # {
-  #   "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.",
-  #   "score": 0,
-  #   "accepted_as_answer": "No"
-  # }
-
-  # Let's begin!
-  # """
-  
-  # systemmsg = 
-  # """
-  # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
-
-  # Definitions:
-  # "question" is the user's question.
-  # "thought" is step-by-step reasoning about the current situation.
-  # "plan" is what to do to complete the task from the current situation.
-  # “action_name” is the name of the action taken, which can be one of the following functions:
-  #     1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English. 
-  #     2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must includes "budget", "type of wine", "characteristics of wine" and "food pairing".
-  # "action_input" is the input to the action
-  # "observation" is result of the action.
-
-  # At each round of conversation, the user will give you:
-  # Context: ...
-  # Trajectories: ...
-    
-  # You should then respond to the user with:
-  # - original_question: Repeat the original question.
-  # - evaluation (you must evaluate all of the following points in a single paragraph):
-  #     1) Analyze the trajectories of a solution to answer the user's original question. 
-  #       Given a question and a trajectory, evaluate its correctness and provide your reasoning and
-  #       analysis in detail. Focus on the latest thought, action, and observation. 
-  #       Incomplete trajectories can be correct if the thoughts and actions so far are correct, 
-  #       even if the answer is not found yet. Do not generate additional thoughts or actions.
-  #     2) How the observation addresses the question exactly?
-  # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
-  #     1) if the observation's content directly answers the question then just accept it as the answer. Oherwise, it is not. The possible responses are either 'Yes' or 'No.'
-  # - score: Correctness score s where s is a single integer between 0 to 9. 
-  #     1) 0 means the trajectories are incorrect.
-  #     2) 9 means the trajectories are correct, and the observation's content directly answers the question.
-  # - suggestion: if accepted_as_answer is "No", provide suggestion.
-
-  # You should only respond in format as described below:
-  # original_question: ...
-  # evaluation: ...
-  # accepted_as_answer: ...
-  # score: ...
-  # suggestion: ...
-
-  # Let's begin!
-  # """

  systemmsg = 
  """
@@ -861,6 +453,10 @@ function evaluator(state::T1, text2textInstructLLM::Function;

    response = text2textInstructLLM(prompt)

+    # Sometimes the LLM wraps a header in Markdown emphasis (e.g. **Comprehension**:), which is not expected, so strip the asterisks
+    response = replace(response, "**"=>"")
+    response = replace(response, "***"=>"")
+
    # make sure every header is in the response
    for i in header
      detected = GeneralUtils.detect_keyword(i, response)
@@ -1127,8 +723,7 @@ function transition(state::T, args::NamedTuple
    elseif thoughtDict[:action_name] == "GETDATA"
      response = SQLexecution(executeSQL, thoughtDict[:action_input])
      if response[:success]
-        # intention = Dict(:intention=> "$(thoughtDict[:plan])")
-        extracted = extractContent_dataframe(response[:result], text2textInstructLLM)
+        extracted = extractContent_dataframe(response[:result], text2textInstructLLM, thoughtDict[:action_input])
        (rawresponse=response[:result], result=extracted, errormsg=nothing, success=true)
      else
        (result=nothing, errormsg=response[:errormsg], success=false)
@@ -1144,8 +739,7 @@ function transition(state::T, args::NamedTuple
  reward::Integer = haskey(response, :reward) ? response[:reward] : 0
  isterminal::Bool = haskey(response, :isterminal) ? response[:isterminal] : false
  newNodeKey, newstate = makeNewState(state, thoughtDict, rawresponse, JSON3.write(result), select, reward, isterminal)
-  progressvalue::Integer = evaluatorF(newstate, text2textInstructLLM; 
-                                    insertSQLVectorDB=insertSQLVectorDB)
+  progressvalue::Integer = evaluatorF(newstate, text2textInstructLLM)

  return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=progressvalue)
 end
@@ -1239,7 +833,7 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
    response = SQLexecution(executeSQL, sql)
    if response[:success]
      # intention = Dict(:intention=> "$(thoughtDict[:plan])")
-      extracted = extractContent_dataframe(response[:result], text2textInstructLLM)
+      extracted = extractContent_dataframe(response[:result], text2textInstructLLM, sql)
      return (text=extracted, rawresponse=response[:result]) 
    end
  end
@@ -1278,13 +872,14 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
  earlystop(state) = state[:reward] >= 8 ? true : false

  _, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
-                      horizontalSampleExpansionPhase=3, 
+                      horizontalSampleExpansionPhase=2, 
                      horizontalSampleSimulationPhase=2,
                      maxSimulationDepth=10, 
                      maxiterations=1, 
                      explorationweight=1.0,
                      earlystop=earlystop,
-                      saveSimulatedNode=true)
+                      saveSimulatedNode=true,
+                      multithread=true)
  latestKey, latestInd = GeneralUtils.findHighestIndexKey(resultState[:thoughtHistory], "observation")
  action_input = Symbol("action_input_$latestInd")  # latest sql
  sql = resultState[:thoughtHistory][action_input]