update

2025-03-09 18:36:57 +07:00
parent 7dd6b56e4c
commit 4f1917e01b
2 changed files with 350 additions and 58 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -339,6 +339,286 @@ julia>
 # Signature
 """
 # function evaluator(state::T1, text2textInstructLLM::Function;
 #   insertSQLVectorDB::Union{Function, Nothing}=nothing
 #   ) where {T1<:AbstractDict}
 #   # systemmsg = 
 #   # """
 #   # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
 #   # Definitions:
 #   # "question" is the user's question.
 #   # "thought" is step-by-step reasoning about the current situation.
 #   # "plan" is what to do to complete the task from the current situation.
 #   # "action" is the taken action which can be one of the following functions:
 #   #   1) TABLEINFO[list_of_table_name], which you can use to get the data type of a table column.
 #   #   2) GETDATA[instruction], which you can use to get the data from the database.
 #   #   3) ANSWERBOX[answer], which returns your answer to the user. "answer" is your answer to the user question.
 #   # "observation" is result of the action in JSON format.
 #   # At each round of conversation, the user will give you:
 #   # Context: ...
 #   # Trajectories: ...
 #   # You should then respond to the user with:
 #   # - Original_question: Repeat the original question.
 #   # - Evaluation (you must evaluate all of the following points):
 #   #     1) Analyze the trajectories of a solution to answer the user's original question. 
 #   #       Given a question and a trajectory, evaluate its correctness and provide your reasoning and
 #   #       analysis in detail. Focus on the latest thought, action, and observation. 
 #   #       Incomplete trajectories can be correct if the thoughts and actions so far are correct, 
 #   #       even if the answer is not found yet. Do not generate additional thoughts or actions.
 #   #     2) How the observation addresses the original question?
 #   #     3) Provide suggestion (if applicable).
 #   # - Score: Correctness score s where s is an integer from 0 to 10.
 #   # - Accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
 #   #     1) The accepted observation should directly answer the question.
 #   #     2) The possible responses are either 'Yes' or 'No.' 
 #   # You should only respond in JSON format as described below:
 #   #   {"original_question": ..., "evaluation": ..., "score": ..., "accepted_as_answer": ...}
 #   # Here are correct trajectory examples:
 #   # user:
 #   # {
 #   #   "question": "I'm looking for a sedan with an automatic driving feature.",
 #   #   "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
 #   #   "thought_2": "I should check our inventory first to see if we have the one our customer wants.", 
 #   #   "action_1": {"name": "inventory", "input": "a sedan with an automatic driving feature"},
 #   #   "observation_1": "Yiem Model A, Conez Model B"
 #   # }
 #   # assistant:
 #   # {
 #   #   "original_question": "the user is looking for a sedan with an automatic driving feature.",
 #   #   "evaluation": "This trajectory is correct because it is logical to use the INVENTORY function to search for inventory based on the details provided in the question, which could lead to a potential answer. The user is asking whether do you have a sedan with an automatic driving feature and the observation provides a list of sedan models that you have. Thus, it is accepted as the answer.",
 #   #   "score": 10,
 #   #   "accepted_as_answer": "Yes"
 #   # }
 #   # user:
 #   # {
 #   #   "question": "How many cars that fitted with a stereo we have?",
 #   #   "thought_1": "I have many types of car in my inventory, each with diverse features.",
 #   #   "thought_3": "I should check our inventory.", 
 #   #   "action_1": {"name": "inventory", "input": "vehicle with a stereo"},
 #   #   "observation_1": "2015 Conez truck."
 #   # }
 #   # assistant:
 #   # {
 #   #     "evaluation": "This approach is correct. It's reasonable to use the INVENTORY function to search for inventory. However, the query asked for a car but the observation was a truck. Thus it is not accepted as the answer. To improve, make sure to input the correct terms and match the requested criteria accurately.",
 #   #   "score": 5,
 #   #   "accepted_as_answer": "No"
 #   # }
 #   # Here are incorrect trajectory examples:
 #   # user:
 #   # {
 #   #   "question": "I'm looking for a sedan with an automatic driving feature. Do you have it in stock?",
 #   #   "thought_1": "I have many types of sedans in my inventory, each with diverse features.",
 #   #   "thought_2": "I will use SEARCHINTERNET function to search for the car.", 
 #   #   "action_1": {"name": "SEARCHINTERNET", "input": "a sedan with an automatic driving feature."},
 #   #   "observation_1": "Teza Model A, Teza Model B"
 #   # }
 #   # assistant:
 #   # {
 #   #   "evaluation": "This trajectory is incorrect. Using the SEARCHINTERNET function to search for a sedan in the Internet is illogical because the question asked for the cars available for sale at your dealership. To improve, ensure that you read the question clearly.",
 #   #   "score": 0,
 #   #   "accepted_as_answer": "No"
 #   # }
 #   # Let's begin!
 #   # """
 #   # systemmsg = 
 #   # """
 #   # You are a helpful assistant that analyzes agent's trajectories to find solutions and observations (i.e., the results of actions) to answer the user's questions.
 #   # Definitions:
 #   # "question" is the user's question.
 #   # "thought" is step-by-step reasoning about the current situation.
 #   # "plan" is what to do to complete the task from the current situation.
 #   # "action_name" is the name of the action taken, which can be one of the following functions:
 #   #     1) CHATBOX[text], which you can use to talk with the user. "text" is in verbal English. 
 #   #     2) WINESTOCK[query], which you can use to find info about wine in your inventory. "query" is a search term in verbal English. The best query must includes "budget", "type of wine", "characteristics of wine" and "food pairing".
 #   # "action_input" is the input to the action
 #   # "observation" is result of the action.
 #   # At each round of conversation, the user will give you:
 #   # Context: ...
 #   # Trajectories: ...
 #   # You should then respond to the user with:
 #   # - original_question: Repeat the original question.
 #   # - evaluation (you must evaluate all of the following points in a single paragraph):
 #   #     1) Analyze the trajectories of a solution to answer the user's original question. 
 #   #       Given a question and a trajectory, evaluate its correctness and provide your reasoning and
 #   #       analysis in detail. Focus on the latest thought, action, and observation. 
 #   #       Incomplete trajectories can be correct if the thoughts and actions so far are correct, 
 #   #       even if the answer is not found yet. Do not generate additional thoughts or actions.
 #   #     2) How the observation addresses the question exactly?
 #   # - accepted_as_answer: Decide whether to accept the observation as the answer to the original question.
 #   #     1) if the observation's content directly answers the question then just accept it as the answer. Otherwise, it is not. The possible responses are either 'Yes' or 'No.'
 #   # - score: Correctness score s where s is a single integer between 0 to 9. 
 #   #     1) 0 means the trajectories are incorrect.
 #   #     2) 9 means the trajectories are correct, and the observation's content directly answers the question.
 #   # - suggestion: if accepted_as_answer is "No", provide suggestion.
 #   # You should only respond in format as described below:
 #   # original_question: ...
 #   # evaluation: ...
 #   # accepted_as_answer: ...
 #   # score: ...
 #   # suggestion: ...
 #   # Let's begin!
 #   # """
 #   systemmsg = 
 #   """
 #   You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions.
 #   Definitions:
 #   "question" is the user's question
 #   "understanding" is agent's understanding about the current situation
 #   "reasoning" is agent's step-by-step reasoning about the current situation
 #   "plan" is agent's plan to complete the task from the current situation
 #   "action_name" is the name of the action taken, which can be one of the following functions:
 #       - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
 #           For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
 #           Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
 #   "action_input" is the input to the action
 #   "observation" is result of the preceding immediate action
 #   At each round of conversation, the user will give you:
 #   Trajectory: ...
 #   Error note: error note from your previous attempt
 #   You must follow the following guidelines:
 #   - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
 #   You should then respond to the user with:
 #   1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
 #     - Evaluate the correctness of each section and the overall trajectory based on the given question.
 #     - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
 #     - Incomplete trajectory are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached.
 #     - Do not generate additional thoughts or actions.
 #   2) Answer_evaluation: 
 #     - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question
 #     - State your rationale
 #   3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
 #     Bad example (The observation didn't answers the question):
 #       question: Find cars with 4 wheels.
 #       observation: There are an apple in the table.
 #     Good example (The observation answers the question): 
 #       question: Find cars with a stereo.
 #       observation: There are 1 cars in the table. 1) brand: Toyota, model: yaris, color: black.
 #   4) Score: Correctness score s where s is a single integer between 0 to 9. 
 #       Score guideline:
 #       - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed
 #       - 4 indicates that the trajectory are correct but the observation is incorrect or failed
 #       - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
 #       - 9 indicates a perfect performance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
 #   5) Suggestion: if accepted_as_answer is "No", provide suggestion.
 #   You should only respond in format as described below:
 #   Trajectory_evaluation: ...
 #   Answer_evaluation: ...
 #   Accepted_as_answer: ...
 #   Score: ...
 #   Suggestion: ...
 #   Let's begin!
 #   """
 #   thoughthistory = ""
 #   for (k, v) in state[:thoughtHistory]
 #     thoughthistory *= "$k: $v\n" 
 #   end
 #   errornote = ""
 #   for attempt in 1:5
 #     usermsg =
 #     """
 #     Trajectory: $thoughthistory
 #     Error note: $errornote
 #     """
 #     _prompt = 
 #     [
 #       Dict(:name=> "system", :text=> systemmsg),
 #       Dict(:name=> "user", :text=> usermsg)
 #     ]
 #     # put in model format
 #     prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
 #     prompt *=
 #       """
 #       <|start_header_id|>assistant<|end_header_id|>
 #       """
 #     header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
 #     try
 #       response = text2textInstructLLM(prompt)
 #       # make sure every header is in the response
 #       for i in header
 #         detected = GeneralUtils.detect_keyword(i, response)
 #         if detected === nothing
 #           error("Keyword $i not found in response")
 #         end
 #       end
 #       responsedict = GeneralUtils.textToDict(response,
 #         header; 
 #         rightmarker=":", symbolkey=true, lowercasekey=true)
 #       # check if dict has all required value
 #       trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
 #       answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
 #       # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "")  # remove (...) if there is any.
 #       responsedict[:score] = responsedict[:score][1]  # Sometimes "6\nThe trajectories are incomplete" is generated but I only need the number.
 #       responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
 #       score::Integer = responsedict[:score]
 #       accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
 #       suggestion::AbstractString = responsedict[:suggestion]
 #       if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
 #         error("generated accepted_as_answer has wrong format")
 #       end
 #       # add to state here instead of in transition() because the latter causes julia extension crash (a bug in julia extension)
 #       state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" 
 #       state[:evaluationscore] = responsedict[:score] 
 #       state[:accepted_as_answer] = responsedict[:accepted_as_answer]
 #       state[:suggestion] = responsedict[:suggestion]
 #       # mark as terminal state when the answer is achieved
 #       if accepted_as_answer == "Yes"
 #         # mark the state as terminal state because the evaluation say so.
 #         state[:isterminal] = true
 #         # evaluation score as reward because different answers hold different value for the user.
 #         state[:reward] = responsedict[:score]
 #       end
 #       println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
 #       pprintln(Dict(responsedict))
 #       return responsedict[:score]
 #     catch e
 #       io = IOBuffer()
 #       showerror(io, e)
 #       errorMsg = String(take!(io))
 #       st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
 #       println("")
 #       println("Attempt $attempt. Error occurred: $errorMsg\n$st")
 #       println("")
 #     end
 #   end
 #   error("evaluator failed to generate an evaluation")
 # end
 function evaluator(state::T1, text2textInstructLLM::Function;
  insertSQLVectorDB::Union{Function, Nothing}=nothing
  ) where {T1<:AbstractDict}
@@ -490,14 +770,16 @@ function evaluator(state::T1, text2textInstructLLM::Function;
  "action_input" is the input to the action
  "observation" is result of the preceding immediate action
-  At each round of conversation, the user will give you:
+  <At each round of conversation, the user will give you>
  Context: ...
  Trajectory: ...
  Error note: error note from your previous attempt
  </At each round of conversation, the user will give you>
-  You must follow the following guidelines:
+  <You must follow the following guidelines>
  - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
-    
+  </You must follow the following guidelines>  
-  You should then respond to the user with:
+
  <You should then respond to the user with>
  1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
    - Evaluate the correctness of each section and the overall trajectory based on the given question.
    - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
@@ -520,13 +802,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
      - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
      - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
  5) Suggestion: if accepted_as_answer is "No", provide suggestion.
  </You should then respond to the user with>
-  You should only respond in format as described below:
+  <You should only respond in format as described below>
  Trajectory_evaluation: ...
  Answer_evaluation: ...
  Accepted_as_answer: ...
  Score: ...
  Suggestion: ...
  </You should only respond in format as described below>
  Let's begin!
  """
@@ -536,10 +820,15 @@ function evaluator(state::T1, text2textInstructLLM::Function;
    thoughthistory *= "$k: $v\n" 
  end
-  for attempt in 1:5
+  errornote = ""
  for attempt in 1:10
    errorFlag = false
    usermsg =
    """
    Trajectory: $thoughthistory
    Error note: $errornote
    """
    _prompt = 
@@ -555,59 +844,62 @@ function evaluator(state::T1, text2textInstructLLM::Function;
      <|start_header_id|>assistant<|end_header_id|>
      """
-    try
+    header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
      response = text2textInstructLLM(prompt)
      responsedict = GeneralUtils.textToDict(response,
        ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]; 
        rightmarker=":", symbolkey=true, lowercasekey=true)
-      # check if dict has all required value
+    response = text2textInstructLLM(prompt)
-      trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
+    # make sure every header is in the response
-      answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
+    for i in header
-      # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "")  # remove (...) if there is any.
+      detected = GeneralUtils.detect_keyword(i, response)
-      responsedict[:score] = responsedict[:score][1]  # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
+      if detected === nothing
-      responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
+        errornote = "Keyword $i not found in response"
-      score::Integer = responsedict[:score]
+        errorFlag = true
      accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
      suggestion::AbstractString = responsedict[:suggestion]
      if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
        error("generated accepted_as_answer has wrong format")
      end
      # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
      state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" 
      state[:evaluationscore] = responsedict[:score] 
      state[:accepted_as_answer] = responsedict[:accepted_as_answer]
      state[:suggestion] = responsedict[:suggestion]
      # mark as terminal state when the answer is achieved
      if accepted_as_answer == "Yes"
        # mark the state as terminal state because the evaluation say so.
        state[:isterminal] = true
        # evaluation score as reward because different answers hold different value for the user.
        state[:reward] = responsedict[:score]
      end
      println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
      pprintln(Dict(responsedict))
      return responsedict[:score]
    catch e
      io = IOBuffer()
      showerror(io, e)
      errorMsg = String(take!(io))
      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
      println("")
      println("Attempt $attempt. Error occurred: $errorMsg\n$st")
      println("")
    end
    if errorFlag
      continue  # skip to the next iteration
    end
    responsedict = GeneralUtils.textToDict(response,
      header; 
      rightmarker=":", symbolkey=true, lowercasekey=true)
    # check if dict has all required value
    trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
    answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
    # responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "")  # remove (...) if there is any.
    responsedict[:score] = responsedict[:score][1]  # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
    responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
    score::Integer = responsedict[:score]
    accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
    suggestion::AbstractString = responsedict[:suggestion]
    if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
      error("generated accepted_as_answer has wrong format")
    end
    # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
    state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" 
    state[:evaluationscore] = responsedict[:score] 
    state[:accepted_as_answer] = responsedict[:accepted_as_answer]
    state[:suggestion] = responsedict[:suggestion]
    # mark as terminal state when the answer is achieved
    if accepted_as_answer == "Yes"
      # mark the state as terminal state because the evaluation say so.
      state[:isterminal] = true
      # evaluation score as reward because different answers hold different value for the user.
      state[:reward] = responsedict[:score]
    end
    println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
    pprintln(Dict(responsedict))
    return responsedict[:score]
  end
  error("evaluator failed to generate an evaluation")
 end
 """
 # Arguments
@@ -979,9 +1271,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
  earlystop(state) = state[:reward] >= 8 ? true : false
  _, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
-                      horizontalSampleExpansionPhase=2, 
+                      horizontalSampleExpansionPhase=5, 
-                      horizontalSampleSimulationPhase=1,
+                      horizontalSampleSimulationPhase=2,
-                      maxSimulationDepth=3, maxiterations=2, 
+                      maxSimulationDepth=10, maxiterations=2, 
                      explorationweight=1.0,
                      earlystop=earlystop,
                      saveSimulatedNode=true)
--- a/test/runtest.jl
+++ b/test/runtest.jl
@@ -35,7 +35,7 @@ function text2textInstructLLM(prompt::String)
    msgPurpose="inference",
    senderName="yiemagent",
    senderId=sessionId,
-    receiverName="text2textinstruct_medium",
+    receiverName="text2textinstruct_small",
    mqttBrokerAddress=config[:mqttServerInfo][:broker],
    mqttBrokerPort=config[:mqttServerInfo][:port],
  )
@@ -64,7 +64,7 @@ function getEmbedding(text::T) where {T<:AbstractString}
    msgPurpose="embedding",
    senderName="yiemagent",
    senderId=sessionId,
-    receiverName="text2textinstruct_medium",
+    receiverName="text2textinstruct_small",
    mqttBrokerAddress=config[:mqttServerInfo][:broker],
    mqttBrokerPort=config[:mqttServerInfo][:port],
  )