update

2025-05-04 13:30:08 +07:00
parent 5112701dc2
commit c8f5983620
2 changed files with 44 additions and 51 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -160,7 +160,6 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
    You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input:
      Comprehension: state your comprehension about the current situation.
      Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
      Action_name: (Typically corresponds to the execution of the first step in your plan)
        Can be one of the following function names: 
@@ -170,7 +169,6 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    4) Action_input: Input to the action
    You should only respond in format as described below:
    Comprehension: ...
    Plan: ...
    Action_name: ...
    Action_input: ...
@@ -195,16 +193,15 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    similarSQL_ = sql !== nothing ? sql : "None"
  end
-  header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
+  header = ["Plan:", "Action_name:", "Action_input:"]
-  dictkey = ["comprehension", "plan", "action_name", "action_input"]
+  dictkey = ["plan", "action_name", "action_input"]
  llmkwargs=Dict(
      :num_ctx => 32768,
-      :temperature => 0.1,
+      :temperature => 0.5,
    )
  for attempt in 1:maxattempt
    attempt > 1 ? llmkwargs[:temperature] += 0.1 : nothing
    QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)
@@ -230,6 +227,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    prompt = GeneralUtils.formatLLMtext(_prompt, llmFormatName)
    response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)
    # LLM tends to generate observation given that it is in the input
    response = 
@@ -376,16 +374,14 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
  "action_input" is the input to the action
  "observation" is result of the preceding immediate action
-  <At each round of conversation, the user will give you>
+  At each round of conversation, the user will give you:
  Trajectory: ...
  Error_note: error note from your previous attempt
  </At each round of conversation, the user will give you>
-  <You must follow the following guidelines>
+  You must follow the following guidelines:
  - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
  </You must follow the following guidelines>  
-  <You should then respond to the user with>
+  You should then respond to the user with:
  1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
    - Evaluate the correctness of each section and the overall trajectory based on the given question.
    - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
@@ -408,16 +404,14 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
      - 6 indicates that the trajectory are correct, but the observation's content doesn't directly answer the question
      - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
      - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
-  5) Suggestion: if accepted_as_answer is "No", provide suggestion.
+  5) Suggestion: what are the possible reasons for this outcome, what can you learn from it, and what suggestions can be made?
  </You should then respond to the user with>
-  <You should only respond in format as described below>
+  You should only respond in format as described below:
  Trajectory_evaluation: ...
  Answer_evaluation: ...
  Accepted_as_answer: ...
  Score: ...
  Suggestion: ...
  </You should only respond in format as described below>
  Let's begin!
  """
@@ -427,7 +421,7 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
    thoughthistory *= "$k: $v\n" 
  end
-  errornote = ""
+  errornote = "N/A"
  for attempt in 1:maxattempt
    usermsg =
    """
@@ -449,6 +443,7 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
    response = text2textInstructLLM(prompt, modelsize="medium")
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)
    # sometime LLM output something like **Comprehension**: which is not expected
    response = replace(response, "**"=>"")
@@ -1004,9 +999,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
  root, _, resultState, highValueState = 
    LLMMCTS.runMCTS(initialstate, transition, transitionargs;
-                  horizontalSampleExpansionPhase=3, 
+                  horizontalSampleExpansionPhase=2, 
-                  horizontalSampleSimulationPhase=3,
+                  horizontalSampleSimulationPhase=2,
-                  maxSimulationDepth=5,
+                  maxSimulationDepth=3,
                  maxiterations=1, 
                  explorationweight=1.0,
                  earlystop=earlystop,
@@ -1060,7 +1055,7 @@ function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::
  reward::T3, isterminal::Bool
  )::NamedTuple{(:newNodeKey, :newstate), Tuple{String, Dict{Symbol, <:Any}}} where {T1<:AbstractDict, T2<:AbstractString, T3<:Number, T4<:AbstractDict}
-  keys = [:comprehension, :action_name, :action_input, :observation]
+  keys = [:action_name, :action_input, :observation]
  # latestKeys = []
  currentstate_latestKey, currentstate_latestIndice = 
@@ -1127,14 +1122,11 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    - When querying data in the database, start with broad search terms and refine your query later for more precise results. 
    You should then respond to the user with:
-    1) Understanding: 
+    1) Q: Given the situation, "ask yourself" about the situation at least three, but no more than five, questions.
-        - State your understanding about the current situation.
+    2) A: Given the situation, "answer to yourself" the best you can.
    2) Q: Given the situation, "ask yourself" about the situation at least five, but no more than ten, questions.
    3) A: Given the situation, "answer to yourself" the best you can.
        - Do not generate any text after the last answer.
    You must only respond in format as described below:
    Understanding: ...
    Q1: ...
    A1: ...
    Q2: ...
@@ -1154,8 +1146,8 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    Let's begin!
  """
-  header = ["Understanding:", "Q1:"]
+  header = ["Q1:"]
-  dictkey = ["understanding", "q1"]
+  dictkey = ["q1"]
  workprogress = ""
  for (k, v) in state[:thoughtHistory]
@@ -1165,7 +1157,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
  end
  response = nothing # store for show when error msg show up
-  errornote = ""
+  errornote = "N/A"
  for attempt in 1:maxattempt
    usermsg =
@@ -1188,6 +1180,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    response = text2textInstructLLM(prompt, modelsize="medium")
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)
    # check if response is valid
    q_number = count("Q", response)
--- a/src/llmfunction.jl
+++ b/src/llmfunction.jl
@@ -367,17 +367,14 @@ function getdata_decisionMaker(state::Dict, context::Dict, text2textInstructLLM:
  - Text information in the database is sometimes stored in lower case. If your search returns empty, try using lower case to search.
  You should then respond to the user with:
-  1) Comprehension: 
+  1) Plan: Step-by-step instructions of how to complete the task.
      - State your comprehension about the current situation.
  3) Plan: Step-by-step instructions of how to complete the task.
      - Focus on improving the code from the last round.
      - Do not create any table in the database.
-  4) Code:
+  2) Code:
      - Write new improved code.
      - Do not wrap the code and no comment as it will be executed directly without any modification against the database.
  You should only respond in format as described below and nothing more:
  Comprehension: ...
  Plan: 
    1) ...
    2) ... 
@@ -411,9 +408,10 @@ function getdata_decisionMaker(state::Dict, context::Dict, text2textInstructLLM:
    try
      response = text2textInstructLLM(prompt, modelsize="medium")
      response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
      think, response = GeneralUtils.extractthink(response)
-      header = ["Comprehension:", "Plan:", "Code:"]
+      header = ["Plan:", "Code:"]
-      dictkey = ["comprehension", "plan", "code"]
+      dictkey = ["plan", "code"]
      responsedict = GeneralUtils.textToDict(response, header; 
                                            dictKey=dictkey, symbolkey=true)
@@ -519,9 +517,9 @@ function SQLexecution(executeSQL::Function, sql::T
    tablesize = size(df)
    row, column = tablesize
    if row == 0
-      error("The resulting table has 0 row. Possible causes: 1) 1) Your search criteria might be overly specific. Consider removing or adjusting highly specific conditions (e.g., exact values, exact phrases, narrow ranges). Start with broader terms and refine your search incrementally. This often resolves empty result. 2) There could be a typo in your search query. 3) You might be searching in the wrong place.")
+      error("\nThe resulting table has 0 row. Possible causes: 1) Your search criteria might be overly specific. Consider removing or adjusting highly specific conditions (e.g., exact values, exact phrases, narrow ranges). Start with broader terms and refine your search incrementally. This often resolves empty result. 2) When searching for data like wine tasting notes, which often use multiple phrases to describe characteristics, start with a single keyword. If multiple results are returned, you can refine the search further to narrow down the matches. 3) There could be a typo in your search query. 4) You might be searching in the wrong place.")
    elseif column > 30
-      error("SQL execution failed. An unexpected error occurred. Please try again.")
+      error("\nSQL execution failed. An unexpected error occurred. Please try again.")
    end
    df1 =
@@ -637,6 +635,7 @@ function extractContent_dataframe(df::DataFrame, text2textInstructLLM::Function,
  for i in 1:5
    response = text2textInstructLLM(prompt, modelsize="medium")
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)
    kw = []
    # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
@@ -839,7 +838,7 @@ function compareState(question::String, highValueStateList::Vector{T},
      Question: the question the user is trying to answer
      Attempt: the user's attempted actions and their corresponding results
    You should then respond to the user with the following:
-      Comparison: a comparison of all results from all attempts
+      Comparison: detailed comparison of all results from all attempts from various aspects.
      Rationale: a brief explanation of why the selected response is the most accurate and relevant
      Selected_response_number: the number the selected response in the list of results (e.g., 1, 2, 3, ...)
    You should only respond in format as described below:
@@ -848,10 +847,10 @@ function compareState(question::String, highValueStateList::Vector{T},
      Selected_response_number: ...
    Here are some examples:
      User's question: "How many German wines do you have?"
-      Attempt 1:
+      Attempt 1)
      Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany'
      Result: 100 wines
-      Attempt 2:
+      Attempt 2)
      Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany' AND type = 'Red'
      Result: 50 red wines
      Comparison: The second attempt counts only German red wines while the first attempt includes all German wines.
@@ -879,26 +878,26 @@ function compareState(question::String, highValueStateList::Vector{T},
  """
  # put potential solutions from potentialSolution into the following form
-  Attempt 1
+  Attempt 1)
-  action_name: 
+    action_name: 
-  action_input:
+    action_input:
-  observation:
+    observation:
-  Attempt 2
+  Attempt 2)
-  action_name: 
+    action_name: 
-  action_input:
+    action_input:
-  observation:
+    observation:
  ...
  """
  potentialSolutionStr = ""
  for (i, state) in enumerate(potentialSolution)
-    potentialSolutionStr *= "Attempt $i\n"
+    potentialSolutionStr *= "Attempt $i)\n"
    for k in keys
      potentialSolutionStr *= "$k: $(state[k])\n"
      println("")
    end
  end
-  errornote = ""
+  errornote = "N/A"
  for attempt in 1:10
    errorFlag = false
@@ -928,6 +927,7 @@ function compareState(question::String, highValueStateList::Vector{T},
    response = replace(response, "**"=>"")
    response = replace(response, "***"=>"")
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)
    # make sure every header is in the response
    for i in header