update

2025-05-06 06:49:21 +07:00
parent 2541223bbb
commit aeda7e0baf
3 changed files with 95 additions and 51 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -102,7 +102,7 @@ Dict(

 # Signature
 """
-function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFormatName::String
+function decisionMaker(state::T1, additionalinfo, text2textInstructLLM::Function, llmFormatName::String
  ; querySQLVectorDBF::Union{T2, Nothing}=nothing, maxattempt=10
  )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:Function}

@@ -143,9 +143,8 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo

    At each round of conversation, you will be given the following information:
    User Query: ...
-    Example: ...
-    Your Q&A: ...
-    Your work progress: ...
+    Closest SQL: The closest known SQL for this query
+    Your work progress: your progress so far
    Evaluation: Evaluation of the immediately preceding action and observation
    Suggestion: Suggestion for the immediately preceding action and observation
    
@@ -156,8 +155,8 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    - Do not create any table in the database
    - A junction table can be used to link tables together. Another use case is for filtering data.
    - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
-    - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
    - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
+    - If there is no search result from the database, remove the restrictive criteria until a search result is available, and proceed from there.

    You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input:
      Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
@@ -203,33 +202,30 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo

  for attempt in 1:maxattempt

-    QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)
+    # QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)

-    assistantinfo =
+    context =
    """
-    <information>
-    $(context[:tablelist])
-    User query: $(state[:thoughtHistory][:question])
-    Similar SQL: $similarSQL_
-    Your Q&A: $QandA
+    <context>
+    $(additionalinfo[:tablelist])
+    Closest SQL: $similarSQL_
    Your work progress: $workprogress
    Evaluation: $(state[:evaluation])
    Suggestion: $(state[:suggestion])
-    Data specific guidelines:
-     - tasting_notes should not be used as search criteria.
    P.S. $errornote
-    </information>
+    </context>
    """

    unformatPrompt =
      [
        Dict(:name => "system", :text => systemmsg),
+        Dict(:name => "user", :text => state[:thoughtHistory][:question])
      ]

    # put in model format
    prompt = GeneralUtils.formatLLMtext(unformatPrompt, llmFormatName)
    # add info
-    prompt = prompt * assistantinfo
+    prompt = prompt * context
    response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)
@@ -254,12 +250,12 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    response = replace(response, "**"=>"")
    response = replace(response, "***"=>"")

-    # some time LLM output Plan_1: so we need to detect and replace topic numbering
-    regex = r"_[0-1000]+:"
-    matches = collect(eachmatch(regex, response))
-    for m in matches
-      response = replace(response, string(m.match)=>":") 
-    end
+    # # sometimes the LLM outputs "Plan_1:", so we need to detect and strip the topic numbering
+    # regex = r"_[0-1000]+:"
+    # matches = collect(eachmatch(regex, response))
+    # for m in matches
+    #   response = replace(response, string(m.match)=>":") 
+    # end

    if occursin("NULL", response)
      errornote = "\nYour previous attempt was NULL. This is not allowed"
@@ -334,10 +330,39 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
      end
    end

+    # store for later training
+    responsedict[:thoughthistory] = state[:thoughtHistory]
+    responsedict[:system] = systemmsg
+    responsedict[:prompt] = prompt
+    responsedict[:context] = context
+    responsedict[:think] = think
    state[:decisionMaker] = responsedict

    println("\nSQLLLM decisionMaker() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
    pprintln(Dict(responsedict))
+    # read the session id from /appfolder/app/sessionid.json
+    sessionid = JSON3.read("/appfolder/app/sessionid.json")
+
+    # save to /appfolder/app/log/agent_decision_log_<sessionid>.json
+    println("saving SQLLLM decisionMaker() to disk")
+    filename = "agent_decision_log_$(sessionid[:id]).json"
+    filepath = "/appfolder/app/log/$filename"
+    # check whether the log file already exists before writing to it
+    if !isfile(filepath)
+      decisionlist = [responsedict]
+      println("Creating file $filepath")
+      open(filepath, "a") do io
+        JSON3.pretty(io, decisionlist)
+      end
+    else
+      # read the file and append new data
+      decisionlist = copy(JSON3.read(filepath))
+      push!(decisionlist, responsedict)
+      println("Appending new data to file $filepath")
+      open(filepath, "w") do io
+        JSON3.pretty(io, decisionlist)
+      end
+    end

    return responsedict
  end
@@ -596,7 +621,7 @@ julia>

 # Signature
 """
-function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::String; 
+function evaluator(state::T1, additionalinfo, text2textInstructLLM::Function, llmFormatName::String; 
  maxattempt=10
  ) where {T1<:AbstractDict}

@@ -616,9 +641,8 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
  "action_input" is the input to the action
  "observation" is result of the preceding immediate action

-  At each round of conversation, the user will give you:
+  At each round of conversation, you will be given the following information:
  Trajectory: ...
-  Error_note: error note from your previous attempt

  You must follow the following guidelines:
  - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
@@ -641,8 +665,8 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
  4) Score: Correctness score s where s is a single integer between 0 to 9. 
      For example:
      - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed
-      - 4 indicates that the trajectory are correct but the observation is incorrect or failed
-      - 5 indicates that the trajectory are correct, but no results are returned.
+      - 4 indicates that the trajectory are correct, but no results are returned.
+      - 5 indicates that the trajectory are correct but the observation is incorrect or failed
      - 6 indicates that the trajectory are correct, but the observation's content doesn't directly answer the question
      - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
      - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
@@ -657,6 +681,9 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str

  Let's begin!
  """
+  #[WORKING] add what I should think --> this will be the think for decisionMaker()
+  header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
+  dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]

  thoughthistory = ""
  for (k, v) in state[:thoughtHistory]
@@ -668,20 +695,24 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
    usermsg =
    """
    Trajectory: $thoughthistory
+    """
+    context =
+    """
+    <context>
+    $(additionalinfo[:tablelist])
    P.S. $errornote
+    </context>
    """

-    _prompt = 
+    unformatPrompt =
      [
        Dict(:name => "system", :text => systemmsg),
        Dict(:name => "user", :text => usermsg)
      ]
-
    # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt, llmFormatName)
-
-    header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
-    dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]
+    prompt = GeneralUtils.formatLLMtext(unformatPrompt, llmFormatName)
+    # add info
+    prompt = prompt * context

    response = text2textInstructLLM(prompt, modelsize="medium")
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
@@ -738,6 +769,8 @@ function evaluator(state::T1, text2textInstructLLM::Function, llmFormatName::Str
      # evaluation score as reward because different answers hold different value for the user.
      state[:reward] = responsedict[:score]
    end
+
+    responsedict[:think] = think
    println("\nEvaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
    pprintln(Dict(responsedict))

@@ -979,7 +1012,7 @@ function transition(state::T, args::NamedTuple
  reward::Integer = haskey(response, :reward) ? response[:reward] : 0
  isterminal::Bool = haskey(response, :isterminal) ? response[:isterminal] : false
  newNodeKey, newstate = makeNewState(state, thoughtDict, rawresponse, JSON3.write(result), select, reward, isterminal)
-  progressvalue::Integer = evaluatorF(newstate, text2textInstructLLM, llmFormatName)
+  progressvalue::Integer = evaluatorF(newstate, context, text2textInstructLLM, llmFormatName)

  return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=progressvalue)
 end
@@ -1241,14 +1274,14 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;

  root, _, resultState, highValueState = 
    LLMMCTS.runMCTS(initialstate, transition, transitionargs;
-                  horizontalSampleExpansionPhase=2, 
-                  horizontalSampleSimulationPhase=2,
+                  horizontalSampleExpansionPhase=1, 
+                  horizontalSampleSimulationPhase=1,
                  maxSimulationDepth=3,
                  maxiterations=1, 
                  explorationweight=1.0,
                  earlystop=earlystop,
                  saveSimulatedNode=true,
-                  multithread=true)
+                  multithread=false)

  # compare all high value state answer then select the best one
  if length(highValueState) > 0
@@ -1271,7 +1304,7 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
  end

  if extracted === nothing
-    println("query() return nothing")
+    println("\nSQLLLM query() return nothing ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
  end

  result = (text=extracted, rawresponse=resultState[:rawresponse])
@@ -1361,7 +1394,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    4) Do not generate any question or comments at the end.

    You should follow the following guidelines:
-    - When querying data in the database, start with broad search terms and refine your query later for more precise results. 
+    - If there is no search result from the database, remove the restrictive criteria until a search result is available, and proceed from there.

    You should then respond to the user with:
    1) Q: Given the situation, "ask yourself" about the situation at least three, but no more than five, questions.
@@ -1373,8 +1406,6 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    A1: ...
    Q2: ...
    A2: ...
-    Q3: ...
-    A3: ...
    ...

    Here are some examples:
@@ -1384,6 +1415,10 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    A: ...
    Q: Why the query failed?
    A: ...
+    Q: What criteria become more restrictive as the search scope broadens and can be remove?
+    A: In the "2019 Toyota Camry hybrid" search query, "2019" represents the most restrictive criteria because it narrows the data scope to a specific year, whereas "Toyota" and "Camry" are broader categories that allow for more general results.
+    Q: What works and what not previously?
+    A: ...

    Let's begin!
  """
@@ -1409,6 +1444,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function,
    Example: $similarSQL
    Your work progress: $workprogress
    P.S. $errornote
+    /no_think
    """

    _prompt = 
--- a/src/llmfunction.jl
+++ b/src/llmfunction.jl
@@ -517,7 +517,7 @@ function SQLexecution(executeSQL::Function, sql::T
    tablesize = size(df)
    row, column = tablesize
    if row == 0
-      error("\nThe resulting table has 0 row. Possible causes: 1) Your search criteria might be overly specific. Consider removing or adjusting highly specific conditions (e.g., exact values, exact phrases, narrow ranges). Start with broader terms and refine your search incrementally. This often resolves empty result. 2) When searching for data like wine tasting notes, which often use multiple phrases to describe characteristics, start with a single keyword. If multiple results are returned, you can refine the search further to narrow down the matches. 3) There could be a typo in your search query. 4) You might be searching in the wrong place.")
+      error("\nThe resulting table has 0 row. Please try again.")
    elseif column > 30
      error("\nSQL execution failed. An unexpected error occurred. Please try again.")
    end
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -208,7 +208,14 @@ function insertSommelierDecision(recentevents::T1, decision::T2; maxdistance::In
  end
 end

-sessionId = "555"
+sessionId = GeneralUtils.uuid4snakecase()
+d = Dict(:id => sessionId)
+filepath = "/appfolder/app/sessionid.json"
+open(filepath, "w") do io
+  JSON3.pretty(io, d)
+end
+
+


 # query = "How many German wines do you have?"
@@ -218,8 +225,9 @@ sessionId = "555"


 # query = Dict(:text=> "How many wines from France do you have that can be paired with lamb?")
-query = "How many French wines from Yiem store under 100 dollars do you have?"
+# query = "How many French wines from Yiem store under 100 dollars do you have?"
 # query = "retailer: Yiem, wine_type: red, sweetness: 1-2, intensity: 4-5, wine price: 20-40"
+query = "from Yiem retailer, red wine from Burgundy, France. Merlot varietal. price 100 to 1000 USD. sweetness: 1-2, intensity: 4-5"
 # query = "wine_type: white, country: United States, sweetness: 1-2, tannin: 3, food to be served with wine: pizza"
 # query = "wine_type: white, country: Austria, food to be served with wine: pork"
 # query = "wine price: less than 25, wine_type: rose, country: France, sweetness: 2, tannin: 3, food to be served with wine: pizza"