update

2025-05-04 20:56:55 +07:00
parent c8f5983620
commit 2541223bbb
2 changed files with 317 additions and 22 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -141,7 +141,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    For your information:
    - Observation: Result of the immediately preceding action

-    At each round of conversation, the user will give you the following:
+    At each round of conversation, you will be given the following information:
    User Query: ...
    Example: ...
    Your Q&A: ...
@@ -205,30 +205,37 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo

    QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)

-    usermsg =
+    assistantinfo =
    """
+    <information>
    $(context[:tablelist])
    User query: $(state[:thoughtHistory][:question])
-    Example: $similarSQL_
+    Similar SQL: $similarSQL_
    Your Q&A: $QandA
    Your work progress: $workprogress
    Evaluation: $(state[:evaluation])
    Suggestion: $(state[:suggestion])
+    Data specific guidelines:
+     - tasting_notes should not be used as search criteria.
    P.S. $errornote
+    </information>
    """

-    _prompt = 
-    [
-      Dict(:name=> "system", :text=> systemmsg),
-      Dict(:name=> "user", :text=> usermsg)
-    ]
+    unformatPrompt =
+        [
+          Dict(:name => "system", :text => systemmsg),
+        ]

    # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt, llmFormatName)
+    prompt = GeneralUtils.formatLLMtext(unformatPrompt, llmFormatName)
+    # add info
+    prompt = prompt * assistantinfo
    response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
    response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
    think, response = GeneralUtils.extractthink(response)

+    # [WORKING] TODO: check whether tasting_notes occurs after the WHERE clause in the generated SQL
+
    # LLM tends to generate observation given that it is in the input
    response = 
      if occursin("observation:", response)
@@ -328,11 +335,246 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
    end

    state[:decisionMaker] = responsedict
+
+    println("\nSQLLLM decisionMaker() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+    pprintln(Dict(responsedict))
+
    return responsedict
  end
  error("SQLLLM DecisionMaker() failed to generate a thought \n", response)
 end

+# function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFormatName::String
+#   ; querySQLVectorDBF::Union{T2, Nothing}=nothing, maxattempt=10
+#   )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:Function}
+
+#   # lessonDict =
+#   # if isfile("lesson.json")
+#   #   lessonDict = copy(JSON3.read("lesson.json"))
+#   # else
+#   #   lessonDict = nothing
+#   # end
+
+#   # lessonDict = nothing
+
+#   # lesson =
+#   # if lessonDict === nothing
+#   #   ""
+#   # else
+#   #   """
+#   #   You have attempted to help the user before and failed, either because your reasoning for the 
+#   #   recommendation was incorrect or your response did not exactly match the user expectation. 
+#   #   The following lesson(s) give a plan to avoid failing to help the user in the same way you 
+#   #   did previously. Use them to improve your strategy to help the user.
+
+#   #   Here are some lessons in JSON format:
+#   #   $(JSON3.write(lessonDict))
+
+#   #   When providing the thought and action for the current trial, take into account these failed 
+#   #   trajectories and make sure not to repeat the same mistakes and incorrect answers. 
+#   #   """
+#   # end
+
+#   systemmsg = 
+#   """
+#     You are a helpful assistant that find the data from a database to satisfy the user's query. 
+#     You are also eager to improve your helpfulness.
+
+#     For your information:
+#     - Observation: Result of the immediately preceding action
+
+#     At each round of conversation, the user will give you the following:
+#     User Query: ...
+#     Example: ...
+#     Your Q&A: ...
+#     Your work progress: ...
+#     Evaluation: Evaluation of the immediately preceding action and observation
+#     Suggestion: Suggestion for the immediately preceding action and observation
+    
+#     You must follow the following guidelines:
+#     - Keep SQL queries focused only on the provided information.
+
+#     You should follow the following guidelines:
+#     - Do not create any table in the database
+#     - A junction table can be used to link tables together. Another use case is for filtering data.
+#     - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
+#     - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
+#     - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
+
+#     You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input:
+#       Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
+#       Action_name: (Typically corresponds to the execution of the first step in your plan)
+#         Can be one of the following function names: 
+#         - RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
+#           For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
+#           Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
+#     4) Action_input: Input to the action
+
+#     You should only respond in format as described below:
+#     Plan: ...
+#     Action_name: ...
+#     Action_input: ...
+
+#     Let's begin!
+#   """
+  
+#   workprogress = ""
+#   for (k, v) in state[:thoughtHistory]
+#     if k ∉ [:question]
+#       workprogress *= "$k: $v\n"
+#     end
+#   end
+  
+#   response = nothing # store for show when error msg show up
+#   errornote = "N/A"
+
+#   # provide similar sql only for the first attempt
+#   similarSQL_ = "None"
+#   if length(state[:thoughtHistory]) == 1
+#     sql, distance = querySQLVectorDBF(state[:thoughtHistory][:question])
+#     similarSQL_ = sql !== nothing ? sql : "None"
+#   end
+
+#   header = ["Plan:", "Action_name:", "Action_input:"]
+#   dictkey = ["plan", "action_name", "action_input"]
+
+#   llmkwargs=Dict(
+#       :num_ctx => 32768,
+#       :temperature => 0.5,
+#     )
+
+#   for attempt in 1:maxattempt
+
+#     QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)
+
+#     usermsg =
+#     """
+#     $(context[:tablelist])
+#     User query: $(state[:thoughtHistory][:question])
+#     Example: $similarSQL_
+#     Your Q&A: $QandA
+#     Your work progress: $workprogress
+#     Evaluation: $(state[:evaluation])
+#     Suggestion: $(state[:suggestion])
+#     P.S. $errornote
+#     """
+
+#     _prompt = 
+#     [
+#       Dict(:name=> "system", :text=> systemmsg),
+#       Dict(:name=> "user", :text=> usermsg)
+#     ]
+
+#     # put in model format
+#     prompt = GeneralUtils.formatLLMtext(_prompt, llmFormatName)
+#     response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
+#     response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
+#     think, response = GeneralUtils.extractthink(response)
+
+#     # LLM tends to generate observation given that it is in the input
+#     response = 
+#       if occursin("observation:", response)
+#         string(split(response, "observation:")[1])
+#       elseif occursin("Observation:", response)
+#         string(split(response, "Observation:")[1])
+#       elseif occursin("observation_", response)
+#         string(split(response, "observation_")[1])
+#       elseif occursin("Observation_", response)
+#         string(split(response, "Observation_")[1])
+#       else
+#         response
+#       end
+
+#     # sometime LLM output something like **Comprehension**: which is not expected
+#     response = replace(response, "**"=>"")
+#     response = replace(response, "***"=>"")
+
+#     # sometimes the LLM outputs "Plan_1:", so strip the topic numbering (NOTE(review): [0-1000] matches only '0'/'1'; \d+ is probably intended)
+#     regex = r"_[0-1000]+:"
+#     matches = collect(eachmatch(regex, response))
+#     for m in matches
+#       response = replace(response, string(m.match)=>":") 
+#     end
+
+#     if occursin("NULL", response)
+#       errornote = "\nYour previous attempt was NULL. This is not allowed"
+#       println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue
+#     end
+
+#     # # detect if there are more than 1 key per categories
+#     # wordcount = GeneralUtils.countGivenWords(response, header)
+#     # duplicateKeywordFlag = false
+#     # for (i, v) in enumerate(wordcount)
+#     #   keyword = header[i]
+#     #   keywordNumber = v
+#     #   if keywordNumber > 1
+#     #     errornote = "\nSQL query has duplicated keyword, $keyword"
+#     #     println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#     #     duplicateKeywordFlag = true
+#     #     break
+#     #   end
+#     # end
+#     # duplicateKeywordFlag == true ? continue : nothing
+
+#     # check whether response has all header
+#     detected_kw = GeneralUtils.detect_keyword(header, response)
+#     if 0 ∈ values(detected_kw)
+#       errornote = "\nYour previous attempt did not have all points according to the required response format"
+#       println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue
+#     elseif sum(values(detected_kw)) > length(header)
+#       errornote = "\nYour previous attempt has duplicated points according to the required response format"
+#       println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue
+#     end
+
+#     responsedict = GeneralUtils.textToDict(response, header; 
+#                                             dictKey=dictkey, symbolkey=true)
+
+#     delete!(responsedict, :observation)
+
+#     # remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String})
+#     if occursin("```", responsedict[:action_input])
+#       sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1]
+#       if sql[1:4] == "sql\n"
+#         sql = sql[5:end]
+#       end
+#       sql = split(sql, ';')   # some time there are comments in the sql
+#       sql = sql[1] * ';'
+
+#       responsedict[:action_input] = sql
+#     end
+
+#     toollist = ["TABLEINFO", "RUNSQL"]
+#     if responsedict[:action_name] ∉ toollist
+#       errornote = "Your previous attempt has action_name that is not in the tool list"
+#       println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue
+#     end
+
+#     for i in toollist
+#       if occursin(i, responsedict[:action_input])
+#         errornote = "Your previous attempt has action_name in action_input which is not allowed"
+#         println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#         continue
+#       end
+#     end
+
+#     for i ∈ Symbol.(dictkey)
+#       if length(JSON3.write(responsedict[i])) == 0
+#         errornote = "Your previous attempt has empty value for $i"
+#         println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#         continue
+#       end
+#     end
+
+#     state[:decisionMaker] = responsedict
+#     return responsedict
+#   end
+#   error("SQLLLM DecisionMaker() failed to generate a thought \n", response)
+# end
+

 """ Assigns a scalar value to each new child node to be used for selec-
 tion and backpropagation. This value effectively quantifies the agent's progress in task completion,