update

2025-03-18 21:22:12 +07:00
parent 7fd0d6269a
commit e6ce6f9954
7 changed files with 375 additions and 110 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -141,7 +141,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
    For your information:
    - Observation: Result of the immediately preceding action

-    At each round of conversation, the user will give you the current situation:
+    At each round of conversation, the user will give you the following:
    User Query: ...
    Example: ...
    Your Q&A: ...
@@ -164,7 +164,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
      - State your comprehension about the current situation.
    2) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
    3) Action_name (Must be aligned with your plan): Can be one of the following functions: 
-        - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
+        - RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
          For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
          Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
    4) Action_input: Input to the action
@@ -300,7 +300,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
      responsedict[:action_input] = sql
    end

-    toollist = ["TABLEINFO", "GETDATA"]
+    toollist = ["TABLEINFO", "RUNSQL"]
    if responsedict[:action_name] ∉ toollist
      errornote = "\nYou must only use the given functions"
      println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
@@ -340,6 +340,244 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
  end
  error("DecisionMaker failed to generate a thought \n", response)
 end
+# function decisionMaker(state::T1, context, text2textInstructLLM::Function,
+#   ; querySQLVectorDBF::Union{T2, Nothing}=nothing
+#   )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:Function}
+
+#   # lessonDict =
+#   # if isfile("lesson.json")
+#   #   lessonDict = copy(JSON3.read("lesson.json"))
+#   # else
+#   #   lessonDict = nothing
+#   # end
+
+#   # lessonDict = nothing
+
+#   # lesson =
+#   # if lessonDict === nothing
+#   #   ""
+#   # else
+#   #   """
+#   #   You have attempted to help the user before and failed, either because your reasoning for the 
+#   #   recommendation was incorrect or your response did not exactly match the user expectation. 
+#   #   The following lesson(s) give a plan to avoid failing to help the user in the same way you 
+#   #   did previously. Use them to improve your strategy to help the user.
+
+#   #   Here are some lessons in JSON format:
+#   #   $(JSON3.write(lessonDict))
+
+#   #   When providing the thought and action for the current trial, take into account these failed 
+#   #   trajectories and make sure not to repeat the same mistakes and incorrect answers. 
+#   #   """
+#   # end
+
+#   systemmsg = 
+#   """
+#     You are a helpful assistant that find the data from a database to satisfy the user's query. 
+#     You are also eager to improve your helpfulness.
+
+#     For your information:
+#     - Observation: Result of the immediately preceding action
+
+#     At each round of conversation, the user will give you the current situation:
+#     User Query: ...
+#     Example: ...
+#     Your Q&A: ...
+#     Your work progress: ...
+#     Evaluation: Evaluation of the immediately preceding action and observation
+#     Suggestion: Suggestion for the immediately preceding action and observation
+    
+#     You must follow the following guidelines:
+#     - Keep SQL queries focused only on the provided information.
+
+#     You should follow the following guidelines:
+#     - Do not create any table in the database
+#     - A junction table can be used to link tables together. Another use case is for filtering data.
+#     - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
+#     - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
+#     - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
+
+#     You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
+#     1) Comprehension: 
+#       - State your comprehension about the current situation.
+#     2) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
+#     3) Action_name (Must be aligned with your plan): Can be one of the following functions: 
+#         - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
+#           For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
+#           Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
+#     4) Action_input: Input to the action
+
+#     You should only respond in format as described below:
+#     Comprehension: ...
+#     Plan: ...
+#     Action_name: ...
+#     Action_input: ...
+
+#     Let's begin!
+#   """
+  
+#   workprogress = ""
+#   for (k, v) in state[:thoughtHistory]
+#     if k ∉ [:question]
+#       workprogress *= "$k: $v\n"
+#     end
+#   end
+  
+#   response = nothing # store for show when error msg show up
+#   errornote = ""
+
+#   # provide similar sql only for the first attempt
+#   similarSQL_ = "None"
+#   if length(state[:thoughtHistory]) == 1
+#     sql, distance = querySQLVectorDBF(state[:thoughtHistory][:question])
+#     similarSQL_ = sql !== nothing ? sql : "None"
+#   end
+
+
+#   for attempt in 1:10
+#     QandA = generatequestion(state, context, text2textInstructLLM; similarSQL=similarSQL_)
+
+#     usermsg =
+#     """
+#     $(context[:tablelist])
+#     User query: $(state[:thoughtHistory][:question])
+#     Example: $similarSQL_
+#     Your Q&A: $QandA
+#     Your work progress: $workprogress
+#     Evaluation: $(state[:evaluation])
+#     Suggestion: $(state[:suggestion])
+#     $errornote
+#     """
+
+#     _prompt = 
+#     [
+#       Dict(:name=> "system", :text=> systemmsg),
+#       Dict(:name=> "user", :text=> usermsg)
+#     ]
+
+#     # put in model format
+#     prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
+#     response = text2textInstructLLM(prompt)
+
+#     # LLM tends to generate observation given that it is in the input
+#     response = 
+#       if occursin("observation:", response)
+#         string(split(response, "observation:")[1])
+#       elseif occursin("Observation:", response)
+#         string(split(response, "Observation:")[1])
+#       elseif occursin("observation_", response)
+#         string(split(response, "observation_")[1])
+#       elseif occursin("Observation_", response)
+#         string(split(response, "Observation_")[1])
+#       else
+#         response
+#       end
+
+#     # sometimes the LLM outputs something like **Comprehension**: which is not expected
+#     response = replace(response, "**"=>"")
+#     response = replace(response, "***"=>"")
+
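+#     # sometimes the LLM outputs Plan_1: so we need to detect and remove the topic numbering (NOTE(review): the class [0-1000] matches only the digits 0 and 1, so e.g. "_2:" is missed; \d+ was probably intended)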
+#     regex = r"_[0-1000]+:"
+#     matches = collect(eachmatch(regex, response))
+#     for m in matches
+#       response = replace(response, string(m.match)=>":") 
+#     end
+
+#     if occursin("NULL", response)
+#       errornote = "\nSQL decisionMaker() NULL response is not allowed"
+#       println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue
+#     end
+
+#     header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
+#     dictkey = ["comprehension", "plan", "action_name", "action_input"]
+
+#     # detect if there is more than 1 key per category
+#     wordcount = GeneralUtils.countGivenWords(response, header)
+#     duplicateKeywordFlag = false
+#     for (i, v) in enumerate(wordcount)
+#       keyword = header[i]
+#       keywordNumber = v
+#       if keywordNumber > 1
+#         errornote = "\nSQL query has duplicated keyword, $keyword"
+#         println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#         duplicateKeywordFlag = true
+#         break
+#       end
+#     end
+#     duplicateKeywordFlag == true ? continue : nothing
+
+#     # check whether response has all header
+#     kw = []
+#     # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
+#     for keyword in header
+#       detected = GeneralUtils.detect_keyword(keyword, response)
+#       push!(kw, detected)
+#     end
+#     if nothing ∈ kw
+#       println("Some keywords are missing, Required keywords=$header, Response keywords=$kw ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue  # try again next loop
+#     end
+
+#     # textToDict() search for action_input
+#     responsedict = GeneralUtils.textToDict(response, header; 
+#                                             dictKey=dictkey, symbolkey=true)
+
+#     delete!(responsedict, :observation)
+
+#     # remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String})
+#     if occursin("```", responsedict[:action_input])
+#       sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1]
+#       if sql[1:4] == "sql\n"
+#         sql = sql[5:end]
+#       end
+#       sql = split(sql, ';')   # some time there are comments in the sql
+#       sql = sql[1] * ';'
+
+#       responsedict[:action_input] = sql
+#     end
+
+#     toollist = ["TABLEINFO", "GETDATA"]
+#     if responsedict[:action_name] ∉ toollist
+#       errornote = "\nYou must only use the given functions"
+#       println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#       continue
+#     end
+
+#     for i in toollist
+#       if occursin(i, responsedict[:action_input])
+#         errornote = "\n action_name is in action_input which is not allowed."
+#         println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#         continue
+#       end
+#     end
+
+#     for i ∈ [:comprehension, :plan, :action_name, :action_input]
+#       if length(JSON3.write(responsedict[i])) == 0
+#         errornote = "\n $i is empty"
+#         println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#         continue
+#       end
+#     end
+
+#     # check if there is more than 1 key per category
+#     for i ∈ [:comprehension, :plan, :action_name, :action_input]
+#       matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
+#       if length(matchkeys) > 1
+#         errornote = "\n $i has more than one key"
+#         println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+#         continue
+#       end
+#     end
+
+#     state[:decisionMaker] = responsedict
+
+#     return responsedict
+
+#   end
+#   error("DecisionMaker failed to generate a thought \n", response)
+# end

 """ Assigns a scalar value to each new child node to be used for selec-
 tion and backpropagation. This value effectively quantifies the agent's progress in task completion,
@@ -374,9 +612,9 @@ function evaluator(state::T1, text2textInstructLLM::Function
  "reasoning" is agent's step-by-step reasoning about the current situation
  "plan" is agent's plan to complete the task from the current situation
  "action_name" is the name of the action taken, which can be one of the following functions:
-      - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
-          For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
-          Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
+      - RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
+        For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
+        Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
  "action_input" is the input to the action
  "observation" is result of the preceding immediate action

@@ -722,7 +960,7 @@ function transition(state::T, args::NamedTuple
    elseif thoughtDict[:action_name] == "TABLEINFO"
      input = thoughtDict[:action_input]
      tableinfo(executeSQL, input)
-    elseif thoughtDict[:action_name] == "GETDATA"
+    elseif thoughtDict[:action_name] == "RUNSQL"
      response = SQLexecution(executeSQL, thoughtDict[:action_input])
      if response[:success]
        extracted = extractContent_dataframe(response[:result], text2textInstructLLM, thoughtDict[:action_input])
@@ -877,19 +1115,20 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
    LLMMCTS.runMCTS(initialstate, transition, transitionargs;
                  horizontalSampleExpansionPhase=5, 
                  horizontalSampleSimulationPhase=2,
-                  maxSimulationDepth=5, 
+                  maxSimulationDepth=10, 
                  maxiterations=1, 
                  explorationweight=1.0,
                  earlystop=earlystop,
                  saveSimulatedNode=true,
                  multithread=true)

-  #[WORKING] compare all high value state answer then select the best one
+  # compare the answers of all high-value states, then select the best one
  if length(highValueState) > 0
-    open("/appfolder/app/highValueState.json", "w") do io
-      JSON3.pretty(io, highValueState)
-    end
-    resultState = compareState(query, highValueState)
+    # open("/appfolder/app/highValueState.json", "w") do io
+    #   JSON3.pretty(io, highValueState)
+    # end
+    selected = compareState(query, highValueState, text2textInstructLLM)
+    resultState = highValueState[selected]
  end
  latestKey, latestInd = GeneralUtils.findHighestIndexKey(resultState[:thoughtHistory], "observation")
  action_input = Symbol("action_input_$latestInd")  # latest sql
@@ -936,7 +1175,7 @@ function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::
  nextindice = currentstate_latestKey !== nothing ? currentstate_latestIndice + 1 : 1
    # currentstate_latestKey == :NA ? 1 : currentstate_latestIndice + 1

-  currentstate_latestKey = makeNextKey.(keys, nextindice)
+  currentstate_latestKey = makekey.(keys, nextindice)

  # add Thought, action, observation to thoughtHistory
  newstate = deepcopy(currentstate)
@@ -959,9 +1198,6 @@ function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::
 end


-makeNextKey(key, indice) = Symbol("$(key)_$indice")
-
-
 function generatequestion(state::T1, context, text2textInstructLLM::Function;
  similarSQL::Union{T2, Nothing}=nothing
  )::String where {T1<:AbstractDict, T2<:AbstractString}