update

2024-10-16 13:10:31 +07:00
parent a711c0c111
commit 159d1717a4
2 changed files with 181 additions and 206 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -158,6 +158,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
    - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
    - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
    - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
+    - Do not use backticks (`). Use double quotes instead.

    You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
    1) Understanding: 
@@ -183,7 +184,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,

    Let's begin!
  """
-  
+  # [WORKING] system prompt now instructs the LLM not to use backticks
  workprogress = ""
  for (k, v) in state[:thoughtHistory]
    if k ∉ [:query]
@@ -191,31 +192,37 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
    end
  end
  
-  usermsg =
-  """
-  $(context[:tablelist])
-  User query: $(state[:thoughtHistory][:question])
-  Hints: $similarSQL
-  Your Q&A: $QandA
-  Your work progress: $workprogress
-  Evaluation: $(state[:evaluation])
-  Suggestion: $(state[:suggestion])
-  """
-
-  _prompt = 
-  [
-    Dict(:name=> "system", :text=> systemmsg),
-    Dict(:name=> "user", :text=> usermsg)
-  ]
-
-  # put in model format
-  prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-  prompt *=
-    """
-    <|start_header_id|>assistant<|end_header_id|>
-    """
  response = nothing # store for show when error msg show up
+  errornote = ""
+  noise = ""
+
  for attempt in 1:10
+    usermsg =
+    """
+    $(context[:tablelist])
+    User query: $(state[:thoughtHistory][:question])
+    Hints: $similarSQL
+    Your Q&A: $QandA
+    Your work progress: $workprogress
+    Evaluation: $(state[:evaluation])
+    Suggestion: $(state[:suggestion])
+    $errornote
+    $noise
+    """
+
+    _prompt = 
+    [
+      Dict(:name=> "system", :text=> systemmsg),
+      Dict(:name=> "user", :text=> usermsg)
+    ]
+
+    # put in model format
+    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
+    prompt *=
+      """
+      <|start_header_id|>assistant<|end_header_id|>
+      """
+
    try
      response = text2textInstructLLM(prompt)

@@ -226,6 +233,10 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,

      delete!(responsedict, :observation)

+      if occursin('`', response)
+        response = replace(response, '`'=>"")
+      end
+
      toollist = ["TABLEINFO", "GETDATA"]
      if responsedict[:action_name] ∉ toollist
        error("SQL decisionMaker() didn't use the given functions ", @__FILE__, " ", @__LINE__)
@@ -258,220 +269,175 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
      errorMsg = String(take!(io))
      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
      println("")
-      println("Attempt $attempt. Error occurred: $errorMsg\n$st")
+      println("\n~~~ SQLLLM decisionMaker() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, " ", @__LINE__)
      println("")
+      noise = GeneralUtils.randstrings(3, 5)
    end
  end
  error("DecisionMaker failed to generate a thought ", response)
 end
+# function decisionMaker(state::T1, context, text2textInstructLLM::Function,
+#   QandA::T2; similarSQL::Union{T3, Nothing}=nothing
+#   )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:AbstractString, T3<:AbstractString}
  
-# function decisionMaker(state::T2, config::T1
-#   )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:AbstractDict}
+#   similarSQL = 
+#     if similarSQL === nothing
+#       "None"
+#     else
+#       "This is the closest matching SQL statement for a similar query: $similarSQL"
+#     end

-#   if isfile("lesson.json")
-#     lessonDict = copy(JSON3.read("lesson.json"))
-#   else
-#     lessonDict = nothing
-#   end
+#   # lessonDict =
+#   # if isfile("lesson.json")
+#   #   lessonDict = copy(JSON3.read("lesson.json"))
+#   # else
+#   #   lessonDict = nothing
+#   # end

-#   lesson =
-#   if lessonDict === nothing
-#     ""
-#   else
-#     """
-#     You have attempted to help the user before and failed, either because your reasoning for the 
-#     recommendation was incorrect or your response did not exactly match the user expectation. 
-#     The following lesson(s) give a plan to avoid failing to help the user in the same way you 
-#     did previously. Use them to improve your strategy to help the user.
+#   # lessonDict = nothing

-#     Here are some lessons in JSON format:
-#     $(JSON3.write(lessonDict))
+#   # lesson =
+#   # if lessonDict === nothing
+#   #   ""
+#   # else
+#   #   """
+#   #   You have attempted to help the user before and failed, either because your reasoning for the 
+#   #   recommendation was incorrect or your response did not exactly match the user expectation. 
+#   #   The following lesson(s) give a plan to avoid failing to help the user in the same way you 
+#   #   did previously. Use them to improve your strategy to help the user.

-#     When providing the thought and action for the current trial, that into account these failed 
-#     trajectories and make sure not to repeat the same mistakes and incorrect answers. 
-#     """
-#   end
+#   #   Here are some lessons in JSON format:
+#   #   $(JSON3.write(lessonDict))

-#   _prompt = 
+#   #   When providing the thought and action for the current trial, take into account these failed 
+#   #   trajectories and make sure not to repeat the same mistakes and incorrect answers. 
+#   #   """
+#   # end
+
+#   systemmsg = 
 #   """
-#     You are a helpful data engineer. 
-#     Your goal is to help the user to get what the user wants.
-#     You are also keen to improve your helpfulness with lesson(s).
+#     You are a helpful assistant that get the data from a database to satisfy the user's query. 
+#     You are also eager to improve your helpfulness.

-#     You must follow the following criteria:
-#     1) Get to know what table are available in the database.
-#     2) Get to know what the data in the table looks like.
-#     3) If you can't find a single table that can be used to answer the user's question, try joining multiple tables to see if you can obtain the answer.
-#     4) If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your question".
+#     At each round of conversation, the user will give you the current situation:
+#     User Query: ...
+#     Hints: ...
+#     Your Q&A: ...
+#     Your work progress: ...
+#     Evaluation: Evaluation of the latest action and observation
+#     Suggestion: ...
    
-#     You should only respond with interleaving Thought, Action, Observation steps. 
-#     Thought can reason about the current situation, and Action can be one of the following functions:
-#     1) listalltables[NA], which you can use to list all tables in the database and see their descriptions. "NA" word is the function input.
-#     2) tableinfo[table_name], which you can use to see the table and its column description. "table_name" is name of the table you want to get info.
-#     3) getdata[instructions], which you can use to ask other people to get the data from tables for you. "instructions" should clearly describe how you want others to extract the data.
-#       For example, 
-#       a. Query the "Engine" table to identify the engine types that have 3 cylinders. This can be done using a SELECT statement in SQL, filtering the results where the number of cylinders equals 3.
-#       b. Once you have identified the engine types with 3 cylinders, use this information to query the "Car" table. You're looking for car models that are associated with these engine types. This can be achieved by performing a JOIN operation between the "Car" and "Engine" tables based on the engine type.
-#     4) finalanswerbox[answer], which returns your answer to the user. "answer" is your answer to the user question.
-#     After each observation, provide the next Thought and next Action.
+#     You should consider the following guidelines:
+#     - Do not create any table in the database
+#     - Column name can be the same in different tables. Refer to column comments to get more details by using TABLEINFO function
+#     - A junction table can be used to link tables together. Another use case is for filtering data.
+#     - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
+#     - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
+#     - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.

-#     You should only respond in JSON format as describe below:
-#     {
-#       "thought": "your reasoning",
-#       "action": {"name": "action to take", "input": "Action input"},
-#       "observation": "result of the action"
-#     }
+#     You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
+#     1) Understanding: 
+#       - State your understanding about the current situation.
+#     2) Reasoning: 
+#         - State your step by step reasoning about the current situation.
+#     3) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
+#     4) Action_name (Must be aligned with your plan): Can be one of the following functions: 
+#         - TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"]
+#         - GETDATA[SQL], which you can use to get the data from the database. "SQL" is the single SQL command to be executed against the database.
+#           For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
+#           Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
+#     5) Action_input: Input to the action
+#     6) Observation: Result of the immediately preceding action

-#     Here are some examples:
-#     {
-#     "question": "I would like to buy a sedan with 8 seats.",
-#     "thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
-#     "action_1": {"name": "inventory", "input": "sedan with 8 seats."},
-#     "observation_1": "Several model has 8 seats. Available color are black, red green"
-#     }
-#     {
-#       "thought": "I have a few color for the user to choose from. I will ask him what color he likes.",
-#       "action": {"name": "chatbox", "input": "Which color do you like?"}
-#       "observation": "I'll take black."
-#     }
-
-#     $lesson
+#     You should only respond in format as described below:
+#     Understanding: ...
+#     Reasoning: ...
+#     Plan: ...
+#     Action_name: ...
+#     Action_input: ...
+#     Observation: ...

 #     Let's begin!
-
-#     $(JSON3.write(state[:thoughtHistory]))
-#     {"thought"
 #   """
-
-#   # _prompt = 
-#   # """
-#   #   You are a helpful data engineer. 
-#   #   Your goal is to help the user to get what the user wants. 
-#   #   You are also keen to improve your helpfulness with lesson(s).
-    
-#   #   You must follow the following criteria:
-#   #   1) Get to know what table are available in the database.
-#   #   2) Get to know what the data in the table looks like.
-#   #   3) If you can't find a single table that can be used to answer the user's question, try joining multiple tables to see if you can obtain the answer.
-#   #   4) Keep trying even if you get SQL execution error.
-    
-#   #   You should only respond with interleaving Thought, Action, Observation steps. 
-#   #   Thought can reason about the current situation, and Action can be one of the following functions:
-#   #   1) listalltables[NA], which you can use to list all tables in the database and see their descriptions. "NA" word is the function input.
-#   #   2) tableinfo[table_name], which you can use to see the table and its column description. "table_name" is name of the table you want to get info.
-#   #   3) getdata[SQL], which you can use to ask other people to get the data from tables for you. "SQL" is the command you will use to extract the data.
-#   #   4) finalanswerbox[answer], which returns your answer to the user. "answer" is your answer to the user question.
-#   #   After each observation, provide the next Thought and next Action.
-    
-#   #   You should only respond in JSON format as describe below:
-#   #   {
-#   #     "thought": "your reasoning",
-#   #     "action": {"name": "action to take", "input": "Action input"},
-#   #     "observation": "result of the action"
-#   #   }
-    
-#   #   Here are some examples:
-#   #   {
-#   #   "question": "I would like to buy a sedan with 8 seats.",
-#   #   "thought_1": "Our showroom carries various vehicle model. But I'm not sure whether we have a models that fits the user demand, I need to check our inventory.",
-#   #   "action_1": {"name": "inventory", "input": "sedan with 8 seats."},
-#   #   "observation_1": "Several model has 8 seats. Available color are black, red green"
-#   #   }
-#   #   {
-#   #     "thought": "I have a few color for the user to choose from. I will ask him what color he likes.",
-#   #     "action": {"name": "chatbox", "input": "Which color do you like?"}
-#   #     "observation": "I'll take black."
-#   #   }
-
-#   #   $lesson
-
-#   #   Let's begin!
-
-#   #   $(JSON3.write(state[:thoughtHistory]))
-#   #   {"thought"
-#   # """
-
-#   # apply LLM specific instruct format
-#   externalService = config[:externalservice][:text2textinstruct]
-#   llminfo = externalService[:llminfo]
-#   prompt = 
-#   if llminfo[:name] == "llama3instruct"
-#     GeneralUtils.formatLLMtext_llama3instruct("system", _prompt)
-#   else
-#     error("llm model name is not defied yet $(@__LINE__)")
+#   workprogress = ""
+#   for (k, v) in state[:thoughtHistory]
+#     if k ∉ [:query]
+#       workprogress *= "$k: $v\n"
+#     end
 #   end

-#   msgMeta = GeneralUtils.generate_msgMeta(
-#     externalService[:mqtttopic],
-#     senderName= "decisionMaker",
-#     senderId= string(uuid4()),
-#     receiverName= "text2textinstruct",
-#     mqttBroker= config[:mqttServerInfo][:broker],
-#     mqttBrokerPort= config[:mqttServerInfo][:port],
-#   )
+#   usermsg =
+#   """
+#   $(context[:tablelist])
+#   User query: $(state[:thoughtHistory][:question])
+#   Hints: $similarSQL
+#   Your Q&A: $QandA
+#   Your work progress: $workprogress
+#   Evaluation: $(state[:evaluation])
+#   Suggestion: $(state[:suggestion])
+#   """

-#   outgoingMsg = Dict(
-#     :msgMeta=> msgMeta,
-#     :payload=> Dict(
-#       :text=> prompt,
-#       :kwargs=> Dict(
-#         :max_tokens=> 512,
-#         :stop=> ["<|eot_id|>"],
-#       )
-#     )
-#   )
-#   @show outgoingMsg
+#   _prompt = 
+#   [
+#     Dict(:name=> "system", :text=> systemmsg),
+#     Dict(:name=> "user", :text=> usermsg)
+#   ]

-#   for attempt in 1:5
+#   # put in model format
+#   prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
+#   prompt *=
+#     """
+#     <|start_header_id|>assistant<|end_header_id|>
+#     """
+#   response = nothing # store for show when error msg show up
+#   for attempt in 1:10
 #     try
-#       response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
-#       _responseJsonStr = response[:response][:text]
-#       expectedJsonExample =
-#       """
-#       Here is an expected JSON format:
-#       {
-#         "thought": "...",
-#         "action": {"name": "...", "input": "..."},
-#         "observation": "..."
-#       }
-#       """
-#       responseJsonStr = FormatCorrector.jsoncorrection(config, _responseJsonStr, expectedJsonExample)
-#       thoughtDict = copy(JSON3.read(responseJsonStr)) 
+#       response = text2textInstructLLM(prompt)

-#       # check if dict has all required value
-#       thought::AbstractString = thoughtDict[:thought]
-#       actionname::AbstractString = thoughtDict[:action][:name]
-#       actioninput::AbstractString = thoughtDict[:action][:input]
-#       if actionname ∈ ["listalltables", "tableinfo", "getdata", "finalanswerbox"]
-#         # LLM use available function
-#       elseif thought == ""
-#         error("DecisionMaker has no thought")
-#       elseif length(actioninput) == 0
-#         error("DecisionMaker has no actioninput")
-#       else 
-#         error("DecisionMaker use wrong function")
+#       # textToDict() search for action_input
+#       responsedict = GeneralUtils.textToDict(response,
+#                   ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"], 
+#                   rightmarker=":", symbolkey=true, lowercasekey=true)
+
+#       delete!(responsedict, :observation)
+
+#       toollist = ["TABLEINFO", "GETDATA"]
+#       if responsedict[:action_name] ∉ toollist
+#         error("SQL decisionMaker() didn't use the given functions ", @__FILE__, " ", @__LINE__)
+#       end
+
+#       for i in toollist
+#         if occursin(i, responsedict[:action_input])
+#           error("Action_name is in action_input which is not allowed.")
+#         end
+#       end
+
+#       for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
+#         if length(JSON3.write(responsedict[i])) == 0
+#           error("$i is empty ", @__FILE__, " ", @__LINE__)
+#         end
 #       end

 #       # check if there are more than 1 key per categories
-#       for i ∈ ["thought", "action", "observation"]
-#         matchkeys = GeneralUtils.findMatchingDictKey(thoughtDict, i)
+#       for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
+#         matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
 #         if length(matchkeys) > 1
 #           error("DecisionMaker has more than one key per categories")
 #         end
 #       end

-#       return thoughtDict
+#       return responsedict
 #     catch e
 #       io = IOBuffer()
 #       showerror(io, e)
 #       errorMsg = String(take!(io))
 #       st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
 #       println("")
-#       @warn "Attempt $attempt. Error occurred: $errorMsg\n$st"
+#       println("Attempt $attempt. Error occurred: $errorMsg\n$st")
 #       println("")
 #     end
 #   end
-#   error("DecisionMaker failed to generate a thought")
+#   error("DecisionMaker failed to generate a thought ", response)
 # end


@@ -1221,6 +1187,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
        - State your understanding about the current situation.
    2) Q: Given the situation, "ask yourself" about the situation at least five, but no more than ten, questions.
    3) A: Given the situation, "answer to yourself" the best you can.
+        - Do not generate any text after the last answer.
    
    You must only respond in format as described below:
    Understanding: ...
@@ -1244,9 +1211,9 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;

  response = nothing # store for show when error msg show up
  errornote = ""
+  noise = ""

  for attempt in 1:10
-    
    usermsg =
    """
    $(context[:tablelist])
@@ -1254,6 +1221,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
    Hints: $similarSQL
    Your work progress: $workprogress
    $errornote
+    $noise
    """

    _prompt = 
@@ -1271,10 +1239,18 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;

    try
      response = text2textInstructLLM(prompt)
+      
+      # check if response is valid
      q_number = count("Q", response)
      if q_number < 1
-        error("too few questions only $q_number questions are generated ", @__FILE__, " ", @__LINE__)
+        errornote = "too few question"
+        error("too few questions only $q_number questions are generated")
      end
+      if occursin('`', response)
+        response = replace(response, '`'=>"")
+      end
+
+
      # response = string(split(response, "Please")[1]) # LLM usually add comments which is no need.
      responsedict = GeneralUtils.textToDict(response,
                        ["Understanding", "Q1"], 
@@ -1288,9 +1264,8 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
      showerror(io, e)
      errorMsg = String(take!(io))
      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
-      println("")
-      println("Attempt $attempt. Error occurred: $errorMsg\n$st")
-      println("")
+      println("\n~~~ SQLLLM generatequestion() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, " ", @__LINE__)
+      noise = GeneralUtils.randstrings(3, 5)
    end
  end
  error("generatequestion failed to generate a thought ", response)
--- a/src/llmfunction.jl
+++ b/src/llmfunction.jl
@@ -653,7 +653,7 @@ function SQLexecution(executeSQL::Function, sql::T
    tablesize = size(df)
    row, column = tablesize
    if row == 0  # if 0 row
-      error("The resulting table has 0 row. Possible causes: 1) You might be searching in the wrong place 2) There could be a typo in your search query 3) No data matches your search criteria.")
+      error("The resulting table has 0 row. Possible causes: 1) You might be searching in the wrong place 2) There could be a typo in your search query.")
    elseif column > 30
      error("SQL execution failed. An unexpected error occurred. Please try again.")
    end