From d09e9c107129a53af9c3700ececb66c34f26b43a Mon Sep 17 00:00:00 2001
From: tonaerospace <tonaerospace.etc@gmail.com>
Date: Tue, 11 Mar 2025 00:13:50 +0700
Subject: [PATCH] Switch prompts to qwen format, rename headers, timestamp logs

---
 Manifest.toml    |  44 +++++++-------
 Project.toml     |   3 +-
 src/interface.jl | 146 +++++++++++++++++++++++------------------------
 test/runtest.jl  |   2 +-
 4 files changed, 95 insertions(+), 100 deletions(-)

diff --git a/Manifest.toml b/Manifest.toml
index 7650e13..8eb5fef 100644
--- a/Manifest.toml
+++ b/Manifest.toml
@@ -1,8 +1,8 @@
 # This file is machine-generated - editing it directly is not advised
 
-julia_version = "1.11.2"
+julia_version = "1.11.3"
 manifest_format = "2.0"
-project_hash = "6e88822413ea4a623cd914d84de127dc6c57fceb"
+project_hash = "9e0d7dca51b949f2ffa5477b895b90988ec62529"
 
 [[deps.AliasTables]]
 deps = ["PtrArrays", "Random"]
@@ -120,9 +120,9 @@ version = "1.11.0"
 
 [[deps.Distributions]]
 deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"]
-git-tree-sha1 = "3101c32aab536e7a27b1763c0797dba151b899ad"
+git-tree-sha1 = "03aa5d44647eaec98e1920635cdfed5d5560a8b9"
 uuid = "31c24e10-a181-5473-b8eb-7969acd0382f"
-version = "0.25.113"
+version = "0.25.117"
 
     [deps.Distributions.extensions]
     DistributionsChainRulesCoreExt = "ChainRulesCore"
@@ -200,11 +200,9 @@ version = "1.11.0"
 
 [[deps.GeneralUtils]]
 deps = ["CSV", "DataFrames", "DataStructures", "Dates", "Distributions", "JSON3", "MQTTClient", "PrettyPrinting", "Random", "SHA", "UUIDs"]
-git-tree-sha1 = "978d9a5c3fc30205dd72d4a2a2ed4fa85ebee5cf"
-repo-rev = "main"
-repo-url = "https://git.yiem.cc/ton/GeneralUtils"
+path = "../GeneralUtils"
 uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe"
-version = "0.1.0"
+version = "0.2.2"
 
 [[deps.HTTP]]
 deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "PrecompileTools", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"]
@@ -214,9 +212,9 @@ version = "1.10.13"
 
 [[deps.HypergeometricFunctions]]
 deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"]
-git-tree-sha1 = "b1c2585431c382e3fe5805874bda6aea90a95de9"
+git-tree-sha1 = "2bd56245074fab4015b9174f24ceba8293209053"
 uuid = "34004b35-14d8-5ef3-9330-4cdb6864b03a"
-version = "0.3.25"
+version = "0.3.27"
 
 [[deps.ICU_jll]]
 deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
@@ -305,12 +303,10 @@ uuid = "b39eb1a6-c29a-53d7-8c32-632cd16f18da"
 version = "1.19.3+0"
 
 [[deps.LLMMCTS]]
-deps = ["GeneralUtils", "JSON3"]
-git-tree-sha1 = "c8ad9715e78bbd19f5ac79e1f1cacf85f141449d"
-repo-rev = "main"
-repo-url = "https://git.yiem.cc/ton/LLMMCTS"
+deps = ["GeneralUtils", "JSON3", "PrettyPrinting"]
+path = "../LLMMCTS"
 uuid = "d76c5a4d-449e-4835-8cc4-dd86ec44f241"
-version = "0.1.2"
+version = "0.1.3"
 
 [[deps.LaTeXStrings]]
 git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"
@@ -493,7 +489,7 @@ version = "3.0.15+1"
 deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
 git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
 uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
-version = "0.5.5+0"
+version = "0.5.5+2"
 
 [[deps.OrderedCollections]]
 git-tree-sha1 = "12f1439c4f986bb868acda6ea33ebc78e19b95ad"
@@ -502,9 +498,9 @@ version = "1.7.0"
 
 [[deps.PDMats]]
 deps = ["LinearAlgebra", "SparseArrays", "SuiteSparse"]
-git-tree-sha1 = "949347156c25054de2db3b166c52ac4728cbad65"
+git-tree-sha1 = "966b85253e959ea89c53a9abebbf2e964fbf593b"
 uuid = "90014a1f-27ba-587c-ab20-58faa44d9150"
-version = "0.11.31"
+version = "0.11.32"
 
 [[deps.Parsers]]
 deps = ["Dates", "PrecompileTools", "UUIDs"]
@@ -556,15 +552,15 @@ uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
 version = "1.11.0"
 
 [[deps.PtrArrays]]
-git-tree-sha1 = "77a42d78b6a92df47ab37e177b2deac405e1c88f"
+git-tree-sha1 = "1d36ef11a9aaf1e8b74dacc6a731dd1de8fd493d"
 uuid = "43287f4e-b6f4-7ad1-bb20-aadabca52c3d"
-version = "1.2.1"
+version = "1.3.0"
 
 [[deps.QuadGK]]
 deps = ["DataStructures", "LinearAlgebra"]
-git-tree-sha1 = "cda3b045cf9ef07a08ad46731f5a3165e56cf3da"
+git-tree-sha1 = "9da16da70037ba9d701192e27befedefb91ec284"
 uuid = "1fd47b50-473d-5c70-9696-f719f8f3bcdc"
-version = "2.11.1"
+version = "2.11.2"
 
     [deps.QuadGK.extensions]
     QuadGKEnzymeExt = "Enzyme"
@@ -664,9 +660,9 @@ version = "1.11.0"
 
 [[deps.SpecialFunctions]]
 deps = ["IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
-git-tree-sha1 = "2f5d4697f21388cbe1ff299430dd169ef97d7e14"
+git-tree-sha1 = "64cca0c26b4f31ba18f13f6c12af7c85f478cfde"
 uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
-version = "2.4.0"
+version = "2.5.0"
 
     [deps.SpecialFunctions.extensions]
     SpecialFunctionsChainRulesCoreExt = "ChainRulesCore"
diff --git a/Project.toml b/Project.toml
index 037aded..568883e 100644
--- a/Project.toml
+++ b/Project.toml
@@ -23,5 +23,4 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
 UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
 
 [compat]
-GeneralUtils = "0.1, 0.2"
-LLMMCTS = "0.1"
+Dates = "1.11.0"
diff --git a/src/interface.jl b/src/interface.jl
index 0624a74..75b6bc2 100644
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -2,7 +2,7 @@ module interface
 
 export decisionMaker, evaluator, reflector, transition, query
 
-using LibPQ, DataStructures, JSON3, UUIDs, PrettyPrinting
+using LibPQ, DataStructures, JSON3, UUIDs, PrettyPrinting, Dates
 using GeneralUtils, LLMMCTS
 using ..util, ..llmfunction
 
@@ -136,13 +136,16 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
     You are a helpful assistant that find the data from a database to satisfy the user's query. 
     You are also eager to improve your helpfulness.
 
+    For your information:
+    - Observation: Result of the immediately preceding action
+
     At each round of conversation, the user will give you the current situation:
     User Query: ...
     Example: ...
     Your Q&A: ...
     Your work progress: ...
-    Evaluation: Evaluation of the latest action and observation
-    Suggestion: ...
+    Evaluation: Evaluation of the immediately preceding action and observation
+    Suggestion: Suggestion for the immediately preceding action and observation
     
     You must follow the following guidelines:
     - Keep SQL queries focused only on the provided information.
@@ -155,21 +158,17 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
     - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
 
     You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
-    1) Understanding: 
-      - State your understanding about the current situation.
-    2) Reasoning: 
-        - State your step by step reasoning about the current situation.
-    3) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
-    4) Action_name (Must be aligned with your plan): Can be one of the following functions: 
+    1) Comprehension: 
+      - State your comprehension about the current situation.
+    2) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
+    3) Action_name (Must be aligned with your plan): Can be one of the following functions: 
         - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
           For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
           Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
-    5) Action_input: Input to the action
-    6) Observation: Result of the immediately preceding action
+    4) Action_input: Input to the action
 
     You should only respond in format as described below:
-    Understanding: ...
-    Reasoning: ...
+    Comprehension: ...
     Plan: ...
     Action_name: ...
     Action_input: ...
@@ -217,21 +216,38 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
     ]
 
     # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-    prompt *=
-      """
-      <|start_header_id|>assistant<|end_header_id|>
-      """
+    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
     response = text2textInstructLLM(prompt)
-    println("\nSQL decisionMaker() rawresponse: \n", response)
+
+    # The LLM tends to invent an Observation section because the prompt mentions one; keep only the text before it
+    response = 
+      if occursin("observation:", response)
+        string(split(response, "observation:")[1])
+      elseif occursin("Observation:", response)
+        string(split(response, "Observation:")[1])
+      elseif occursin("observation_", response)
+        string(split(response, "observation_")[1])
+      elseif occursin("Observation_", response)
+        string(split(response, "Observation_")[1])
+      else
+        response
+      end
+
+    # Sometimes the LLM numbers its headers (e.g. "Plan_12:"); strip the "_<digits>" suffix so they read "Plan:"
+    regex = r"_\d+:"  # \d+ matches any run of digits; a class such as [0-1000] would match only '0' and '1'
+    matches = collect(eachmatch(regex, response))
+    for m in matches
+      response = replace(response, string(m.match)=>":")
+    end
 
     if occursin("NULL", response)
       errornote = "\nSQL decisionMaker() NULL response is not allowed"
-      println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__)
+      println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
       continue
     end
 
-    header = ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input"]
+    header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
+    dictkey = ["comprehension", "plan", "action_name", "action_input"]
 
     # detect if there are more than 1 key per categories
     count = GeneralUtils.countGivenWords(response, header)
@@ -241,7 +257,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
       keywordNumber = v
       if keywordNumber > 1
         errornote = "\nSQL query has duplicated keyword, $keyword"
-        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__)
+        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
         duplicateKeywordFlag = true
         break
       end
@@ -256,13 +272,13 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
       push!(kw, detected)
     end
     if nothing ∈ kw
-      println("Some keywords are missing, Required keywords=$header, Response keywords=$kw ", @__FILE__, ":", @__LINE__)
+      println("Some keywords are missing, Required keywords=$header, Response keywords=$kw ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
       continue  # try again next loop
     end
 
     # textToDict() search for action_input
-    responsedict = GeneralUtils.textToDict(response, header, 
-                rightmarker=":", symbolkey=true, lowercasekey=true)
+    responsedict = GeneralUtils.textToDict(response, header; 
+                                            dictKey=dictkey, symbolkey=true)
 
     delete!(responsedict, :observation)
 
@@ -281,32 +297,32 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
     toollist = ["TABLEINFO", "GETDATA"]
     if responsedict[:action_name] ∉ toollist
       errornote = "\nYou must only use the given functions"
-      println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__)
+      println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
       continue
     end
 
     for i in toollist
       if occursin(i, responsedict[:action_input])
         errornote = "\n action_name is in action_input which is not allowed."
-        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__)
+        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
         continue
       end
     end
 
-    for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
+    for i ∈ [:comprehension, :plan, :action_name, :action_input]
       if length(JSON3.write(responsedict[i])) == 0
         errornote = "\n $i is empty"
-        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__)
+        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
         continue
       end
     end
 
     # check if there are more than 1 key per categories
-    for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
+    for i ∈ [:comprehension, :plan, :action_name, :action_input]
       matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
       if length(matchkeys) > 1
         errornote = "\n $i has more than one key"
-        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__)
+        println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
         continue
       end
     end
@@ -316,7 +332,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
     return responsedict
 
   end
-  error("DecisionMaker failed to generate a thought ", response)
+  error("DecisionMaker failed to generate a thought \n", response)
 end
 
 """ Assigns a scalar value to each new child node to be used for selec-
@@ -552,7 +568,7 @@ julia>
 #     ]
 
 #     # put in model format
-#     prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
+#     prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
 #     prompt *=
 #       """
 #       <|start_header_id|>assistant<|end_header_id|>
@@ -603,7 +619,7 @@ julia>
 #         # evaluation score as reward because different answers hold different value for the user.
 #         state[:reward] = responsedict[:score]
 #       end
-#       println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
+#       println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
 #       pprintln(Dict(responsedict))
 
 #       return responsedict[:score]
@@ -772,7 +788,7 @@ function evaluator(state::T1, text2textInstructLLM::Function;
 
   <At each round of conversation, the user will give you>
   Trajectory: ...
-  Error note: error note from your previous attempt
+  Error_note: error note from your previous attempt
   </At each round of conversation, the user will give you>
 
   <You must follow the following guidelines>
@@ -828,7 +844,7 @@ function evaluator(state::T1, text2textInstructLLM::Function;
     usermsg =
     """
     Trajectory: $thoughthistory
-    Error note: $errornote
+    Error_note: $errornote
     """
 
     _prompt = 
@@ -838,20 +854,18 @@ function evaluator(state::T1, text2textInstructLLM::Function;
     ]
 
     # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-    prompt *=
-      """
-      <|start_header_id|>assistant<|end_header_id|>
-      """
+    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
 
-    header = ["Trajectory_evaluation", "Answer_evaluation", "Accepted_as_answer", "Score", "Suggestion"]
+    header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
+    dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]
 
     response = text2textInstructLLM(prompt)
+
     # make sure every header is in the response
     for i in header
       detected = GeneralUtils.detect_keyword(i, response)
       if detected === nothing
-        errornote = "Keyword $i not found in response"
+        errornote = "Your previous response didn't provide $i"
         errorFlag = true
       end
     end
@@ -859,9 +873,8 @@ function evaluator(state::T1, text2textInstructLLM::Function;
       continue  # skip to the next iteration
     end
 
-    responsedict = GeneralUtils.textToDict(response,
-      header; 
-      rightmarker=":", symbolkey=true, lowercasekey=true)
+    responsedict = GeneralUtils.textToDict(response, header; 
+                                            dictKey=dictkey, symbolkey=true)
 
     # check if dict has all required value
     trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
@@ -892,12 +905,12 @@ function evaluator(state::T1, text2textInstructLLM::Function;
       # evaluation score as reward because different answers hold different value for the user.
       state[:reward] = responsedict[:score]
     end
-    println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__)
+    println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
     pprintln(Dict(responsedict))
 
     return responsedict[:score]
   end
-  error("evaluator failed to generate an evaluation")
+  error("Evaluator failed to generate an evaluation, Response: \n$response\n<|End of error|>")
 end
 
 """
@@ -995,15 +1008,9 @@ function reflector(config::T1, state::T2)::String where {T1<:AbstractDict, T2<:A
   ]
 
   # put in model format
-  prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-  prompt *=
-    """
-    <|start_header_id|>assistant<|end_header_id|>
-    """
-
+  prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
   externalService = config[:externalservice][:text2textinstruct]
 
-
   # apply LLM specific instruct format
   externalService = config[:externalservice][:text2textinstruct]
 
@@ -1271,8 +1278,8 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
   earlystop(state) = state[:reward] >= 8 ? true : false
 
   _, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
-                      horizontalSampleExpansionPhase=5, 
-                      horizontalSampleSimulationPhase=2,
+                      horizontalSampleExpansionPhase=1, 
+                      horizontalSampleSimulationPhase=1,
                       maxSimulationDepth=10, maxiterations=2, 
                       explorationweight=1.0,
                       earlystop=earlystop,
@@ -1314,7 +1321,7 @@ function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::
   reward::T3, isterminal::Bool
   )::NamedTuple{(:newNodeKey, :newstate), Tuple{String, Dict{Symbol, <:Any}}} where {T1<:AbstractDict, T2<:AbstractString, T3<:Number, T4<:AbstractDict}
   
-  keys = [:understanding, :reasoning, :action_name, :action_input, :observation]
+  keys = [:comprehension, :action_name, :action_input, :observation]
   # latestKeys = []
 
   currentstate_latestKey, currentstate_latestIndice = 
@@ -1418,7 +1425,6 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
 
   response = nothing # store for show when error msg show up
   errornote = ""
-  noise = ""
 
   for attempt in 1:10
     usermsg =
@@ -1428,7 +1434,6 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
     Example: $similarSQL
     Your work progress: $workprogress
     $errornote
-    $noise
     """
 
     _prompt = 
@@ -1438,11 +1443,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
     ]
 
     # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-    prompt *=
-      """
-      <|start_header_id|>assistant<|end_header_id|>
-      """
+    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
 
     try
       response = text2textInstructLLM(prompt)
@@ -1457,13 +1458,13 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
         response = replace(response, '`'=>"")
       end
 
-
-      # response = string(split(response, "Please")[1]) # LLM usually add comments which is no need.
-      responsedict = GeneralUtils.textToDict(response,
-                        ["Understanding", "Q1"], 
-                        rightmarker=":", symbolkey=true; lowercasekey=true)
+      header = ["Understanding:", "Q1:"]
+      dictkey = ["understanding", "q1"]
+      
+      responsedict = GeneralUtils.textToDict(response, header; 
+                                            dictKey=dictkey, symbolkey=true)
       response = "Q1: " * responsedict[:q1]
-      println("\n~~~ SQLLLM generatequestion() ", @__FILE__, ":", @__LINE__)
+      println("\n~~~ SQLLLM generatequestion() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
       pprintln(Dict(responsedict))
       return response
     catch e
@@ -1471,8 +1472,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
       showerror(io, e)
       errorMsg = String(take!(io))
       st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
-      println("\n~~~ SQLLLM generatequestion() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, ":", @__LINE__)
-      noise = GeneralUtils.randstrings(3, 5)
+      println("\n~~~ SQLLLM generatequestion() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
     end
   end
   error("generatequestion failed to generate a thought ", response)
diff --git a/test/runtest.jl b/test/runtest.jl
index f19a753..6825630 100644
--- a/test/runtest.jl
+++ b/test/runtest.jl
@@ -51,7 +51,7 @@ function text2textInstructLLM(prompt::String)
     )
   )
 
-  _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
+  _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120, maxattempt=3)
   response = _response[:response][:text]
 
   return response