update

2025-03-18 08:37:35 +07:00
parent e391547991
commit 7fd0d6269a
2 changed files with 106 additions and 107 deletions
--- a/src/llmfunction.jl
+++ b/src/llmfunction.jl
@@ -829,111 +829,115 @@ function compareState(query, highValueStateList)
      - The user's attempted actions and their corresponding results
    </At each round of conversation, you will be given the following>
    <You should then respond to the user with the following>
-      Selected_response: the number of the most accurate and relevant response
-      Rationale: a brief explanation of why you selected this response
+      Comparison: a comparison of the results from each attempt
+      Rationale: a brief explanation of why the selected response is the most accurate and relevant
+      Selected_response_number: the number of the selected response in the list of results
+      
    </You should then respond to the user with the following>
    <You should only respond in format as described below>
-    - Selected_response: ...
-    - Rationale: ...
+      Comparison: ...
+      Rationale: ...
+      Selected_response_number: ...
    </You should only respond in format as described below>
    <Here are some examples>
    User's question: "How many German wines do you have?"
    Attempt 1:
-    Action: SELECT * FROM wines WHERE country = 'Germany'
+    Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany'
    Result: 100 wines
    Attempt 2:
-    Action: SELECT * FROM wines WHERE country = 'Germany' AND type = 'Red'
+    Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany' AND type = 'Red'
    Result: 50 red wines
-    Selected_response: 1
-    Rationale: The question is about German wines, so the most accurate response is the one that includes all German wines.
+    Comparison: The second attempt counts only German red wines while the first attempt includes all German wines.
+    Rationale: The user is asking for the number of German wines without specifying a type, so the most accurate response is the first attempt because it includes all German wines.
+    Selected_response_number: 1
    </Here are some examples>

    Let's begin!
  """

-  thoughthistory = ""
-  for (k, v) in state[:thoughtHistory]
-    thoughthistory *= "$k: $v\n" 
-  end
+  # thoughthistory = ""
+  # for (k, v) in state[:thoughtHistory]
+  #   thoughthistory *= "$k: $v\n" 
+  # end

-  errornote = ""
+  # errornote = ""

-  for attempt in 1:10
-    errorFlag = false
+  # for attempt in 1:10
+  #   errorFlag = false

-    usermsg =
-    """
-    Trajectory: $thoughthistory
-    Error_note: $errornote
-    """
+  #   usermsg =
+  #   """
+  #   Trajectory: $thoughthistory
+  #   Error_note: $errornote
+  #   """

-    _prompt = 
-    [
-      Dict(:name=> "system", :text=> systemmsg),
-      Dict(:name=> "user", :text=> usermsg)
-    ]
+  #   _prompt = 
+  #   [
+  #     Dict(:name=> "system", :text=> systemmsg),
+  #     Dict(:name=> "user", :text=> usermsg)
+  #   ]

-    # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
+  #   # put in model format
+  #   prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")

-    header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
-    dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]
+  #   header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
+  #   dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]

-    response = text2textInstructLLM(prompt)
+  #   response = text2textInstructLLM(prompt)

-    # sometime LLM output something like **Comprehension**: which is not expected
-    response = replace(response, "**"=>"")
-    response = replace(response, "***"=>"")
+  #   # sometime LLM output something like **Comprehension**: which is not expected
+  #   response = replace(response, "**"=>"")
+  #   response = replace(response, "***"=>"")

-    # make sure every header is in the response
-    for i in header
-      detected = GeneralUtils.detect_keyword(i, response)
-      if detected === nothing
-        errornote = "Your previous response didn't provide $i"
-        errorFlag = true
-      end
-    end
-    if errorFlag
-      continue  # skip to the next iteration
-    end
+  #   # make sure every header is in the response
+  #   for i in header
+  #     detected = GeneralUtils.detect_keyword(i, response)
+  #     if detected === nothing
+  #       errornote = "Your previous response didn't provide $i"
+  #       errorFlag = true
+  #     end
+  #   end
+  #   if errorFlag
+  #     continue  # skip to the next iteration
+  #   end

-    responsedict = GeneralUtils.textToDict(response, header; 
-                                            dictKey=dictkey, symbolkey=true)
+  #   responsedict = GeneralUtils.textToDict(response, header; 
+  #                                           dictKey=dictkey, symbolkey=true)
    
-    responsedict[:score] = responsedict[:score][1]  # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
-    try
-      responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
-    catch
-      continue
-    end
+  #   responsedict[:score] = responsedict[:score][1]  # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
+  #   try
+  #     responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
+  #   catch
+  #     continue
+  #   end

-    accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
+  #   accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]

-    if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
-      error("generated accepted_as_answer has wrong format")
-    end
+  #   if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
+  #     error("generated accepted_as_answer has wrong format")
+  #   end

-    # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
-    state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" 
-    state[:evaluationscore] = responsedict[:score] 
-    state[:accepted_as_answer] = responsedict[:accepted_as_answer]
-    state[:suggestion] = responsedict[:suggestion]
+  #   # add to state here instead to in transition() because the latter causes julia extension crash (a bug in julia extension)
+  #   state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])" 
+  #   state[:evaluationscore] = responsedict[:score] 
+  #   state[:accepted_as_answer] = responsedict[:accepted_as_answer]
+  #   state[:suggestion] = responsedict[:suggestion]

-    # mark as terminal state when the answer is achieved
-    if accepted_as_answer == "Yes"
+  #   # mark as terminal state when the answer is achieved
+  #   if accepted_as_answer == "Yes"

-      # mark the state as terminal state because the evaluation say so.
-      state[:isterminal] = true
+  #     # mark the state as terminal state because the evaluation say so.
+  #     state[:isterminal] = true

-      # evaluation score as reward because different answers hold different value for the user.
-      state[:reward] = responsedict[:score]
-    end
-    println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
-    pprintln(Dict(responsedict))
+  #     # evaluation score as reward because different answers hold different value for the user.
+  #     state[:reward] = responsedict[:score]
+  #   end
+  #   println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
+  #   pprintln(Dict(responsedict))

-    return responsedict[:score]
-  end
-  error("Evaluator failed to generate an evaluation, Response: \n$response\n<|End of error|>")
+  #   return responsedict[:score]
+  # end
+  # error("Evaluator failed to generate an evaluation, Response: \n$response\n<|End of error|>")
 end