update
This commit is contained in:
@@ -829,111 +829,115 @@ function compareState(query, highValueStateList)
|
||||
- The user's attempted actions and their corresponding results
|
||||
</At each round of conversation, you will be given the following>
|
||||
<You should then respond to the user with the following>
|
||||
Selected_response: the number of the most accurate and relevant response
|
||||
Rationale: a brief explanation of why you selected this response
|
||||
Comparison: a comparison of the results from each attempt
|
||||
Rationale: a brief explanation of why the selected response is the most accurate and relevant
|
||||
Selected_response_number: the number the selected response in the list of results
|
||||
|
||||
</You should then respond to the user with the following>
|
||||
<You should only respond in format as described below>
|
||||
- Selected_response: ...
|
||||
- Rationale: ...
|
||||
Comparison: ...
|
||||
Rationale: ...
|
||||
Selected_response_number: ...
|
||||
</You should only respond in format as described below>
|
||||
<Here are some examples>
|
||||
User's question: "How many German wines do you have?"
|
||||
Attempt 1:
|
||||
Action: SELECT * FROM wines WHERE country = 'Germany'
|
||||
Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany'
|
||||
Result: 100 wines
|
||||
Attempt 2:
|
||||
Action: SELECT * FROM wines WHERE country = 'Germany' AND type = 'Red'
|
||||
Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany' AND type = 'Red'
|
||||
Result: 50 red wines
|
||||
Selected_response: 1
|
||||
Rationale: The question is about German wines, so the most accurate response is the one that includes all German wines.
|
||||
Comparison: The second attempt counts only German red wines while the first attempt includes all German wines.
|
||||
Rationale: The user is asking for the number of German wines without specifying a type, so the most accurate response is the first attempt because it includes all German wines.
|
||||
Selected_response_number: 1
|
||||
</Here are some examples>
|
||||
|
||||
Let's begin!
|
||||
"""
|
||||
|
||||
thoughthistory = ""
|
||||
for (k, v) in state[:thoughtHistory]
|
||||
thoughthistory *= "$k: $v\n"
|
||||
end
|
||||
# thoughthistory = ""
|
||||
# for (k, v) in state[:thoughtHistory]
|
||||
# thoughthistory *= "$k: $v\n"
|
||||
# end
|
||||
|
||||
errornote = ""
|
||||
# errornote = ""
|
||||
|
||||
for attempt in 1:10
|
||||
errorFlag = false
|
||||
# for attempt in 1:10
|
||||
# errorFlag = false
|
||||
|
||||
usermsg =
|
||||
"""
|
||||
Trajectory: $thoughthistory
|
||||
Error_note: $errornote
|
||||
"""
|
||||
# usermsg =
|
||||
# """
|
||||
# Trajectory: $thoughthistory
|
||||
# Error_note: $errornote
|
||||
# """
|
||||
|
||||
_prompt =
|
||||
[
|
||||
Dict(:name=> "system", :text=> systemmsg),
|
||||
Dict(:name=> "user", :text=> usermsg)
|
||||
]
|
||||
# _prompt =
|
||||
# [
|
||||
# Dict(:name=> "system", :text=> systemmsg),
|
||||
# Dict(:name=> "user", :text=> usermsg)
|
||||
# ]
|
||||
|
||||
# put in model format
|
||||
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
|
||||
# # put in model format
|
||||
# prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
|
||||
|
||||
header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
|
||||
dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]
|
||||
# header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
|
||||
# dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]
|
||||
|
||||
response = text2textInstructLLM(prompt)
|
||||
# response = text2textInstructLLM(prompt)
|
||||
|
||||
# sometimes the LLM outputs something like **Comprehension**: which is not expected
|
||||
response = replace(response, "**"=>"")
|
||||
response = replace(response, "***"=>"")
|
||||
# # sometimes the LLM outputs something like **Comprehension**: which is not expected
|
||||
# response = replace(response, "**"=>"")
|
||||
# response = replace(response, "***"=>"")
|
||||
|
||||
# make sure every header is in the response
|
||||
for i in header
|
||||
detected = GeneralUtils.detect_keyword(i, response)
|
||||
if detected === nothing
|
||||
errornote = "Your previous response didn't provide $i"
|
||||
errorFlag = true
|
||||
end
|
||||
end
|
||||
if errorFlag
|
||||
continue # skip to the next iteration
|
||||
end
|
||||
# # make sure every header is in the response
|
||||
# for i in header
|
||||
# detected = GeneralUtils.detect_keyword(i, response)
|
||||
# if detected === nothing
|
||||
# errornote = "Your previous response didn't provide $i"
|
||||
# errorFlag = true
|
||||
# end
|
||||
# end
|
||||
# if errorFlag
|
||||
# continue # skip to the next iteration
|
||||
# end
|
||||
|
||||
responsedict = GeneralUtils.textToDict(response, header;
|
||||
dictKey=dictkey, symbolkey=true)
|
||||
# responsedict = GeneralUtils.textToDict(response, header;
|
||||
# dictKey=dictkey, symbolkey=true)
|
||||
|
||||
responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
|
||||
try
|
||||
responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
|
||||
catch
|
||||
continue
|
||||
end
|
||||
# responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
|
||||
# try
|
||||
# responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
|
||||
# catch
|
||||
# continue
|
||||
# end
|
||||
|
||||
accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
|
||||
# accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
|
||||
|
||||
if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
|
||||
error("generated accepted_as_answer has wrong format")
|
||||
end
|
||||
# if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
|
||||
# error("generated accepted_as_answer has wrong format")
|
||||
# end
|
||||
|
||||
# add to state here instead of in transition() because the latter causes the Julia extension to crash (a bug in the Julia extension)
|
||||
state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
|
||||
state[:evaluationscore] = responsedict[:score]
|
||||
state[:accepted_as_answer] = responsedict[:accepted_as_answer]
|
||||
state[:suggestion] = responsedict[:suggestion]
|
||||
# # add to state here instead of in transition() because the latter causes the Julia extension to crash (a bug in the Julia extension)
|
||||
# state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
|
||||
# state[:evaluationscore] = responsedict[:score]
|
||||
# state[:accepted_as_answer] = responsedict[:accepted_as_answer]
|
||||
# state[:suggestion] = responsedict[:suggestion]
|
||||
|
||||
# mark as terminal state when the answer is achieved
|
||||
if accepted_as_answer == "Yes"
|
||||
# # mark as terminal state when the answer is achieved
|
||||
# if accepted_as_answer == "Yes"
|
||||
|
||||
# mark the state as a terminal state because the evaluation says so.
|
||||
state[:isterminal] = true
|
||||
# # mark the state as a terminal state because the evaluation says so.
|
||||
# state[:isterminal] = true
|
||||
|
||||
# evaluation score as reward because different answers hold different value for the user.
|
||||
state[:reward] = responsedict[:score]
|
||||
end
|
||||
println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
|
||||
pprintln(Dict(responsedict))
|
||||
# # evaluation score as reward because different answers hold different value for the user.
|
||||
# state[:reward] = responsedict[:score]
|
||||
# end
|
||||
# println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
|
||||
# pprintln(Dict(responsedict))
|
||||
|
||||
return responsedict[:score]
|
||||
end
|
||||
error("Evaluator failed to generate an evaluation, Response: \n$response\n<|End of error|>")
|
||||
# return responsedict[:score]
|
||||
# end
|
||||
# error("Evaluator failed to generate an evaluation, Response: \n$response\n<|End of error|>")
|
||||
end
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user