This commit is contained in:
2025-03-14 21:57:27 +07:00
parent 696a77a483
commit 323c232121
2 changed files with 12 additions and 14 deletions

View File

@@ -395,7 +395,6 @@ function evaluator(state::T1, text2textInstructLLM::Function
- Do not generate additional thoughts or actions.
2) Answer_evaluation:
- Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question
- State your rationale
3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
Bad example (The observation didn't answers the question):
question: Find cars with 4 wheels.
@@ -471,16 +470,15 @@ function evaluator(state::T1, text2textInstructLLM::Function
responsedict = GeneralUtils.textToDict(response, header;
dictKey=dictkey, symbolkey=true)
# check if dict has all required value
trajectoryevaluation_text::AbstractString = responsedict[:trajectory_evaluation]
answerevaluation_text::AbstractString = responsedict[:answer_evaluation]
# responsedict[:score] = replace(responsedict[:score], r"\(.*?\)" => "") # remove (...) if there is any.
responsedict[:score] = responsedict[:score][1] # some time "6\nThe trajectories are incomplete" is generated but I only need the number.
responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
score::Integer = responsedict[:score]
try
responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
catch
continue
end
accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
suggestion::AbstractString = responsedict[:suggestion]
if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
error("generated accepted_as_answer has wrong format")
@@ -872,9 +870,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
earlystop(state) = state[:reward] >= 8 ? true : false
_, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
horizontalSampleExpansionPhase=3,
horizontalSampleExpansionPhase=5,
horizontalSampleSimulationPhase=2,
maxSimulationDepth=10,
maxSimulationDepth=5,
maxiterations=1,
explorationweight=1.0,
earlystop=earlystop,