module interface
export decisionMaker, evaluator, reflector, transition, query
using LibPQ, DataStructures, JSON3, UUIDs, PrettyPrinting, Dates
using GeneralUtils, LLMMCTS
using ..util, ..llmfunction
# ---------------------------------------------- 100 --------------------------------------------- #
""" Think and choose action.
# Arguments
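- `state::T1`
A game state (any `AbstractDict`). `state[:thoughtHistory]` (which must contain `:question`), `state[:evaluation]` and `state[:suggestion]` are read, and the accepted decision is stored in `state[:decisionMaker]`.
- `context`
A context that will be added to decisionMaker. `context[:tablelist]` is placed at the top of the user message.
- `text2textInstructLLM::Function`
A function that handles communication to LLM service
# Keyword Arguments
- `querySQLVectorDBF::Union{Function, Nothing}`
A function called with the question to look up a similar SQL example. It is only consulted when `state[:thoughtHistory]` holds a single entry.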
# Return
- `thoughtDict::Dict{Symbol, Any}`
# Example
```jldoctest
julia> using SQLLLM, GeneralUtils, UUIDs, DataStructures, PrettyPrinting
julia> state = Dict(
:isterminal => false,
:lesson => nothing,
:reward => 0,
:evaluation => "None",
:accepted_as_answer => "No",
:thoughtHistory => OrderedDict{Symbol, Any}(:question => "How many wines do you have that can be paired with lamb?"),
:evaluationscore => 0,
:suggestion => "None"
)
julia> context = Dict(:tablelist=> "None")
julia> function text2textInstructLLM(prompt::String)
config = Dict(
:mqttServerInfo => Dict(
:description => "mqtt server info",
:port => 1883,
:broker => "mqtt.yiem.cc"
),
:externalservice => Dict(
:text2textinstruct => Dict(
:mqtttopic => "/loadbalancer/requestingservice",
:description => "text to text service with instruct LLM",
:llminfo => Dict(:name => "llama3instruct")
),
)
)
# apply LLM specific instruct format
externalService = config[:externalservice][:text2textinstruct]
msgMeta = GeneralUtils.generate_msgMeta(
externalService[:mqtttopic],
senderName= "SQLLLM",
senderId= string(uuid4()),
receiverName= "text2textinstruct",
mqttBroker= config[:mqttServerInfo][:broker],
mqttBrokerPort= config[:mqttServerInfo][:port],
)
outgoingMsg = Dict(
:msgMeta=> msgMeta,
:payload=> Dict(
:text=> prompt,
:kwargs=> Dict(
:max_tokens=> 512,
:stop=> ["<|eot_id|>"],
:temperature=> 0.2,
)
)
)
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
response = _response[:response][:text]
return response
end
julia> result = SQLLLM.decisionMaker(state, context, text2textInstructLLM)
julia> pprintln(result)
Dict(
:comprehension =>
"The user is asking about wine pairing, so I need to find a way to connect the wine and food tables.",
:plan =>
"First, I'll check how the wine_food table relates to the other two tables. Then, I'll write a query that retrieves the wines that can be paired with lamb.",
:action_name => "RUNSQL",
:action_input => "SELECT ... ;"
)
```
# TODO
- [] implement RAG to pull similar experience
# Signature
"""
function decisionMaker(state::T1, context, text2textInstructLLM::Function,
    ; querySQLVectorDBF::Union{T2, Nothing}=nothing
    )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:Function}
    # NOTE: the "lesson" prompt section (lessons learnt from earlier failures) is not
    # implemented yet; see the TODO list in the docstring.

    systemmsg = """
        You are a helpful assistant that find the data from a database to satisfy the user's query.
        You are also eager to improve your helpfulness.
        For your information:
        - Observation: Result of the immediately preceding action
        At each round of conversation, the user will give you the following:
        User Query: ...
        Example: ...
        Your Q&A: ...
        Your work progress: ...
        Evaluation: Evaluation of the immediately preceding action and observation
        Suggestion: Suggestion for the immediately preceding action and observation
        You must follow the following guidelines:
        - Keep SQL queries focused only on the provided information.
        You should follow the following guidelines:
        - Do not create any table in the database
        - A junction table can be used to link tables together. Another use case is for filtering data.
        - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
        - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
        - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
        You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
        1) Comprehension:
        - State your comprehension about the current situation.
        2) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
        3) Action_name (Must be aligned with your plan): Can be one of the following functions:
        - RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
        For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
        Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
        4) Action_input: Input to the action
        You should only respond in format as described below:
        Comprehension: ...
        Plan: ...
        Action_name: ...
        Action_input: ...
        Let's begin!
        """

    # everything recorded so far except the question itself, one "key: value" per line
    workprogress = join(
        ("$k: $v\n" for (k, v) in state[:thoughtHistory] if k != :question)
    )

    # Provide a similar SQL example only on the first step (thoughtHistory holds nothing
    # but the question) and only when a lookup function was actually supplied; the
    # keyword defaults to `nothing`, which must not be called.
    similarSQL_ = "None"
    if querySQLVectorDBF !== nothing && length(state[:thoughtHistory]) == 1
        similar, _ = querySQLVectorDBF(state[:thoughtHistory][:question])
        similarSQL_ = similar !== nothing ? similar : "None"
    end

    header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
    dictkey = ["comprehension", "plan", "action_name", "action_input"]
    toollist = ["TABLEINFO", "RUNSQL"]

    response = nothing # kept at function scope so the final error can show the last reply
    errornote = ""     # feedback about the previous failed attempt, appended to the prompt

    for attempt in 1:10
        QandA = generatequestion(state, context, text2textInstructLLM; similarSQL=similarSQL_)
        usermsg = """
            $(context[:tablelist])
            User query: $(state[:thoughtHistory][:question])
            Example: $similarSQL_
            Your Q&A: $QandA
            Your work progress: $workprogress
            Evaluation: $(state[:evaluation])
            Suggestion: $(state[:suggestion])
            $errornote
            """
        _prompt = [
            Dict(:name=> "system", :text=> systemmsg),
            Dict(:name=> "user", :text=> usermsg)
        ]
        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
        response = _decisionMakerCleanResponse(text2textInstructLLM(prompt))

        responsedict, problem = _decisionMakerParse(response, header, dictkey, toollist)
        if responsedict === nothing
            # remember what went wrong so the next prompt can tell the LLM about it
            errornote = problem
            println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
            continue
        end

        state[:decisionMaker] = responsedict
        return responsedict
    end
    error("DecisionMaker failed to generate a thought \n", response)
end

# Strip the parts of a raw LLM reply that are known to confuse the header parser.
function _decisionMakerCleanResponse(response)
    # The LLM tends to generate an observation of its own because the prompt mentions
    # one. Keep only the text before the first marker that is present (markers are
    # checked in this order).
    for marker in ("observation:", "Observation:", "observation_", "Observation_")
        if occursin(marker, response)
            response = string(first(split(response, marker)))
            break
        end
    end
    # sometimes the LLM emits **Comprehension**: or ***Plan***: ; drop any run of two
    # or more asterisks (a single `*`, as in `SELECT *`, is left alone)
    response = replace(response, r"\*{2,}" => "")
    # sometimes the LLM numbers the headers, e.g. Plan_1: ; remove the numbering
    response = replace(response, r"_\d+:" => ":")
    return response
end

# Turn a cleaned LLM reply into the decision dictionary, or explain why it is unusable.
# Returns `(responsedict, nothing)` on success and `(nothing, errornote)` otherwise.
function _decisionMakerParse(response, header, dictkey, toollist)
    if occursin("NULL", response)
        return nothing, "\nSQL decisionMaker() NULL response is not allowed"
    end

    # every header must appear at most once ...
    wordcount = GeneralUtils.countGivenWords(response, header)
    for (i, n) in enumerate(wordcount)
        if n > 1
            return nothing, "\nSQL query has duplicated keyword, $(header[i])"
        end
    end

    # ... and at least once
    kw = [GeneralUtils.detect_keyword(keyword, response) for keyword in header]
    if any(isnothing, kw)
        return nothing,
            "\nSome keywords are missing, Required keywords=$header, Response keywords=$kw"
    end

    responsedict = GeneralUtils.textToDict(response, header;
        dictKey=dictkey, symbolkey=true)
    delete!(responsedict, :observation)

    # The SQL must not be wrapped; if the LLM fenced it anyway, unwrap the first block.
    if occursin("```", responsedict[:action_input])
        blocks = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])
        if isempty(blocks)
            return nothing, "\nAction_input contains an incomplete ``` block"
        end
        sql = blocks[1]
        if startswith(sql, "sql\n") # language tag of the fence
            sql = sql[5:end]
        end
        # keep the first statement only; sometimes comments follow the SQL
        responsedict[:action_input] = string(first(split(sql, ';')), ';')
    end

    if responsedict[:action_name] ∉ toollist
        return nothing, "\nYou must only use the given functions"
    end
    if any(tool -> occursin(tool, responsedict[:action_input]), toollist)
        return nothing, "\n action_name is in action_input which is not allowed."
    end

    keys_ = Symbol.(dictkey)
    for key in keys_
        if isempty(strip(string(responsedict[key])))
            return nothing, "\n $key is empty"
        end
    end
    # check if there are more than 1 key per category
    for key in keys_
        if length(GeneralUtils.findMatchingDictKey(responsedict, key)) > 1
            return nothing, "\n $key has more than one key"
        end
    end

    return responsedict, nothing
end
# function decisionMaker(state::T1, context, text2textInstructLLM::Function,
# ; querySQLVectorDBF::Union{T2, Nothing}=nothing
# )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:Function}
# # lessonDict =
# # if isfile("lesson.json")
# # lessonDict = copy(JSON3.read("lesson.json"))
# # else
# # lessonDict = nothing
# # end
# # lessonDict = nothing
# # lesson =
# # if lessonDict === nothing
# # ""
# # else
# # """
# # You have attempted to help the user before and failed, either because your reasoning for the
# # recommendation was incorrect or your response did not exactly match the user expectation.
# # The following lesson(s) give a plan to avoid failing to help the user in the same way you
# # did previously. Use them to improve your strategy to help the user.
# # Here are some lessons in JSON format:
# # $(JSON3.write(lessonDict))
# # When providing the thought and action for the current trial, that into account these failed
# # trajectories and make sure not to repeat the same mistakes and incorrect answers.
# # """
# # end
# systemmsg =
# """
# You are a helpful assistant that find the data from a database to satisfy the user's query.
# You are also eager to improve your helpfulness.
# For your information:
# - Observation: Result of the immediately preceding action
# At each round of conversation, the user will give you the current situation:
# User Query: ...
# Example: ...
# Your Q&A: ...
# Your work progress: ...
# Evaluation: Evaluation of the immediately preceding action and observation
# Suggestion: Suggestion for the immediately preceding action and observation
# You must follow the following guidelines:
# - Keep SQL queries focused only on the provided information.
# You should follow the following guidelines:
# - Do not create any table in the database
# - A junction table can be used to link tables together. Another use case is for filtering data.
# - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
# - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
# - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
# You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
# 1) Comprehension:
# - State your comprehension about the current situation.
# 2) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
# 3) Action_name (Must be aligned with your plan): Can be one of the following functions:
# - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
# For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
# Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
# 4) Action_input: Input to the action
# You should only respond in format as described below:
# Comprehension: ...
# Plan: ...
# Action_name: ...
# Action_input: ...
# Let's begin!
# """
# workprogress = ""
# for (k, v) in state[:thoughtHistory]
# if k ∉ [:question]
# workprogress *= "$k: $v\n"
# end
# end
# response = nothing # store for show when error msg show up
# errornote = ""
# # provide similar sql only for the first attempt
# similarSQL_ = "None"
# if length(state[:thoughtHistory]) == 1
# sql, distance = querySQLVectorDBF(state[:thoughtHistory][:question])
# similarSQL_ = sql !== nothing ? sql : "None"
# end
# for attempt in 1:10
# QandA = generatequestion(state, context, text2textInstructLLM; similarSQL=similarSQL_)
# usermsg =
# """
# $(context[:tablelist])
# User query: $(state[:thoughtHistory][:question])
# Example: $similarSQL_
# Your Q&A: $QandA
# Your work progress: $workprogress
# Evaluation: $(state[:evaluation])
# Suggestion: $(state[:suggestion])
# $errornote
# """
# _prompt =
# [
# Dict(:name=> "system", :text=> systemmsg),
# Dict(:name=> "user", :text=> usermsg)
# ]
# # put in model format
# prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
# response = text2textInstructLLM(prompt)
# # LLM tends to generate observation given that it is in the input
# response =
# if occursin("observation:", response)
# string(split(response, "observation:")[1])
# elseif occursin("Observation:", response)
# string(split(response, "Observation:")[1])
# elseif occursin("observation_", response)
# string(split(response, "observation_")[1])
# elseif occursin("Observation_", response)
# string(split(response, "Observation_")[1])
# else
# response
# end
# # sometime LLM output something like **Comprehension**: which is not expected
# response = replace(response, "**"=>"")
# response = replace(response, "***"=>"")
# # some time LLM output Plan_1: so we need to detect and replace topic numbering
# regex = r"_[0-1000]+:"
# matches = collect(eachmatch(regex, response))
# for m in matches
# response = replace(response, string(m.match)=>":")
# end
# if occursin("NULL", response)
# errornote = "\nSQL decisionMaker() NULL response is not allowed"
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
# dictkey = ["comprehension", "plan", "action_name", "action_input"]
# # detect if there are more than 1 key per categories
# wordcount = GeneralUtils.countGivenWords(response, header)
# duplicateKeywordFlag = false
# for (i, v) in enumerate(wordcount)
# keyword = header[i]
# keywordNumber = v
# if keywordNumber > 1
# errornote = "\nSQL query has duplicated keyword, $keyword"
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# duplicateKeywordFlag = true
# break
# end
# end
# duplicateKeywordFlag == true ? continue : nothing
# # check whether response has all header
# kw = []
# # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
# for keyword in header
# detected = GeneralUtils.detect_keyword(keyword, response)
# push!(kw, detected)
# end
# if nothing ∈ kw
# println("Some keywords are missing, Required keywords=$header, Response keywords=$kw ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue # try again next loop
# end
# # textToDict() search for action_input
# responsedict = GeneralUtils.textToDict(response, header;
# dictKey=dictkey, symbolkey=true)
# delete!(responsedict, :observation)
# # remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String})
# if occursin("```", responsedict[:action_input])
# sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1]
# if sql[1:4] == "sql\n"
# sql = sql[5:end]
# end
# sql = split(sql, ';') # some time there are comments in the sql
# sql = sql[1] * ';'
# responsedict[:action_input] = sql
# end
# toollist = ["TABLEINFO", "GETDATA"]
# if responsedict[:action_name] ∉ toollist
# errornote = "\nYou must only use the given functions"
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# for i in toollist
# if occursin(i, responsedict[:action_input])
# errornote = "\n action_name is in action_input which is not allowed."
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# end
# for i ∈ [:comprehension, :plan, :action_name, :action_input]
# if length(JSON3.write(responsedict[i])) == 0
# errornote = "\n $i is empty"
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# end
# # check if there are more than 1 key per categories
# for i ∈ [:comprehension, :plan, :action_name, :action_input]
# matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
# if length(matchkeys) > 1
# errornote = "\n $i has more than one key"
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# end
# state[:decisionMaker] = responsedict
# return responsedict
# end
# error("DecisionMaker failed to generate a thought \n", response)
# end
""" Assigns a scalar value to each new child node to be used for selec-
tion and backpropagation. This value effectively quantifies the agent's progress in task completion,
serving as a heuristic to steer the search algorithm towards the most promising regions of the tree.
# Arguments
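- `state<:AbstractDict`
The state of one of Yiem's agents. `state[:thoughtHistory]` is read to build the trajectory; `:evaluation`, `:evaluationscore`, `:accepted_as_answer` and `:suggestion` are overwritten, and `:isterminal` and `:reward` are set when the answer is accepted.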
- `text2textInstructLLM::Function`
A function that handles communication to LLM service
# Return
- `score::Integer`
# Example
```jldoctest
julia>
```
# Signature
"""
function evaluator(state::T1, text2textInstructLLM::Function
    ) where {T1<:AbstractDict}
    systemmsg = """
        You are a helpful assistant that analyzes agent's trajectory to find solutions and observations (i.e., the results of actions) to answer the user's questions.
        Definitions:
        "question" is the user's question
        "understanding" is agent's understanding about the current situation
        "reasoning" is agent's step-by-step reasoning about the current situation
        "plan" is agent's plan to complete the task from the current situation
        "action_name" is the name of the action taken, which can be one of the following functions:
        - RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
        For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
        Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
        "action_input" is the input to the action
        "observation" is result of the preceding immediate action
        Trajectory: ...
        Error_note: error note from your previous attempt
        - When the search returns no result, validate whether the SQL query makes sense before accepting it as a valid answer.
        1) Trajectory_evaluation: Analyze the trajectory of a solution to answer the user's original question.
        - Evaluate the correctness of each section and the overall trajectory based on the given question.
        - Provide detailed reasoning and analysis, focusing on the latest thought, action, and observation.
        - Incomplete trajectory are acceptable if the thoughts and actions up to that point are correct, even if the final answer isn't reached.
        - Do not generate additional thoughts or actions.
        2) Answer_evaluation:
        - Focus only on the matter mentioned in the question and comprehensively analyze how the latest observation's details addresses the question
        3) Accepted_as_answer: Decide whether the latest observation's content answers the question. Can be "Yes" or "No"
        Bad example (The observation didn't answers the question):
        question: Find cars with 4 wheels.
        observation: There are an apple in the table.
        Good example (The observation answers the question):
        question: Find cars with a stereo.
        observation: There are 1 cars in the table. 1) brand: Toyota, model: yaris, color: black.
        4) Score: Correctness score s where s is a single integer between 0 to 9.
        For example:
        - 0 indicates that both the trajectory is incorrect, failed or errors and the observation is incorrect or failed
        - 4 indicates that the trajectory are correct but the observation is incorrect or failed
        - 5 indicates that the trajectory are correct, but no results are returned.
        - 6 indicates that the trajectory are correct, but the observation's content doesn't directly answer the question
        - 8 indicates that both the trajectory are correct, and the observation's content directly answers the question.
        - 9 indicates a perfect perfomance. Both the trajectory are correct, and the observation's content directly answers the question, surpassing your expectations.
        5) Suggestion: if accepted_as_answer is "No", provide suggestion.
        Trajectory_evaluation: ...
        Answer_evaluation: ...
        Accepted_as_answer: ...
        Score: ...
        Suggestion: ...
        Let's begin!
        """

    # the whole trajectory, one "key: value" per line
    thoughthistory = join("$k: $v\n" for (k, v) in state[:thoughtHistory])

    header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
    dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]

    # Declared before the loop on purpose: a variable first assigned inside a `for` body
    # is local to that body and would be undefined in the final error message.
    response = nothing
    errornote = "" # feedback about the previous failed attempt, sent back to the LLM

    for attempt in 1:10
        usermsg = """
            Trajectory: $thoughthistory
            Error_note: $errornote
            """
        _prompt = [
            Dict(:name=> "system", :text=> systemmsg),
            Dict(:name=> "user", :text=> usermsg)
        ]
        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
        response = text2textInstructLLM(prompt)

        # sometimes the LLM emits **Score**: or ***Score***: ; drop any run of two or
        # more asterisks
        response = replace(response, r"\*{2,}" => "")

        # make sure every header is in the response
        missingheaders = [h for h in header if GeneralUtils.detect_keyword(h, response) === nothing]
        if !isempty(missingheaders)
            errornote = "Your previous response didn't provide $(join(missingheaders, ", "))"
            continue
        end

        responsedict = GeneralUtils.textToDict(response, header;
            dictKey=dictkey, symbolkey=true)

        # Sometimes "6\nThe trajectories are incomplete" is generated; only the leading
        # number is wanted, and it must be a single integer in 0..9.
        scorematch = match(r"\d+", string(responsedict[:score]))
        score = scorematch === nothing ? nothing : tryparse(Int, scorematch.match)
        if score === nothing || !(0 <= score <= 9)
            errornote = "Your previous response didn't provide Score: as a single integer between 0 to 9"
            continue
        end
        responsedict[:score] = score

        accepted = String(strip(string(responsedict[:accepted_as_answer])))
        if accepted ∉ ("Yes", "No")
            # retry with feedback instead of aborting the whole evaluation
            errornote = "Your previous response didn't provide Accepted_as_answer: as exactly \"Yes\" or \"No\""
            continue
        end
        responsedict[:accepted_as_answer] = accepted

        # add to state here instead to in transition() because the latter causes julia
        # extension crash (a bug in julia extension)
        state[:evaluation] = "$(responsedict[:trajectory_evaluation]) $(responsedict[:answer_evaluation])"
        state[:evaluationscore] = score
        state[:accepted_as_answer] = accepted
        state[:suggestion] = responsedict[:suggestion]

        if accepted == "Yes"
            # mark the state as terminal state because the evaluation say so.
            state[:isterminal] = true
            # evaluation score as reward because different answers hold different value
            # for the user.
            state[:reward] = score
        end

        println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
        pprintln(Dict(responsedict))
        return score
    end
    error("Evaluator failed to generate an evaluation, Response: \n$response\n<|End of error|>")
end
"""
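Ask the LLM to diagnose a failed trajectory and phrase a short lesson that should help avoid the same failure.
# Arguments
- `config<:AbstractDict`
Service configuration. `config[:externalservice][:text2textinstruct][:mqtttopic]`, `config[:mqttServerInfo][:broker]` and `config[:mqttServerInfo][:port]` are read to address the LLM service.
- `state<:AbstractDict`
Agent state. `state[:thoughtHistory]` is serialized to JSON and sent as the user message.
# Return
- `reflection::String`
The `reflection` string taken from the LLM's JSON reply.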
# Example
```jldoctest
julia>
```
# TODO
- [] update docstring
- [] implement the function
- [x] add try block. check result that it is expected before returning
# Signature
"""
function reflector(config::T1, state::T2)::String where {T1<:AbstractDict, T2<:AbstractDict}
    # https://github.com/andyz245/LanguageAgentTreeSearch/blob/main/hotpot/hotpot.py
    systemmsg = """
        You will be given a question and a trajectory of the previous help you've done for a user.
        You were unsuccessful in helping the user either because you use the wrong syntax, or use the wrong function, or refer to item that don't exist in the database.
        In a few sentences, Diagnose a possible reason for failure and devise a new, specific and concise lesson that aims to mitigate the same failure.
        Use complete sentences.
        You should only respond in JSON format as describe below:
        {"reflection": "your relection"}
        Here are some examples:
        user:
        {
        "question": "Hello, I would like a get a bottle of wine",
        "thought_1": "A customer wants to buy a bottle of wine. Before making a recommendation, I need to know more about their preferences.",
        "action_1": {"name": "chatbox", "input": "What is the occasion for which you're buying this wine?"},
        "observation_1": "We are holding a wedding party",
        "thought_2": "A wedding party, that's a great occasion! The customer might be looking for a celebratory drink. Let me ask some more questions to narrow down the options.",
        "action_2": {"name": "chatbox", "input": "What type of food will you be serving at the wedding?"},
        "observation_2": "It will be Thai dishes.",
        "thought_3": "With Thai food, I should recommend a wine that complements its spicy and savory flavors. And since it's a celebratory occasion, the customer might prefer a full-bodied wine.",
        "action_3": {"name": "chatbox", "input": "What is your budget for this bottle of wine?"},
        "observation_3": "I would spend up to 50 bucks.",
        "thought_4": "Now that I have some more information, it's time to narrow down the options.",
        "action_4": {"name": "winestock", "input": "red wine with full body, pairs well with spicy food, budget \$50"},
        "observation_4": "I found the following wines in our stock: \n{\n 1: El Enemigo Cabernet Franc 2019\n2: Tantara Chardonnay 2017\n\n}\n",
        "thought_5": "Now that I have a list of potential wines, I need to know more about the customer's taste preferences.",
        "action_5": {"name": "chatbox", "input": "What type of wine characteristics are you looking for? (e.g. t.e.g. tannin level, sweetness, intensity, acidity)"},
        "observation_5": "I like full-bodied red wine with low tannin.",
        "thought_6": "Now that I have more information about the customer's preferences, it's time to make a recommendation.",
        "action_6": {"name": "recommendbox", "input": "El Enemigo Cabernet Franc 2019"},
        "observation_6": "I don't like the one you recommend. I want dry wine."
        }
        assistant:
        {
        "reflection": "I asked the user about the occasion, food type, and budget, and then searched for wine in the inventory right away. However, I should have asked the user for the specific wine type and their preferences in order to gather more information before making a recommendation."
        }
        user:
        {
        "question": "How many wines suitable to be paired with lamb?",
        "thought_1": "The user wants to know how many wines that can be paired with lamb, I will try to find the table that has information about pairing between wines and food items.",
        "action_1": {"name": "getdata", "input": "What is the occasion for which you're buying this wine?"},
        "observation_1": "We are holding a wedding party",
        "thought_2": "A wedding party, that's a great occasion! The customer might be looking for a celebratory drink. Let me ask some more questions to narrow down the options.",
        "action_2": {"name": "chatbox", "input": "SELECT * FROM wine_food WHERE obj_description LIKE '%lamb%'"},
        "observation_2": "SQL execution error: SQL syntax error. It must end with character ';'",
        }
        assistant:
        {
        "reflection": "I need to have ';' at the end of the SQL query."
        }
        Let's begin!
        """
    usermsg = """
        $(JSON3.write(state[:thoughtHistory]))
        """
    _prompt = [
        Dict(:name=> "system", :text=> systemmsg),
        Dict(:name=> "user", :text=> usermsg)
    ]
    # put in model format
    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")

    # address the text-to-text LLM service over MQTT
    externalService = config[:externalservice][:text2textinstruct]
    msgMeta = GeneralUtils.generate_msgMeta(
        externalService[:mqtttopic];
        senderName= "reflector",
        senderId= string(uuid4()),
        receiverName= "text2textinstruct",
        mqttBrokerAddress= config[:mqttServerInfo][:broker],
        mqttBrokerPort= config[:mqttServerInfo][:port],
    )
    outgoingMsg = Dict(
        :msgMeta=> msgMeta,
        :payload=> Dict(
            :text=> prompt,
            :kwargs=> Dict(
                :max_tokens=> 512,
                :stop=> ["<|eot_id|>"],
            )
        )
    )

    # The same request is resent until a reply carries a usable {"reflection": "..."}.
    for attempt in 1:10
        try
            response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
            text = response[:response][:text]

            # The LLM may surround the JSON object with extra text; keep only the span
            # from the first '{' to the last '}'.
            jsonstart = findfirst('{', text)
            jsonend = findlast('}', text)
            if jsonstart === nothing || jsonend === nothing || jsonend < jsonstart
                error("Not valid JSON")
            end
            reflectionDict = copy(JSON3.read(text[jsonstart:jsonend]))

            # check that the dict has the required value before returning it
            reflection = get(reflectionDict, :reflection, nothing)
            if !(reflection isa AbstractString)
                error("JSON response has no \"reflection\" string")
            end
            return reflection
        catch e
            # never swallow a user interrupt; everything else is logged and retried
            e isa InterruptException && rethrow()
            errorMsg = sprint(showerror, e)
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            @warn "Attempt $attempt. Error occurred: $errorMsg\n$st"
            println("")
        end
    end
    error("reflector failed to generate a thought")
end
""" Get a new state
# Arguments
- `state<:AbstractDict`
state's dictionary
- `args::NamedTuple`
Arguments for decisionMaker() and others
# Return
- `NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}}`
# Example
```jldoctest
julia> using SQLLLM, DataStructures
julia> state = Dict(
:isterminal => false,
:lesson => nothing,
:reward => 0,
:evaluation => "None",
:accepted_as_answer => "No",
:thoughtHistory => OrderedDict{Symbol, Any}(:question => "How many wines do you have that can be paired with lamb?"),
:evaluationscore => 0,
:suggestion => "None"
)
```
# TODO
- [] add embedding of newstate and store in newstate[:embedding]
- [PENDING] should getdata() return isterminal?
# Signature
"""
function transition(state::T, args::NamedTuple
    )::NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}} where {T<:AbstractDict}
    # Expand one step from `state`: ask the decision maker for an action, run the matching
    # LLM function, build the successor state with makeNewState() and score it with the
    # evaluator. Throws when the decision maker requests an action name not handled below.

    decisionMakerF::Function = args[:decisionMaker]
    evaluatorF::Function = args[:evaluator]
    context = args[:context]
    executeSQL::Function = args[:executeSQL]
    text2textInstructLLM::Function = args[:text2textInstructLLM]

    # query() forwards its optional `similarSQLVectorDB` (default `nothing`) under this key,
    # so asserting `::Function` here would throw before any work is done. The value is
    # handed to the decision maker as-is.
    querySQLVectorDBF::Union{Function, Nothing} = args[:querySQLVectorDB]

    # args[:reflector] and args[:insertSQLVectorDB] are not read here: neither is used in
    # this function, and asserting `::Function` on the latter fails whenever query() is
    # called without `insertSQLVectorDB`.

    # getting SQL from vectorDB
    thoughtDict = decisionMakerF(state, context, text2textInstructLLM; querySQLVectorDBF)

    # map action and input() to llm function
    actionname = thoughtDict[:action_name]
    response =
        if actionname == "listalltables"
            listAllTable_json(executeSQL)
        elseif actionname == "TABLEINFO"
            tableinfo(executeSQL, thoughtDict[:action_input])
        elseif actionname == "RUNSQL"
            sqlresponse = SQLexecution(executeSQL, thoughtDict[:action_input])
            if sqlresponse[:success]
                extracted = extractContent_dataframe(sqlresponse[:result], text2textInstructLLM,
                    thoughtDict[:action_input])
                (rawresponse=sqlresponse[:result], result=extracted, errormsg=nothing, success=true)
            else
                (result=nothing, errormsg=sqlresponse[:errormsg], success=false)
            end
        else
            error("undefined LLM function. Requesting $(thoughtDict[:action_name])")
        end

    # this section allow LLM functions above to have different return values.
    # Missing fields fall back to a neutral default.
    success::Bool = haskey(response, :success) ? response[:success] : false
    # on failure the error message becomes the observation
    result = success ? response[:result] : response[:errormsg]
    rawresponse = haskey(response, :rawresponse) ? response[:rawresponse] : nothing
    select = haskey(response, :select) ? response[:select] : nothing
    reward::Integer = haskey(response, :reward) ? response[:reward] : 0
    isterminal::Bool = haskey(response, :isterminal) ? response[:isterminal] : false

    # the observation is stored as a JSON string
    newNodeKey, newstate = makeNewState(state, thoughtDict, rawresponse, JSON3.write(result),
        select, reward, isterminal)
    progressvalue::Integer = evaluatorF(newstate, text2textInstructLLM)

    return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=progressvalue)
end
""" Ask the database using English language.
# Arguments
- `query<:AbstractString`
A natural language query in English
- `executeSQL::Function`
A function that executes SQL queries against the database
- `text2textInstructLLM::Function`
A function that handles communication with a text-to-text instruction-based language model
# Keyword Arguments
- `insertSQLVectorDB::Union{Function, Nothing}=nothing`
Optional function to insert SQL queries into a vector database for future reference
- `similarSQLVectorDB::Union{Function, Nothing}=nothing`
Optional function to find similar SQL queries from a vector database
# Returns
- `NamedTuple{(:text, :rawresponse), Tuple{Any, Any}}`
- `:text`: The query result in natural language
- `:rawresponse`: The raw database response
# Example
```jldoctest
julia> using LibPQ, JSON3, UUIDs
julia> using SQLLLM, GeneralUtils
julia> function executeSQL(sql)
DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
result = LibPQ.execute(DBconnection, sql)
close(DBconnection)
return result
end
julia> function text2textInstructLLM(prompt::String)
config = Dict(
:mqttServerInfo => Dict(
:description => "mqtt server info",
:port => 1883,
:broker => "mqtt.yiem.cc"
),
:externalservice => Dict(
:text2textinstruct => Dict(
:mqtttopic => "/loadbalancer/requestingservice",
:description => "text to text service with instruct LLM",
:llminfo => Dict(:name => "llama3instruct")
),
)
)
# apply LLM specific instruct format
externalService = config[:externalservice][:text2textinstruct]
msgMeta = GeneralUtils.generate_msgMeta(
externalService[:mqtttopic],
senderName= "SQLLLM",
senderId= string(uuid4()),
receiverName= "text2textinstruct",
mqttBroker= config[:mqttServerInfo][:broker],
mqttBrokerPort= config[:mqttServerInfo][:port],
)
outgoingMsg = Dict(
:msgMeta=> msgMeta,
:payload=> Dict(
:text=> prompt,
:kwargs=> Dict(
:max_tokens=> 512,
:stop=> ["<|eot_id|>"],
:temperature=> 0.2,
)
)
)
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
response = _response[:response][:text]
return response
end
julia> query = "How many wines do you have that can be paired with lamb?"
julia> result = SQLLLM.query(query, executeSQL, text2textInstructLLM)
julia> println(result)
```
# Signature
"""
function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
    insertSQLVectorDB::Union{Function, Nothing}=nothing,
    similarSQLVectorDB::Union{Function, Nothing}=nothing,
    )::NamedTuple{(:text, :rawresponse), Tuple{Any, Any}} where {T<:AbstractString}
    # Answer `query` in two stages:
    #   1) if a similar-SQL lookup is supplied and returns a close match, run that SQL directly
    #   2) otherwise search for an answer with LLMMCTS.runMCTS() using transition()

    # The lookup is optional (default `nothing`), so it must not be called unconditionally.
    # It is called as `similarSQLVectorDB(query)` and destructured as `(sql, distance)`.
    if similarSQLVectorDB !== nothing
        sql, distance = similarSQLVectorDB(query)
        if sql !== nothing && distance <= 1
            # query vector db to get wine
            response = SQLexecution(executeSQL, sql)
            if response[:success]
                extracted = extractContent_dataframe(response[:result], text2textInstructLLM, sql)
                return (text=extracted, rawresponse=response[:result])
            end
            # a failed execution falls through to the MCTS search below
        end
    end

    # do MCTS if no data in the database
    # add extra context for Evaluator so that it knows the observation is from searching a database
    initialstate = Dict{Symbol, Any}(
        :reward=> 0,
        :isterminal=> false,
        :evaluation=> "None",
        :evaluationscore=> 0,
        :suggestion=> "None",
        :accepted_as_answer=> "No",
        :lesson=> nothing,
        # contain question, thought_1, action_1, observation_1, thought_2, ...
        :thoughtHistory=> OrderedDict{Symbol, Any}(
            :question=> query,
        ),
    )

    #XXX find a way to recreate the schema from a existing database
    #    (e.g. from listAllTable_str(executeSQL)[:result]) instead of hard-coding it
    context = Dict(
        :tablelist =>
        """
        Here are SQL that used to create tables in the database:
        create table customer (
        customer_id uuid primary key default gen_random_uuid (),
        customer_firstname varchar(128),
        customer_lastname varchar(128),
        customer_displayname varchar(128) not null,
        customer_username varchar(128),
        customer_password varchar(128),
        customer_gender varchar(128),
        country varchar(128),
        telephone varchar(128),
        email varchar(128) not null,
        customer_birthdate varchar(128),
        note text,
        other_attributes jsonb,
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp,
        description text
        );
        create table retailer (
        retailer_id uuid primary key default gen_random_uuid (),
        retailer_name varchar(128) not null,
        retailer_username varchar(128) not null,
        retailer_password varchar(128) not null,
        retailer_address text not null,
        country varchar(128) not null,
        contact_person varchar(128) not null,
        telephone varchar(128) not null,
        email varchar(128) not null,
        note text,
        other_attributes jsonb,
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp,
        description text
        );
        create table food (
        food_id uuid primary key default gen_random_uuid (),
        food_name varchar(128) not null,
        country varchar(128),
        spiciness integer,
        sweetness integer,
        sourness integer,
        savoriness integer,
        bitterness integer,
        serving_temperature integer,
        note text,
        other_attributes jsonb,
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp,
        description text
        );
        create table wine (
        wine_id uuid primary key default gen_random_uuid (),
        seo_name varchar(128) not null,
        wine_name varchar(128) not null,
        winery varchar(128) not null,
        vintage integer not null,
        region varchar(128) not null,
        country varchar(128) not null,
        wine_type varchar(128) not null,
        grape varchar(128) not null,
        serving_temperature varchar(128) not null,
        intensity integer,
        sweetness integer,
        tannin integer,
        acidity integer,
        fizziness integer,
        tasting_notes text,
        note text,
        other_attributes jsonb,
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp,
        description text
        );
        create table wine_food (
        wine_id uuid references wine(wine_id),
        food_id uuid references food(food_id),
        constraint wine_food_id primary key (wine_id, food_id),
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp
        );
        CREATE TABLE retailer_wine (
        retailer_id uuid references retailer(retailer_id),
        wine_id uuid references wine(wine_id),
        constraint retailer_wine_id primary key (retailer_id, wine_id),
        price NUMERIC(10, 2),
        currency varchar(3) not null,
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp
        );
        CREATE TABLE retailer_food (
        retailer_id uuid references retailer(retailer_id),
        food_id uuid references food(food_id),
        constraint retailer_food_id primary key (retailer_id, food_id),
        price NUMERIC(10, 2),
        currency varchar(3) not null,
        created_time timestamptz default current_timestamp,
        updated_time timestamptz default current_timestamp
        );
        """
    )

    # everything transition() needs; both optional functions are forwarded as given
    transitionargs = (
        decisionMaker=decisionMaker,
        evaluator=evaluator,
        reflector=reflector,
        context=context,
        executeSQL=executeSQL,
        text2textInstructLLM=text2textInstructLLM,
        querySQLVectorDB=similarSQLVectorDB,
        insertSQLVectorDB=insertSQLVectorDB,
    )

    # stop searching as soon as a state reaches a reward of 8 or more
    earlystop(state) = state[:reward] >= 8

    root, _, resultState, highValueState =
        LLMMCTS.runMCTS(initialstate, transition, transitionargs;
            horizontalSampleExpansionPhase=5,
            horizontalSampleSimulationPhase=2,
            maxSimulationDepth=5,
            maxiterations=1,
            explorationweight=1.0,
            earlystop=earlystop,
            saveSimulatedNode=true,
            multithread=true)

    # compare all high value state answer then select the best one
    if length(highValueState) > 0
        selected = compareState(query, highValueState, text2textInstructLLM)
        resultState = highValueState[selected]
    end

    # the answer is the latest observation; the SQL that produced it shares the same index
    latestKey, latestInd = GeneralUtils.findHighestIndexKey(resultState[:thoughtHistory], "observation")
    action_input = Symbol("action_input_$latestInd") # latest sql
    sql = resultState[:thoughtHistory][action_input]
    extracted = resultState[:thoughtHistory][latestKey]

    # add to vectorDB only if the answer is achieved and the state is terminal
    if insertSQLVectorDB !== nothing && resultState[:isterminal] == true &&
        resultState[:rawresponse] !== nothing
        insertSQLVectorDB(resultState[:thoughtHistory][:question], sql)
    end

    if extracted === nothing
        println("query() return nothing")
    end

    return (text=extracted, rawresponse=resultState[:rawresponse])
end
""" Make a new state.
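# Arguments
- `currentstate<:AbstractDict`
The state to extend; it is deep-copied and not modified
- `thoughtDict<:AbstractDict`
Must contain `:comprehension`, `:action_name` and `:action_input`
- `rawresponse`
Whatever the action returned; stored in `newstate[:rawresponse]`
- `response<:AbstractString`
The observation text stored in the thought history
- `select::Union{Number, Nothing}`
Stored in `newstate[:select]`
- `reward<:Number`
Stored in `newstate[:reward]`
- `isterminal::Bool`
Stored in `newstate[:isterminal]`
# Return
- `NamedTuple{(:newNodeKey, :newstate)}`
A new node key and the new state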
# Example
```jldoctest
julia>
```
# Signature
"""
function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::T2, select::Union{T3, Nothing},
    reward::T3, isterminal::Bool
    )::NamedTuple{(:newNodeKey, :newstate), Tuple{String, Dict{Symbol, <:Any}}} where {T1<:AbstractDict, T2<:AbstractString, T3<:Number, T4<:AbstractDict}
    # Build the successor of `currentstate`: a deep copy whose thought history gains one
    # more step (comprehension, action name, action input, observation) and whose
    # bookkeeping fields are overwritten with the values passed in.

    # entries recorded per step, in the order they are added to the history
    stepfields = [:comprehension, :action_name, :action_input, :observation]

    # the step number continues from the highest index found for the first field;
    # `nothing` as the found key starts the numbering at 1
    foundKey, foundIndex =
        GeneralUtils.findHighestIndexKey(currentstate[:thoughtHistory], stepfields[1])
    stepindex = foundKey === nothing ? 1 : foundIndex + 1

    # one history key per field for this step
    stepkeys = makekey.(stepfields, stepindex)

    # add Thought, action, observation to thoughtHistory of a copy so the caller's state
    # stays untouched
    newstate = deepcopy(currentstate)
    for (field, historykey) in zip(stepfields, stepkeys)
        # the observation comes from the executed action, everything else from the LLM
        newstate[:thoughtHistory][historykey] =
            field == :observation ? response : thoughtDict[Symbol(field)]
    end

    newstate[:reward] = reward
    newstate[:select] = select
    newstate[:isterminal] = isterminal
    newstate[:rawresponse] = rawresponse # whatever return from action

    return (newNodeKey=GeneralUtils.uuid4snakecase(), newstate=newstate)
end
""" Ask the LLM to generate questions, with answers, about the current situation.

# Arguments
    - `state<:AbstractDict`
        A state; `state[:thoughtHistory]` must hold `:question` plus any progress recorded so far
    - `context`
        Must provide `context[:tablelist]`, which is placed at the top of the user message
    - `text2textInstructLLM::Function`
        A function that handles communication to LLM service

# Keyword Arguments
    - `similarSQL::Union{AbstractString, Nothing}=nothing`
        A SQL statement for a similar query, shown to the LLM as an example

# Return
    - `String`
        "Q1: " followed by the `:q1` part parsed from the LLM response

# Throws
    - an error when no usable response is obtained after 10 attempts

# Signature
"""
function generatequestion(state::T1, context, text2textInstructLLM::Function;
    similarSQL::Union{T2, Nothing}=nothing
    )::String where {T1<:AbstractDict, T2<:AbstractString}

    # text for the "Example:" line of the user message
    sqlexample =
        if similarSQL === nothing
            "None"
        else
            "This is the closest matching SQL statement for a similar query: $similarSQL"
        end

    systemmsg =
        """
        You are a SQL expert that generate multiple questions about the current situation.
        At each round of conversation, the user will give you the current situation:
        User query: ...
        Example: ...
        Your work progress: ...
        About the tables in the database:
        - Column name can be the same in different tables. Refer to column comments to get more details.
        - Columns represent properties of the items the table represents. For example, the 'color' column in a "dealer_car" table corresponds to the color of the dealer's car.
        - A junction table can be used to link tables together.
        You must follow the following guidelines:
        1) Your question must be specific to locating each piece of information mentioned in the query and how to retrieve it.
        2) Your question should be specific, self-contained and not require any additional context.
        3) Some information can be accessed by joining multiple tables.
        4) Do not generate any question or comments at the end.
        You should follow the following guidelines:
        - When querying data in the database, start with broad search terms and refine your query later for more precise results.
        You should then respond to the user with:
        1) Understanding:
        - State your understanding about the current situation.
        2) Q: Given the situation, "ask yourself" about the situation at least five, but no more than ten, questions.
        3) A: Given the situation, "answer to yourself" the best you can.
        - Do not generate any text after the last answer.
        You must only respond in format as described below:
        Understanding: ...
        Q1: ...
        A1: ...
        Q2: ...
        A2: ...
        Q3: ...
        A3: ...
        ...
        Here are some examples:
        Q: What information in the hints is not necessary based on the query?
        A: Country is not specified in the query thus it should not be included in an SQL
        Q: How can I modify a SQL example to fit my specific query needs?
        A: ...
        Let's begin!
        """

    # The question is already sent on its own "User query:" line, so keep it out of the
    # progress listing. The history stores it under :question; :query is still skipped
    # in case a caller uses that key.
    workprogress = join(
        "$k: $v\n" for (k, v) in state[:thoughtHistory] if k ∉ (:question, :query)
    )

    response = nothing # store for show when error msg show up
    errornote = ""

    for attempt in 1:10
        # rebuilt on every attempt because `errornote` may have been set by a failed one
        usermsg =
            """
            $(context[:tablelist])
            User query: $(state[:thoughtHistory][:question])
            Example: $sqlexample
            Your work progress: $workprogress
            $errornote
            """

        _prompt =
            [
                Dict(:name=> "system", :text=> systemmsg),
                Dict(:name=> "user", :text=> usermsg)
            ]

        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")

        try
            response = text2textInstructLLM(prompt)

            # check if response is valid
            q_number = count("Q", response)
            if q_number < 1
                errornote = "too few question"
                error("too few questions only $q_number questions are generated")
            end

            # drop backticks before parsing
            response = replace(response, '`'=>"")

            header = ["Understanding:", "Q1:"]
            dictkey = ["understanding", "q1"]
            responsedict = GeneralUtils.textToDict(response, header;
                dictKey=dictkey, symbolkey=true)
            response = "Q1: " * responsedict[:q1]

            pprintln(Dict(responsedict))
            return response
        catch e
            # report the failure and try again
            io = IOBuffer()
            showerror(io, e)
            errorMsg = String(take!(io))
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("\n~~~ SQLLLM generatequestion() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
        end
    end

    error("generatequestion failed to generate a thought ", response)
end
end # module interface