16 Commits

Author SHA1 Message Date
narawat lamaiin
bf223b64b2 update 2025-04-27 22:32:22 +07:00
narawat lamaiin
d9c842bba5 update 2025-04-25 21:13:12 +07:00
narawat lamaiin
b8fd331c1a update 2025-04-13 21:45:58 +07:00
narawat lamaiin
00b0ab01a4 update 2025-04-04 15:05:16 +07:00
narawat lamaiin
fd5ac82662 update 2025-04-01 21:17:03 +07:00
narawat lamaiin
bc0f735ab7 update 2025-03-22 20:26:41 +07:00
3d03a4d351 update 2025-03-21 10:04:22 +07:00
568e0ff54f update 2025-03-20 16:08:40 +07:00
83a20faab6 update 2025-03-19 19:11:06 +07:00
418c543d44 update 2025-03-19 11:29:31 +07:00
e6ce6f9954 update 2025-03-18 21:22:12 +07:00
7fd0d6269a update 2025-03-18 08:37:35 +07:00
e391547991 update 2025-03-18 07:54:23 +07:00
7c9ceb06f8 update 2025-03-18 07:34:51 +07:00
14c881741e update 2025-03-16 22:11:23 +07:00
0873b1341f mark new version 2025-03-15 11:43:05 +07:00
11 changed files with 737 additions and 181 deletions

View File

@@ -1,6 +1,6 @@
# This file is machine-generated - editing it directly is not advised
julia_version = "1.11.3"
julia_version = "1.11.4"
manifest_format = "2.0"
project_hash = "9e0d7dca51b949f2ffa5477b895b90988ec62529"
@@ -202,7 +202,7 @@ version = "1.11.0"
deps = ["CSV", "DataFrames", "DataStructures", "Dates", "Distributions", "JSON3", "MQTTClient", "PrettyPrinting", "Random", "SHA", "UUIDs"]
path = "../GeneralUtils"
uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe"
version = "0.2.2"
version = "0.2.3"
[[deps.HTTP]]
deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "PrecompileTools", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"]
@@ -306,7 +306,7 @@ version = "1.19.3+0"
deps = ["GeneralUtils", "JSON3", "PrettyPrinting"]
path = "../LLMMCTS"
uuid = "d76c5a4d-449e-4835-8cc4-dd86ec44f241"
version = "0.1.3"
version = "0.1.4"
[[deps.LaTeXStrings]]
git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"
@@ -471,7 +471,7 @@ version = "0.3.27+1"
[[deps.OpenLibm_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
version = "0.8.1+2"
version = "0.8.1+4"
[[deps.OpenSSL]]
deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"]

View File

@@ -1,7 +1,7 @@
name = "SQLLLM"
uuid = "2ebc79c7-cc10-4a3a-9665-d2e1d61e63d3"
authors = ["narawat lamaiin <narawat@outlook.com>"]
version = "0.2.3"
version = "0.2.4"
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"

View File

@@ -10,6 +10,8 @@ using ..util, ..llmfunction
""" Think and choose action.
# Arguments
@@ -139,7 +141,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
For your information:
- Observation: Result of the immediately preceding action
At each round of conversation, the user will give you the current situation:
At each round of conversation, the user will give you the following:
User Query: ...
Example: ...
Your Q&A: ...
@@ -157,12 +159,12 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
- If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
- Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
1) Comprehension:
- State your comprehension about the current situation.
2) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
3) Action_name (Must be aligned with your plan): Can be one of the following functions:
- GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input:
Comprehension: state your comprehension about the current situation.
Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
Action_name: (Typically corresponds to the execution of the first step in your plan)
Can be one of the following function names:
- RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
4) Action_input: Input to the action
@@ -184,7 +186,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
end
response = nothing # store for show when error msg show up
errornote = ""
errornote = "N/A"
# provide similar sql only for the first attempt
similarSQL_ = "None"
@@ -193,8 +195,20 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
similarSQL_ = sql !== nothing ? sql : "None"
end
header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
dictkey = ["comprehension", "plan", "action_name", "action_input"]
llmkwargs=Dict(
:num_ctx => 32768,
:temperature => 0.1,
)
for attempt in 1:10
if attempt > 1
println("\nERROR SQLLLM decisionMaker() attempt $attempt/10 ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
llmkwargs[:temperature] = 0.1 * attempt
end
QandA = generatequestion(state, context, text2textInstructLLM; similarSQL=similarSQL_)
usermsg =
@@ -206,7 +220,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
Your work progress: $workprogress
Evaluation: $(state[:evaluation])
Suggestion: $(state[:suggestion])
$errornote
P.S. $errornote
"""
_prompt =
@@ -216,8 +230,9 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
response = text2textInstructLLM(prompt)
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
response = GeneralUtils.deFormatLLMtext(response, "granite3")
# LLM tends to generate observation given that it is in the input
response =
@@ -245,42 +260,38 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
end
if occursin("NULL", response)
errornote = "\nSQL decisionMaker() NULL response is not allowed"
println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
errornote = "\nYour previous attempt was NULL. This is not allowed"
println("\nERROR SQLLLM decisionMaker() $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
header = ["Comprehension:", "Plan:", "Action_name:", "Action_input:"]
dictkey = ["comprehension", "plan", "action_name", "action_input"]
# detect whether any category has more than one key
wordcount = GeneralUtils.countGivenWords(response, header)
duplicateKeywordFlag = false
for (i, v) in enumerate(wordcount)
keyword = header[i]
keywordNumber = v
if keywordNumber > 1
errornote = "\nSQL query has duplicated keyword, $keyword"
println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
duplicateKeywordFlag = true
break
end
end
duplicateKeywordFlag == true ? continue : nothing
# # detect if there are more than 1 key per categories
# wordcount = GeneralUtils.countGivenWords(response, header)
# duplicateKeywordFlag = false
# for (i, v) in enumerate(wordcount)
# keyword = header[i]
# keywordNumber = v
# if keywordNumber > 1
# errornote = "\nSQL query has duplicated keyword, $keyword"
# println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# duplicateKeywordFlag = true
# break
# end
# end
# duplicateKeywordFlag == true ? continue : nothing
# check whether response has all header
kw = []
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in header
detected = GeneralUtils.detect_keyword(keyword, response)
push!(kw, detected)
end
if nothing ∈ kw
println("Some keywords are missing, Required keywords=$header, Response keywords=$kw ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue # try again next loop
detected_kw = GeneralUtils.detect_keyword(header, response)
if 0 ∈ values(detected_kw)
errornote = "\nYour previous attempt did not have all points according to the required response format"
println("\nERROR SQLLLM decisionMaker() $errornote \n$response", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
elseif sum(values(detected_kw)) > length(header)
errornote = "\nYour previous attempt has duplicated points according to the required response format"
println("\nERROR SQLLLM decisionMaker() $errornote \n$response", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
# textToDict() search for action_input
responsedict = GeneralUtils.textToDict(response, header;
dictKey=dictkey, symbolkey=true)
@@ -298,35 +309,25 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
responsedict[:action_input] = sql
end
toollist = ["TABLEINFO", "GETDATA"]
toollist = ["TABLEINFO", "RUNSQL"]
if responsedict[:action_name] ∉ toollist
errornote = "\nYou must only use the given functions"
println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
errornote = "\nYour previous attempt has action_name that is not in the tool list"
println("\nERROR SQLLLM decisionMaker() $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
for i in toollist
if occursin(i, responsedict[:action_input])
errornote = "\n action_name is in action_input which is not allowed."
println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
errornote = "\nYour previous attempt has action_name in action_input which is not allowed"
println("\nERROR SQLLLM decisionMaker() $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
end
for i ∈ [:comprehension, :plan, :action_name, :action_input]
for i ∈ Symbol.(dictkey)
if length(JSON3.write(responsedict[i])) == 0
errornote = "\n $i is empty"
println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
end
# check whether any category has more than one key
for i ∈ [:comprehension, :plan, :action_name, :action_input]
matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
if length(matchkeys) > 1
errornote = "\n $i has more than one key"
println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
errornote = "\nYour previous attempt has empty value for $i"
println("\nERROR SQLLLM decisionMaker() $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
end
@@ -336,9 +337,10 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
return responsedict
end
error("DecisionMaker failed to generate a thought \n", response)
error("SQLLLM DecisionMaker() failed to generate a thought \n", response)
end
""" Assigns a scalar value to each new child node to be used for selec-
tion and backpropagation. This value effectively quantifies the agent's progress in task completion,
serving as a heuristic to steer the search algorithm towards the most promising regions of the tree.
@@ -359,7 +361,7 @@ julia>
# Signature
"""
function evaluator(state::T1, text2textInstructLLM::Function
function evaluator(state::T1, text2textInstructLLM::Function; maxattempt=10
) where {T1<:AbstractDict}
systemmsg =
@@ -372,9 +374,9 @@ function evaluator(state::T1, text2textInstructLLM::Function
"reasoning" is agent's step-by-step reasoning about the current situation
"plan" is agent's plan to complete the task from the current situation
"action_name" is the name of the action taken, which can be one of the following functions:
- GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
- RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
"action_input" is the input to the action
"observation" is result of the preceding immediate action
@@ -430,14 +432,11 @@ function evaluator(state::T1, text2textInstructLLM::Function
end
errornote = ""
for attempt in 1:10
errorFlag = false
for attempt in 1:maxattempt
usermsg =
"""
Trajectory: $thoughthistory
Error_note: $errornote
P.S. $errornote
"""
_prompt =
@@ -447,27 +446,28 @@ function evaluator(state::T1, text2textInstructLLM::Function
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
header = ["Trajectory_evaluation:", "Answer_evaluation:", "Accepted_as_answer:", "Score:", "Suggestion:"]
dictkey = ["trajectory_evaluation", "answer_evaluation", "accepted_as_answer", "score", "suggestion"]
response = text2textInstructLLM(prompt)
response = text2textInstructLLM(prompt, modelsize="medium")
response = GeneralUtils.deFormatLLMtext(response, "granite3")
# sometimes the LLM outputs something like **Comprehension**: which is not expected
response = replace(response, "**"=>"")
response = replace(response, "***"=>"")
# make sure every header is in the response
for i in header
detected = GeneralUtils.detect_keyword(i, response)
if detected === nothing
errornote = "Your previous response didn't provide $i"
errorFlag = true
end
end
if errorFlag
continue # skip to the next iteration
# check whether response has all header
detected_kw = GeneralUtils.detect_keyword(header, response)
if 0 ∈ values(detected_kw)
errornote = "Your previous attempt does not have all answer points"
println("\nERROR SQLLLM evaluator() Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
elseif sum(values(detected_kw)) > length(header)
errornote = "Your previous attempt has duplicated answer point"
println("\nERROR SQLLLM evaluator() Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
responsedict = GeneralUtils.textToDict(response, header;
@@ -477,13 +477,17 @@ function evaluator(state::T1, text2textInstructLLM::Function
try
responsedict[:score] = parse(Int, responsedict[:score]) # convert string "5" into integer 5
catch
errornote = "Your previous attempt's score has wrong format"
println("\nERROR SQLLLM evaluator() Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
accepted_as_answer::AbstractString = responsedict[:accepted_as_answer]
if accepted_as_answer ∉ ["Yes", "No"] # [PENDING] add errornote into the prompt
error("generated accepted_as_answer has wrong format")
errornote = "Your previous attempt's accepted_as_answer has wrong format"
println("\nERROR SQLLLM evaluator() Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
# add to state here instead of in transition() because the latter causes the julia extension to crash (a bug in the julia extension)
@@ -501,7 +505,7 @@ function evaluator(state::T1, text2textInstructLLM::Function
# evaluation score as reward because different answers hold different value for the user.
state[:reward] = responsedict[:score]
end
println("\n~~~ Evaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
println("\nEvaluator() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
pprintln(Dict(responsedict))
return responsedict[:score]
@@ -604,7 +608,7 @@ function reflector(config::T1, state::T2)::String where {T1<:AbstractDict, T2<:A
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
externalService = config[:externalservice][:text2textinstruct]
# apply LLM specific instruct format
@@ -720,7 +724,7 @@ function transition(state::T, args::NamedTuple
elseif thoughtDict[:action_name] == "TABLEINFO"
input = thoughtDict[:action_input]
tableinfo(executeSQL, input)
elseif thoughtDict[:action_name] == "GETDATA"
elseif thoughtDict[:action_name] == "RUNSQL"
response = SQLexecution(executeSQL, thoughtDict[:action_input])
if response[:success]
extracted = extractContent_dataframe(response[:result], text2textInstructLLM, thoughtDict[:action_input])
@@ -749,15 +753,22 @@ end
# Arguments
- `query<:AbstractString`
a query
A natural language query in English
- `executeSQL::Function`
a connection object to a database
A function that executes SQL queries against the database
- `text2textInstructLLM::Function`
A function that handles communication to text2text instruct LLM service.
# Return
- `resulttext::String`
The result of the query in English.
A function that handles communication with a text-to-text instruction-based language model
# Keyword Arguments
- `insertSQLVectorDB::Union{Function, Nothing}=nothing`
Optional function to insert SQL queries into a vector database for future reference
- `similarSQLVectorDB::Union{Function, Nothing}=nothing`
Optional function to find similar SQL queries from a vector database
# Returns
- `NamedTuple{(:text, :rawresponse), Tuple{Any, Any}}`
- `:text`: The query result in natural language
- `:rawresponse`: The raw database response
# Example
```jldoctest
@@ -824,7 +835,7 @@ julia> println(result)
function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
insertSQLVectorDB::Union{Function, Nothing}=nothing,
similarSQLVectorDB::Union{Function, Nothing}=nothing,
) where {T<:AbstractString}
)::NamedTuple{(:text, :rawresponse), Tuple{Any, Any}} where {T<:AbstractString}
# use similarSQLVectorDB to find similar SQL for the query
sql, distance = similarSQLVectorDB(query)
@@ -855,9 +866,128 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
:question=> query,
),
)
# context = Dict(
# :tablelist => listAllTable_str(executeSQL)[:result]
# )
#XXX find a way to recreate the schema from an existing database
context = Dict(
:tablelist => listAllTable_str(executeSQL)[:result]
:tablelist =>
"""
Here are SQL that used to create tables in the database:
create table customer (
customer_id uuid primary key default gen_random_uuid (),
customer_firstname varchar(128),
customer_lastname varchar(128),
customer_displayname varchar(128) not null,
customer_username varchar(128),
customer_password varchar(128),
customer_gender varchar(128),
country varchar(128),
telephone varchar(128),
email varchar(128) not null,
customer_birthdate varchar(128),
note text,
other_attributes jsonb,
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp,
description text
);
create table retailer (
retailer_id uuid primary key default gen_random_uuid (),
retailer_name varchar(128) not null,
retailer_username varchar(128) not null,
retailer_password varchar(128) not null,
retailer_address text not null,
country varchar(128) not null,
contact_person varchar(128) not null,
telephone varchar(128) not null,
email varchar(128) not null,
note text,
other_attributes jsonb,
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp,
description text
);
create table food (
food_id uuid primary key default gen_random_uuid (),
food_name varchar(128) not null,
country varchar(128),
spiciness integer,
sweetness integer,
sourness integer,
savoriness integer,
bitterness integer,
serving_temperature integer,
note text,
other_attributes jsonb,
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp,
description text
);
create table wine (
wine_id uuid primary key default gen_random_uuid (),
seo_name varchar(128) not null,
wine_name varchar(128) not null,
winery varchar(128) not null,
vintage integer not null,
region varchar(128) not null,
country varchar(128) not null,
wine_type varchar(128) not null,
grape varchar(128) not null,
serving_temperature varchar(128) not null,
intensity integer,
sweetness integer,
tannin integer,
acidity integer,
fizziness integer,
tasting_notes text,
note text,
other_attributes jsonb,
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp,
description text
);
create table wine_food (
wine_id uuid references wine(wine_id),
food_id uuid references food(food_id),
constraint wine_food_id primary key (wine_id, food_id),
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp
);
CREATE TABLE retailer_wine (
retailer_id uuid references retailer(retailer_id),
wine_id uuid references wine(wine_id),
constraint retailer_wine_id primary key (retailer_id, wine_id),
price NUMERIC(10, 2),
currency varchar(3) not null,
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp
);
CREATE TABLE retailer_food (
retailer_id uuid references retailer(retailer_id),
food_id uuid references food(food_id),
constraint retailer_food_id primary key (retailer_id, food_id),
price NUMERIC(10, 2),
currency varchar(3) not null,
created_time timestamptz default current_timestamp,
updated_time timestamptz default current_timestamp
);
"""
)
transitionargs = (
decisionMaker=decisionMaker,
evaluator=evaluator,
@@ -871,15 +1001,25 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
earlystop(state) = state[:reward] >= 8 ? true : false
_, _, resultState = LLMMCTS.runMCTS(initialstate, transition, transitionargs;
horizontalSampleExpansionPhase=5,
horizontalSampleSimulationPhase=2,
maxSimulationDepth=5,
maxiterations=1,
explorationweight=1.0,
earlystop=earlystop,
saveSimulatedNode=true,
multithread=true)
root, _, resultState, highValueState =
LLMMCTS.runMCTS(initialstate, transition, transitionargs;
horizontalSampleExpansionPhase=3,
horizontalSampleSimulationPhase=3,
maxSimulationDepth=5,
maxiterations=1,
explorationweight=1.0,
earlystop=earlystop,
saveSimulatedNode=true,
multithread=false)
# compare the answers of all high-value states, then select the best one
if length(highValueState) > 0
# open("/appfolder/app/highValueState.json", "w") do io
# JSON3.pretty(io, highValueState)
# end
selected = compareState(query, highValueState, text2textInstructLLM)
resultState = highValueState[selected] #BUG compareState() select 0
end
latestKey, latestInd = GeneralUtils.findHighestIndexKey(resultState[:thoughtHistory], "observation")
action_input = Symbol("action_input_$latestInd") # latest sql
sql = resultState[:thoughtHistory][action_input]
@@ -896,7 +1036,9 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
println("query() return nothing")
end
return (text=extracted, rawresponse=resultState[:rawresponse])
result = (text=extracted, rawresponse=resultState[:rawresponse])
return result
end
@@ -925,7 +1067,7 @@ function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::
nextindice = currentstate_latestKey !== nothing ? currentstate_latestIndice + 1 : 1
# currentstate_latestKey == :NA ? 1 : currentstate_latestIndice + 1
currentstate_latestKey = makeNextKey.(keys, nextindice)
currentstate_latestKey = makekey.(keys, nextindice)
# add Thought, action, observation to thoughtHistory
newstate = deepcopy(currentstate)
@@ -948,11 +1090,8 @@ function makeNewState(currentstate::T1, thoughtDict::T4, rawresponse, response::
end
makeNextKey(key, indice) = Symbol("$(key)_$indice")
function generatequestion(state::T1, context, text2textInstructLLM::Function;
similarSQL::Union{T2, Nothing}=nothing
similarSQL::Union{T2, Nothing}=nothing, maxattempt=10
)::String where {T1<:AbstractDict, T2<:AbstractString}
similarSQL =
@@ -1011,6 +1150,9 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
Let's begin!
"""
header = ["Understanding:", "Q1:"]
dictkey = ["understanding", "q1"]
workprogress = ""
for (k, v) in state[:thoughtHistory]
@@ -1022,14 +1164,14 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
response = nothing # store for show when error msg show up
errornote = ""
for attempt in 1:10
for attempt in 1:maxattempt
usermsg =
"""
$(context[:tablelist])
User query: $(state[:thoughtHistory][:question])
Example: $similarSQL
Your work progress: $workprogress
$errornote
P.S. $errornote
"""
_prompt =
@@ -1039,37 +1181,28 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
try
response = text2textInstructLLM(prompt)
# check if response is valid
q_number = count("Q", response)
if q_number < 1
errornote = "too few question"
error("too few questions only $q_number questions are generated")
end
if occursin('`', response)
response = replace(response, '`'=>"")
end
header = ["Understanding:", "Q1:"]
dictkey = ["understanding", "q1"]
responsedict = GeneralUtils.textToDict(response, header;
dictKey=dictkey, symbolkey=true)
response = "Q1: " * responsedict[:q1]
println("\n~~~ SQLLLM generatequestion() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
pprintln(Dict(responsedict))
return response
catch e
io = IOBuffer()
showerror(io, e)
errorMsg = String(take!(io))
st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
println("\n~~~ SQLLLM generatequestion() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
response = text2textInstructLLM(prompt, modelsize="medium")
response = GeneralUtils.deFormatLLMtext(response, "granite3")
# check if response is valid
q_number = count("Q", response)
if q_number < 1
errornote = "Your previous attempt has too few question."
println("\nERROR YiemAgent generatequestion(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
continue
end
if occursin('`', response)
response = replace(response, '`'=>"")
end
responsedict = GeneralUtils.textToDict(response, header;
dictKey=dictkey, symbolkey=true)
response = "Q1: " * responsedict[:q1]
println("\nSQLLLM generatequestion() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
pprintln(Dict(responsedict))
return response
end
error("generatequestion failed to generate a thought ", response)
end
@@ -1119,8 +1252,6 @@ end

View File

@@ -1,10 +1,10 @@
module llmfunction
export listAllTable_json, listAllTable_str, tableinfo, getdata, finalAnswerBox,
getTableNameFromSQL, extractContent_dataframe, SQLexecution
getTableNameFromSQL, extractContent_dataframe, SQLexecution, compareState
using HTTP, JSON3, URIs, Random, PrettyPrinting, UUIDs, LibPQ, Tables, DataFrames, CSV,
DataStructures, StatsBase
DataStructures, StatsBase, Dates
using GeneralUtils, LLMMCTS
using ..util
@@ -36,7 +36,7 @@ julia> result = response[:result]
# Signature
"""
function listAllTable_json(executeSQL::Function
)::NamedTuple{(:result, :success),Tuple{DataFrame,Bool}}
)::NamedTuple{(:result, :success),Tuple{DataFrame,Bool}}
sql = """
SELECT
@@ -406,9 +406,10 @@ function getdata_decisionMaker(state::Dict, context::Dict, text2textInstructLLM:
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
try
response = text2textInstructLLM(prompt)
response = text2textInstructLLM(prompt, modelsize="medium")
response = GeneralUtils.deFormatLLMtext(response, "granite3")
header = ["Comprehension:", "Plan:", "Code:"]
dictkey = ["comprehension", "plan", "code"]
@@ -627,12 +628,13 @@ function extractContent_dataframe(df::DataFrame, text2textInstructLLM::Function,
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
header = ["About_resulting_table:", "Search_summary:"]
dictkey = ["about_resulting_table", "search_summary"]
for i in 1:5
response = text2textInstructLLM(prompt)
response = text2textInstructLLM(prompt, modelsize="medium")
response = GeneralUtils.deFormatLLMtext(response, "granite3")
kw = []
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
@@ -762,13 +764,14 @@ function getTableNameFromSQL(sql::T, text2textInstructLLM::Function)::Vector{Str
]
# put in model format
prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")
prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
header = ["Table_name:"]
dictkey = ["table_name"]
for attempt in 1:5
try
response = text2textInstructLLM(prompt)
response = text2textInstructLLM(prompt, modelsize="medium")
response = GeneralUtils.deFormatLLMtext(response, "granite3")
responsedict = GeneralUtils.textToDict(response, header;
dictKey=dictkey, symbolkey=true)
response = copy(JSON3.read(responsedict[:table_name]))
@@ -788,6 +791,178 @@ function getTableNameFromSQL(sql::T, text2textInstructLLM::Function)::Vector{Str
end
""" Compare multiple solution attempts and select the most accurate one.
This function evaluates multiple solution attempts for a given question and determines which attempt
provides the most accurate and relevant response. It uses an LLM to analyze and compare the attempts,
considering their actions and observations.
# Arguments
- `question::String`
The original question or task that was attempted to be solved
- `highValueStateList::Vector{Dict}`
List of states containing different solution attempts and their results
- `text2textInstructLLM::Function`
A function that handles communication to LLM service
# Returns
- `Integer`
The index of the selected best response (1-based indexing)
# Example
```jldoctest
julia>
```
# Notes
- The function makes up to 10 attempts to get a valid response from the LLM
- Each state in highValueStateList should contain a thoughtHistory with action_input and observation
- The LLM evaluates attempts based on accuracy and relevance to the original question
"""
function compareState(question::String, highValueStateList::Vector{T},
    text2textInstructLLM::Function)::Integer where {T<:AbstractDict}
    # Ask the LLM to compare the latest action/observation of every state in
    # `highValueStateList` and return the 1-based index of the attempt it selects.
    #
    # Throws `ArgumentError` when `highValueStateList` is empty (there is nothing to choose
    # from) and an `ErrorException` when no usable answer is obtained after 10 LLM calls.
    isempty(highValueStateList) &&
        throw(ArgumentError("highValueStateList must contain at least one state"))

    systemmsg =
    """
    <Your profile>
    - You are a helpful assistant
    </Your profile>
    <Situation>
    The user has made multiple attempts to solve the question, resulting in various answers
    <Your mission>
    - Identify and select the most accurate and relevant response from these multiple results for the user
    </Your mission>
    <At each round of conversation, you will be given the following>
    Question: the question the user is trying to answer
    Attempt: the user's attempted actions and their corresponding results
    </At each round of conversation, you will be given the following>
    <You should then respond to the user with the following>
    Comparison: a comparison of all results from all attempts
    Rationale: a brief explanation of why the selected response is the most accurate and relevant
    Selected_response_number: the number the selected response in the list of results (e.g., 1, 2, 3, ...)
    </You should then respond to the user with the following>
    <You should only respond in format as described below>
    Comparison: ...
    Rationale: ...
    Selected_response_number: ...
    </You should only respond in format as described below>
    <Here are some examples>
    User's question: "How many German wines do you have?"
    Attempt 1:
    Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany'
    Result: 100 wines
    Attempt 2:
    Action: SELECT COUNT(*) FROM wines WHERE country = 'Germany' AND type = 'Red'
    Result: 50 red wines
    Comparison: The second attempt counts only German red wines while the first attempt includes all German wines.
    Rationale: The user is asking for the number of German wines without specifying a type, so the most accurate response is the first attempt because it includes all German wines.
    Selected_response_number:1
    </Here are some examples>
    Let's begin!
    """

    # Fields copied from each state's thought history. Named `fields` rather than `keys`
    # so that `Base.keys` is not shadowed inside this function.
    fields = [:action_input, :observation]

    # Collect the most recent value of every field for each state.
    potentialSolution = Dict{Symbol, Any}[]
    for state in highValueStateList
        thoughtHistory = state[:thoughtHistory]
        _, latestIndice = GeneralUtils.findHighestIndexKey(thoughtHistory, fields[1])
        latestKeys = makekey.(fields, latestIndice)
        latest = Dict{Symbol, Any}()
        for (i, f) in enumerate(fields)
            latest[f] = thoughtHistory[latestKeys[i]]
        end
        push!(potentialSolution, latest)
    end

    # Render the attempts as:
    #   Attempt 1
    #   action_input: ...
    #   observation: ...
    #   Attempt 2
    #   ...
    buf = IOBuffer()
    for (i, solution) in enumerate(potentialSolution)
        print(buf, "Attempt ", i, "\n")
        for f in fields
            print(buf, f, ": ", solution[f], "\n")
        end
    end
    potentialSolutionStr = String(take!(buf))

    header = ["Comparison:", "Rationale:", "Selected_response_number:"]
    dictkey = ["comparison", "rationale", "selected_response_number"]
    nsolutions = length(potentialSolution)

    errornote = ""
    # Declared outside the loop: a variable first assigned inside a `for` body is local to
    # that body, and the final error message needs the last raw response.
    response = ""
    for attempt in 1:10
        usermsg =
        """
        Question: $question
        Attempts: $potentialSolutionStr
        P.S. $errornote
        """

        _prompt =
        [
            Dict(:name=> "system", :text=> systemmsg),
            Dict(:name=> "user", :text=> usermsg)
        ]

        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt, "granite3")
        response = text2textInstructLLM(prompt, modelsize="medium")

        # The LLM sometimes emits markdown emphasis such as **Comparison**: ; drop every
        # run of two or more asterisks so that "***" does not leave a stray "*" behind.
        response = replace(response, r"\*{2,}" => "")
        response = GeneralUtils.deFormatLLMtext(response, "granite3")

        # make sure every header is in the response; report all missing ones at once
        missingHeaders = String[]
        for h in header
            if GeneralUtils.detect_keyword(h, response) === nothing
                push!(missingHeaders, h)
            end
        end
        if !isempty(missingHeaders)
            errornote = "Your previous attempt didn't provide $(join(missingHeaders, ", "))"
            println("\nERROR SQLLLM compareState() Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
            continue # skip to the next iteration
        end

        responsedict = GeneralUtils.textToDict(response, header; dictKey=dictkey, symbolkey=true)

        # Sometimes "6\nThe trajectories are incomplete" is generated; only the number is
        # needed. Take the first run of digits so multi-digit answers and leading
        # whitespace are handled, instead of only the first character.
        numbermatch = match(r"\d+", string(responsedict[:selected_response_number]))
        selected = numbermatch === nothing ? nothing : tryparse(Int, numbermatch.match)
        if selected === nothing
            errornote = "In your previous attempt, Selected_response_number was not a number. It must be a number."
            println("\nERROR SQLLLM compareState() Attempt $attempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
            continue
        end

        # The caller uses the result as an index into highValueStateList.
        if !(1 <= selected <= nsolutions)
            errornote = "In your previous attempt, Selected_response_number was $selected. It must be a number between 1 and $nsolutions."
            println("\nERROR SQLLLM compareState() Attempt $attempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
            continue
        end

        responsedict[:selected_response_number] = selected

        println("\n~~~ compareState() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
        pprintln(Dict(responsedict))

        return selected
    end
    error("compareState() failed to generate an evaluation, Response: \n$response\n<|End of error|>", @__FILE__, ":", @__LINE__, " $(Dates.now())")
end
@@ -824,8 +999,6 @@ end

View File

@@ -1,6 +1,8 @@
module util
export makekey
"""
    makekey(key, indice) -> Symbol

Join `key` and `indice` with an underscore and return the result as a `Symbol`,
e.g. `makekey(:observation, 2)` gives `:observation_2`.
"""
function makekey(key, indice)
    return Symbol(key, "_", indice)
end

153
system_prompt_template.jl Normal file
View File

@@ -0,0 +1,153 @@
"""
Default system message template:
<Your role>
- You are a helpful assistant
</Your role>
<Situation>
- Describe the current situation
</Situation>
<Your vision>
- State your vision of how the situation will evolve and what you want it to evolve into
</Your vision>
<Your mission>
- state the goal
</Your mission>
<Your mission's objectives include>
- Break the goal into smaller steps
</Your mission's objectives include>
<Your responsibilities include>
- State the mini goals that fall under your responsibility
</Your responsibilities include>
<Your responsibilities do NOT include>
-
</Your responsibilities do NOT include>
<At each round of conversation, you will be given the following information>
</At each round of conversation, you will be given the following information>
<You must follow the following guidelines>
-
</You must follow the following guidelines>
<You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input>
Comprehension: State your comprehension about the current situation.
Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
Action_name: (Typically corresponds to the execution of the first step in your plan) Can be one of the following function names:
- CHATBOX which you can use to talk with the user. The input is your intentions for the dialogue. Be specific.
- CHECKRESOURCES which you can use to check resources
- IMPLEMENT which you can use to implement the solution
Action_input: Detail the input for the action.
</You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input>
<You should only respond in format as described below>
Comprehension: ...
Plan: ...
Action_name: ...
Action_input: ...
</You should only respond in format as described below>
<Here are some examples>
</Here are some examples>
Let's begin!
Example:
<Your profile>
- You are a founder of a tech startup
</Your profile>
<Situation>
- The global rise in bedridden patients, driven by an aging population, presents significant challenges for caregivers. Family members often become primary caretakers, leading to physical and emotional strain. This situation frequently forces caregivers to make difficult choices, including leaving their careers to provide full-time care, which impacts both family finances and personal well-being.
</Situation>
<Your vision>
- We want to develop a system that can help people with bedridden patients and their families so that they could go on with their lives.
</Your vision>
<Your mission>
- To create an innovative caregiving support platform that reduces the physical and emotional burden on family caregivers while ensuring quality care for bedridden patients
</Your mission>
<Your mission's objectives include>
- Develop smart monitoring systems for patient safety
- Create automated alert mechanisms for critical situations
- Design user-friendly interfaces for remote patient monitoring
- Implement AI-driven predictive care recommendations
- Build a support network connecting caregivers with healthcare professionals
- Establish training modules for family caregivers
</Your mission's objectives include>
<Your responsibilities include>
- Lead product vision and strategy development
- Oversee technical implementation and system architecture
- Coordinate with healthcare experts for medical validation
- Ensure compliance with healthcare regulations
- Manage stakeholder relationships
- Drive fundraising and business development
</Your responsibilities include>
<At each round of conversation, you will be given the following>
Challenges: user's specific caregiving challenges
Context: context and severity of the situation
Feedback: comments from family caregivers
Solutions: potential solution based on immediate and long-term impact
</At each round of conversation, you will be given the following>
<You must follow the following guidelines>
- Always prioritize patient safety and well-being
- Maintain empathy and understanding in all interactions
- Focus on practical, implementable solutions
- Consider both immediate needs and long-term sustainability
- Respect privacy and confidentiality of all stakeholders
- Follow healthcare regulations and best practices
</You must follow the following guidelines>
<You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input>
Comprehension: State your comprehension about the current situation.
Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
Action_name: (Typically corresponds to the execution of the first step in your plan)
Can be one of the following function names:
- CHATBOX which you can use to talk with the user. The input is your intentions for the dialogue. Be specific.
- CHECKRESOURCES which you can use to check resources
- IMPLEMENT which you can use to implement the solution
Action_input: Detail the input for the action.
</You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input>
<You should only respond in format as described below>
Comprehension: ...
Plan: ...
Action_name: ...
Action_input: ...
</You should only respond in format as described below>
<Here are some examples>
Example 1:
Challenges: "My mother needs constant monitoring at night, but I'm exhausted from lack of sleep."
Context: Elderly patient with dementia, requires 24/7 supervision
Feedback: "Need urgent solution for night monitoring"
Solutions: Smart monitoring system with motion sensors and alerts
Comprehension: The caregiver is experiencing severe sleep deprivation due to nighttime monitoring requirements
Plan:
1. Assess current monitoring needs
2. Propose smart monitoring system installation
3. Set up emergency alert system
4. Train family on system usage
Action_name: CHATBOX
Action_input: Discuss specific nighttime behaviors and incidents to determine optimal sensor placement and alert thresholds
Example 2:
Challenges: "Managing medication schedules is becoming overwhelming"
Context: Patient on multiple medications with complex timing requirements
Feedback: "Need help with medication management"
Solutions: Automated medication reminder and tracking system
Comprehension: Caregiver struggling with complex medication management tasks
Plan:
1. Review current medication schedule
2. Implement automated reminder system
3. Set up medication tracking log
4. Connect with pharmacy for refill automation
Action_name: IMPLEMENT
Action_input: Deploy medication management module with smart alerts and compliance tracking
</Here are some examples>
Let's begin!
"""

41
test/Manifest.toml Normal file
View File

@@ -0,0 +1,41 @@
# This file is machine-generated - editing it directly is not advised
julia_version = "1.11.4"
manifest_format = "2.0"
project_hash = "71d91126b5a1fb1020e1098d9d492de2a4438fd2"
[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
version = "1.11.0"
[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
version = "1.11.0"
[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
version = "1.11.0"
[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
version = "1.11.0"
[[deps.Random]]
deps = ["SHA"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
version = "1.11.0"
[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"
[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
version = "1.11.0"
[[deps.Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
version = "1.11.0"

2
test/Project.toml Normal file
View File

@@ -0,0 +1,2 @@
[deps]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

View File

@@ -53,9 +53,10 @@ function text2textInstructLLM(prompt::String; maxattempt=3)
response = nothing
for attempts in 1:maxattempt
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=180, maxattempt=2)
response = _response[:response][:text]
if response !== nothing
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=300, maxattempt=2)
payload = _response[:response]
if _response[:success] && payload[:text] !== nothing
response = _response[:response][:text]
break
else
println("\n<text2textInstructLLM()> attempt $attempts/$maxattempt failed ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
@@ -113,6 +114,7 @@ function similarSQLVectorDB(query; maxdistance::Integer=100)
# get embedding of the query
df = findSimilarTextFromVectorDB(query, tablename,
"function_input_embedding", executeSQLVectorDB)
# println(df[1, [:id, :function_output]])
row, col = size(df)
distance = row == 0 ? Inf : df[1, :distance]
# distance = 100 # CHANGE this is for testing only
@@ -156,8 +158,14 @@ end
sessionId = "555"
# query = "How many German wines do you have?"
# highValueStateList = copy(JSON3.read("/appfolder/app/highValueState_1.json"))
# selectedState = SQLLLM.compareState(query, highValueStateList, text2textInstructLLM)
# query = Dict(:text=> "How many wines from France do you have that can be paired with lamb?")
query = "How many French wines do you have?"
query = "How many French wines from Yiem store under 100 dollars do you have?"
# query = "retailer: Yiem, wine_type: red, sweetness: 1-2, intensity: 4-5, wine price: 20-40"
# query = "wine_type: white, country: United States, sweetness: 1-2, tannin: 3, food to be served with wine: pizza"
# query = "wine_type: white, country: Austria, food to be served with wine: pork"

View File

@@ -1,16 +1,70 @@
using Revise
# using Revise
# using SQLLLM, LLMMCTS, DataStructures, JSON3
# query = "How many German wines do you have?"
# highValueStateList = copy(JSON3.read("/appfolder/app/highValueState_1.json"))
# selectedState = SQLLLM.compareState(query, highValueStateList)
# Scratch function exercising a declared NamedTuple return type whose first field may be
# `nothing`: field `a` is `nothing` when the argument equals 1 and 5 otherwise; `b` is 5.
function testf(a)::NamedTuple{(:a, :b), Tuple{Union{Nothing, Int}, Int}}
    firstfield = a == 1 ? nothing : 5
    return (a=firstfield, b=5)
end
q = testf(1)
w = testf(2)

View File

@@ -1,8 +0,0 @@
table_name,comment
customer,"The customer table stores information about customers. It includes details such as first name, last name, display name, username, password, gender, country, telephone number, email, birthdate, additional_search_term, other attributes (in JSON format) and a description."
wine,"The wine table stores information about different wines. It includes details namely id, name, brand, manufacturer, region, country, wine_type, grape_variety, serving_temperature, intensity, sweetness, tannin, acidity, fizziness, additional_search_term, other attributes (in JSON format) and a description."
wine_food,"The wine_food table represents the association between wines and food items. It establishes a many-to-many relationship, allowing us to link specific wines with various food items."
food,"The food table represents various food items. It stores information related to food names, country of origin, taste attributes (spiciness, sweetness, sourness, savoriness, and bitterness), serving temperature, additional_search_term, other attributes (in JSON format) and a description."
retailer,"The retailer table stores information about different retailers. It includes details related to retailer names, usernames, passwords, addresses, contact persons, telephone numbers, email addresses, additional_search_term, other attributes (in JSON format) and a description."
retailer_wine,"The retailer_wine table represents the relationship between retailers and wines. It stores information about the wines available from which retailers, including vintage, their price, and the currency."
retailer_food,"The retailer_food table represents the relationship between retailers and food items. It stores information about the food items available from which retailers, including their price and the currency."
1 table_name comment
2 customer The customer table stores information about customers. It includes details such as first name, last name, display name, username, password, gender, country, telephone number, email, birthdate, additional_search_term, other attributes (in JSON format) and a description.
3 wine The wine table stores information about different wines. It includes details namely id, name, brand, manufacturer, region, country, wine_type, grape_variety, serving_temperature, intensity, sweetness, tannin, acidity, fizziness, additional_search_term, other attributes (in JSON format) and a description.
4 wine_food The wine_food table represents the association between wines and food items. It establishes a many-to-many relationship, allowing us to link specific wines with various food items.
5 food The food table represents various food items. It stores information related to food names, country of origin, taste attributes (spiciness, sweetness, sourness, savoriness, and bitterness), serving temperature, additional_search_term, other attributes (in JSON format) and a description.
6 retailer The retailer table stores information about different retailers. It includes details related to retailer names, usernames, passwords, addresses, contact persons, telephone numbers, email addresses, additional_search_term, other attributes (in JSON format) and a description.
7 retailer_wine The retailer_wine table represents the relationship between retailers and wines. It stores information about the wines available from which retailers, including vintage, their price, and the currency.
8 retailer_food The retailer_food table represents the relationship between retailers and food items. It stores information about the food items available from which retailers, including their price and the currency.