17 Commits

Author SHA1 Message Date
narawat lamaiin
b3345514ca update 2025-01-25 14:21:52 +07:00
narawat lamaiin
112db2929c update 2025-01-15 08:35:25 +07:00
narawat lamaiin
aa7973ca7e update 2025-01-11 16:57:00 +07:00
narawat lamaiin
bba3c26301 update 2025-01-06 13:13:16 +07:00
narawat lamaiin
dcf57420d1 update 2025-01-05 13:34:41 +07:00
4fa16c4b76 update 2025-01-04 16:11:20 +07:00
370f3501b9 update 2025-01-01 07:53:18 +07:00
210aecb183 update 2024-12-27 20:47:22 +07:00
38108d7e8d update 2024-12-27 20:33:24 +07:00
1e36ea96e9 update 2024-12-23 07:20:40 +07:00
4f7f23565e update 2024-12-22 11:32:11 +07:00
narawat lamaiin
d7d97454a9 update 2024-12-19 18:49:20 +07:00
narawat lamaiin
debd663f44 update 2024-12-10 13:13:36 +07:00
narawat lamaiin
d0262124d3 update 2024-12-09 21:46:02 +07:00
narawat lamaiin
726599bab3 update 2024-12-09 21:44:58 +07:00
narawat lamaiin
ba74688143 update 2024-12-09 21:27:15 +07:00
narawat lamaiin
d1a05741ba update 2024-12-09 21:17:54 +07:00
4 changed files with 128 additions and 141 deletions

View File

@@ -2,7 +2,7 @@
julia_version = "1.11.2"
manifest_format = "2.0"
project_hash = "7276bacf2126ccf319f58f1cc1aab4f9a73ac360"
project_hash = "6e88822413ea4a623cd914d84de127dc6c57fceb"
[[deps.AliasTables]]
deps = ["PtrArrays", "Random"]
@@ -306,11 +306,11 @@ version = "1.19.3+0"
[[deps.LLMMCTS]]
deps = ["GeneralUtils", "JSON3"]
git-tree-sha1 = "5be792515b65e464d15ac8db6a9f6f6628c7d81a"
git-tree-sha1 = "c8ad9715e78bbd19f5ac79e1f1cacf85f141449d"
repo-rev = "main"
repo-url = "https://git.yiem.cc/ton/LLMMCTS"
uuid = "d76c5a4d-449e-4835-8cc4-dd86ec44f241"
version = "0.1.0"
version = "0.1.2"
[[deps.LaTeXStrings]]
git-tree-sha1 = "dda21b8cbd6a6c40d9d02a73230f9d70fed6918c"

View File

@@ -1,7 +1,7 @@
name = "SQLLLM"
uuid = "2ebc79c7-cc10-4a3a-9665-d2e1d61e63d3"
authors = ["narawat lamaiin <narawat@outlook.com>"]
version = "0.1.0"
version = "0.2.2"
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
@@ -23,5 +23,5 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[compat]
GeneralUtils = "0.1.0"
LLMMCTS = "0.1.0"
GeneralUtils = "0.1, 0.2"
LLMMCTS = "0.1"

View File

@@ -133,25 +133,26 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
systemmsg =
"""
You are a helpful assistant that get the data from a database to satisfy the user's query.
You are a helpful assistant that find the data from a database to satisfy the user's query.
You are also eager to improve your helpfulness.
At each round of conversation, the user will give you the current situation:
User Query: ...
Hints: ...
Example: ...
Your Q&A: ...
Your work progress: ...
Evaluation: Evaluation of the latest action and observation
Suggestion: ...
You should consider the following guidelines:
You must follow the following guidelines:
- Keep SQL queries focused only on the provided information.
You should follow the following guidelines:
- Do not create any table in the database
- Column name can be the same in different tables. Refer to column comments to get more details by using TABLEINFO function
- A junction table can be used to link tables together. Another use case is for filtering data.
- If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
- If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
- Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
- Do not use backticks (`). Use double quotes instead.
You should then respond to the user with interleaving Understanding, Reasoning, Plan, Action:
1) Understanding:
@@ -160,8 +161,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
- State your step by step reasoning about the current situation.
3) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
4) Action_name (Must be aligned with your plan): Can be one of the following functions:
- TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"]
- GETDATA[SQL], which you can use to get the data from the database. "SQL" is the single SQL command to be executed against the database.
- GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
5) Action_input: Input to the action
@@ -173,7 +173,6 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
Plan: ...
Action_name: ...
Action_input: ...
Observation: ...
Let's begin!
"""
@@ -203,7 +202,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
"""
$(context[:tablelist])
User query: $(state[:thoughtHistory][:question])
Hints: $similarSQL_
Example: $similarSQL_
Your Q&A: $QandA
Your work progress: $workprogress
Evaluation: $(state[:evaluation])
@@ -223,37 +222,70 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
"""
<|start_header_id|>assistant<|end_header_id|>
"""
try
response = text2textInstructLLM(prompt)
println("\nSQL decisionMaker() rawresponse: \n", response)
if occursin("NULL", response)
errornote = "\nSQL decisionMaker() NULL response is not allowed"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end
header = ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"]
# detect if there are more than 1 key per categories
count = GeneralUtils.countGivenWords(response, header)
duplicateKeywordFlag = false
for (i, v) in enumerate(count)
keyword = header[i]
keywordNumber = v
if keywordNumber > 1
errornote = "\nSQL query has duplicated keyword, $keyword"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
duplicateKeywordFlag = true
break
end
end
duplicateKeywordFlag == true ? continue : nothing
# textToDict() search for action_input
responsedict = GeneralUtils.textToDict(response,
["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"],
responsedict = GeneralUtils.textToDict(response, header,
rightmarker=":", symbolkey=true, lowercasekey=true)
delete!(responsedict, :observation)
# remove backticks
# remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String})
if occursin("```", responsedict[:action_input])
responsedict[:action_input] =
GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])
sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1]
if sql[1:4] == "sql\n"
sql = sql[5:end]
end
sql = split(sql, ';') # some time there are comments in the sql
sql = sql[1] * ';'
responsedict[:action_input] = sql
end
toollist = ["TABLEINFO", "GETDATA"]
if responsedict[:action_name] ∉ toollist
error("SQL decisionMaker() didn't use the given functions ", @__FILE__, " ", @__LINE__)
errornote = "\nYou must only use the given functions"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end
for i in toollist
if occursin(i, responsedict[:action_input])
error("Action_name is in action_input which is not allowed.")
errornote = "\n action_name is in action_input which is not allowed."
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end
end
for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
if length(JSON3.write(responsedict[i])) == 0
error("$i is empty ", @__FILE__, " ", @__LINE__)
errornote = "\n $i is empty"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end
end
@@ -261,22 +293,16 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
if length(matchkeys) > 1
error("DecisionMaker has more than one key per categories")
errornote = "\n $i has more than one key"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end
end
state[:decisionMaker] = responsedict
return responsedict
catch e
io = IOBuffer()
showerror(io, e)
errorMsg = String(take!(io))
st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
println("")
println("\n~~~ SQLLLM decisionMaker() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, " ", @__LINE__)
println("")
end
end
error("DecisionMaker failed to generate a thought ", response)
end
@@ -302,7 +328,7 @@ julia>
# Signature
"""
function evaluator(state::T1, text2textInstructLLM::Function;
addSQLVectorDB::Union{Function, Nothing}=nothing
insertSQLVectorDB::Union{Function, Nothing}=nothing
) where {T1<:AbstractDict}
# systemmsg =
@@ -446,8 +472,9 @@ function evaluator(state::T1, text2textInstructLLM::Function;
"reasoning" is agent's step-by-step reasoning about the current situation
"plan" is agent's plan to complete the task from the current situation
"action_name" is the name of the action taken, which can be one of the following functions:
- TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"]
- GETDATA[SQL], which you can use to get the data from the database. "SQL" is the single SQL command to be executed against the database.
- GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
"action_input" is the input to the action
"observation" is result of the preceding immediate action
@@ -768,7 +795,7 @@ function transition(state::T, args::NamedTuple
context = args[:context]
executeSQL::Function = args[:executeSQL]
text2textInstructLLM::Function = args[:text2textInstructLLM]
addSQLVectorDBF::Function = args[:addSQLVectorDB]
insertSQLVectorDB::Function = args[:insertSQLVectorDB]
querySQLVectorDBF::Function = args[:querySQLVectorDB]
# getting SQL from vectorDB
@@ -781,7 +808,7 @@ function transition(state::T, args::NamedTuple
# so that other simulation start from this same node is not contaminated with actioninput
listAllTable_json(executeSQL)
elseif thoughtDict[:action_name] == "TABLEINFO"
input = copy(JSON3.read(thoughtDict[:action_input]))
input = thoughtDict[:action_input] # BUG thoughtDict[:action_input] = "\"wine\""
tableinfo(executeSQL, input)
elseif thoughtDict[:action_name] == "GETDATA"
response = SQLexecution(executeSQL, thoughtDict[:action_input])
@@ -804,7 +831,7 @@ function transition(state::T, args::NamedTuple
isterminal::Bool = haskey(response, :isterminal) ? response[:isterminal] : false
newNodeKey, newstate = makeNewState(state, thoughtDict, rawresponse, JSON3.write(result), select, reward, isterminal)
progressvalue::Integer = evaluatorF(newstate, text2textInstructLLM;
addSQLVectorDB=addSQLVectorDBF)
insertSQLVectorDB=insertSQLVectorDB)
return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=progressvalue)
end
@@ -887,7 +914,7 @@ julia> println(result)
# Signature
"""
function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
addSQLVectorDB::Union{Function, Nothing}=nothing,
insertSQLVectorDB::Union{Function, Nothing}=nothing,
similarSQLVectorDB::Union{Function, Nothing}=nothing,
) where {T<:AbstractString}
@@ -931,7 +958,7 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
executeSQL=executeSQL,
text2textInstructLLM=text2textInstructLLM,
querySQLVectorDB=similarSQLVectorDB,
addSQLVectorDB=addSQLVectorDB,
insertSQLVectorDB=insertSQLVectorDB,
)
earlystop(state) = state[:reward] >= 8 ? true : false
@@ -945,8 +972,14 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
extracted = resultState[:thoughtHistory][latestKey]
# add to vectorDB only if the answer is achieved and the state is terminal
if addSQLVectorDB !== nothing && resultState[:isterminal] == true
addSQLVectorDB(resultState[:thoughtHistory][:question], sql)
if insertSQLVectorDB !== nothing && resultState[:isterminal] == true &&
resultState[:rawresponse] !== nothing
insertSQLVectorDB(resultState[:thoughtHistory][:question], sql)
end
if extracted === nothing #BUG
println("query() return nothing")
end
return (text=extracted, rawresponse=resultState[:rawresponse])
@@ -1017,11 +1050,11 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
systemmsg =
"""
You are a helpful assistant that generate multiple questions about the current situation.
You are a SQL expert that generate multiple questions about the current situation.
At each round of conversation, the user will give you the current situation:
User query: ...
Hints: ...
Example: ...
Your work progress: ...
About the tables in the database:
@@ -1035,6 +1068,9 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
3) Some information can be accessed by joining multiple tables.
4) Do not generate any question or comments at the end.
You should follow the following guidelines:
- When querying data in the database, start with broad search terms and refine your query later for more precise results.
You should then respond to the user with:
1) Understanding:
- State your understanding about the current situation.
@@ -1052,6 +1088,13 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
A3: ...
...
Here are some examples:
Q: What information in the hints is not necessary based on the query?
A: Country is not specified in the query thus it should not be included in an SQL
Q: How can I modify a SQL example to fit my specific query needs?
A: ...
Let's begin!
"""
@@ -1071,7 +1114,7 @@ function generatequestion(state::T1, context, text2textInstructLLM::Function;
"""
$(context[:tablelist])
User query: $(state[:thoughtHistory][:question])
Hints: $similarSQL
Example: $similarSQL
Your work progress: $workprogress
$errornote
$noise

View File

@@ -520,65 +520,6 @@ julia> response = SQLLLM.SQLexecution(executeSQL, sql)
# Signature
"""
# function SQLexecution(executeSQL::Function, sql::T
# )::NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}} where {T<:AbstractString}
# println("\n~~~ 1-01 ", @__FILE__, " ", @__LINE__)
# #XXX dummy SQL. use for testing
# # sql = "SELECT w.wine_name FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id JOIN food f ON wf.food_id = f.food_id WHERE f.\"food_name\" = 'lamb';"
# # sql = " SELECT w.wine_name FROM wine w JOIN food f ON f.food_name = 'lamb' JOIN wine_food wf ON w.wine_id = wf.wine_id AND f.food_id = wf.food_id GROUP BY w.wine_name ORDER BY COUNT(DISTINCT w.wine_id) DESC;"
# # sql = " SELECT COUNT(DISTINCT wf.wine_id) FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id JOIN food f ON wf.food_id = f.food_id WHERE f.food_name ILIKE '%lamb%'"
# #XXX use for package testing, remove when done
# # ans = "1.schilfwein zweigelt 2.cabernet sauvignon reserve limited edition"
# # ans = "There are 1500 wines that can be paired with lamb."
# # ans = "1500"
# # return (response=ans, errormsg=nothing, reward=1, isterminal=true)
# # add LIMIT to the SQL to prevent loading large data
# sql = strip(sql)
# println("\n~~~ SQL 1", @__FILE__, " ", @__LINE__)
# println(sql)
# println("\n~~~ 1-02 ", @__FILE__, " ", @__LINE__)
# if sql[end] != ';'
# errorMsg = "Error, SQL execution failed because it does not ended with ';'"
# return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false)
# end
# println("\n~~~ 1-03 ", @__FILE__, " ", @__LINE__)
# if !occursin("LIMIT", sql)
# # sql = sql[1:end-1] * " LIMIT 100;"
# sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;"
# end
# println("\n~~~ SQL 2", @__FILE__, " ", @__LINE__)
# println(sql)
# println("\n~~~ 1-1 ", @__FILE__, " ", @__LINE__)
# result = executeSQL(sql)
# println("\n~~~ 1-2 ", @__FILE__, " ", @__LINE__)
# df = DataFrame(result)
# println("\n~~~ raw df ", df)
# tablesize = size(df)
# println("\n~~~ df size ", tablesize)
# println("\n~~~ 6 ", @__FILE__, " ", @__LINE__)
# row = tablesize[1]
# println("\n~~~ 7 ", @__FILE__, " ", @__LINE__)
# if row == 0 # if 0 row
# errorMsg = "The resulting table has 0 row. Possible causes: 1) SQL is incorrect 2) There is no data that match your search criteria."
# return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false)
# end
# println("\n~~~ 8 ", @__FILE__, " ", @__LINE__)
# df1 =
# if row > 2
# # ramdom row to pick
# df[sample(1:nrow(df), 2, replace=false), :] # random select 2 rows from df
# else
# df
# end
# println("\n~~~ SQLexecution result ", @__FILE__, " ", @__LINE__)
# println(df1)
# return (result=df1, success=true, errormsg=nothing, reward=1, isterminal=true)
# end
function SQLexecution(executeSQL::Function, sql::T
) where {T<:AbstractString}
@@ -596,9 +537,12 @@ function SQLexecution(executeSQL::Function, sql::T
# add LIMIT to the SQL to prevent loading large data
sql = strip(sql)
# remove DISTINCT keyword because it is incompatible with RANDOM()
sql = replace(sql, "DISTINCT" => "")
if sql[end] == ';'
if !occursin("LIMIT", sql)
# sql = sql[1:end-1] * " LIMIT 100;"
sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;"
end
else
@@ -613,7 +557,7 @@ function SQLexecution(executeSQL::Function, sql::T
tablesize = size(df)
row, column = tablesize
if row == 0 # if 0 row
error("The resulting table has 0 row. Possible causes: 1) You might be searching in the wrong place 2) There could be a typo in your search query.")
error("The resulting table has 0 row. Possible causes: 1) Your search criteria might be too specific. Relaxing some conditions could yield better results. Remember, you can always refine your search later. 2) There could be a typo in your search query. 3) You might be searching in the wrong place.")
elseif column > 30
error("SQL execution failed. An unexpected error occurred. Please try again.")
end