9 Commits

Author SHA1 Message Date
ton
c1ac00829c Merge pull request 'WIP v0.2.2-dev' (#1) from v0.2.2-dev into main
Reviewed-on: #1
2025-01-25 07:38:21 +00:00
narawat lamaiin
b3345514ca update 2025-01-25 14:21:52 +07:00
narawat lamaiin
112db2929c update 2025-01-15 08:35:25 +07:00
narawat lamaiin
aa7973ca7e update 2025-01-11 16:57:00 +07:00
narawat lamaiin
bba3c26301 update 2025-01-06 13:13:16 +07:00
narawat lamaiin
dcf57420d1 update 2025-01-05 13:34:41 +07:00
4fa16c4b76 update 2025-01-04 16:11:20 +07:00
370f3501b9 update 2025-01-01 07:53:18 +07:00
210aecb183 update 2024-12-27 20:47:22 +07:00
3 changed files with 94 additions and 135 deletions

View File

@@ -1,7 +1,7 @@
name = "SQLLLM" name = "SQLLLM"
uuid = "2ebc79c7-cc10-4a3a-9665-d2e1d61e63d3" uuid = "2ebc79c7-cc10-4a3a-9665-d2e1d61e63d3"
authors = ["narawat lamaiin <narawat@outlook.com>"] authors = ["narawat lamaiin <narawat@outlook.com>"]
version = "0.2.1" version = "0.2.2"
[deps] [deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
@@ -23,5 +23,5 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[compat] [compat]
GeneralUtils = "0.1.0" GeneralUtils = "0.1, 0.2"
LLMMCTS = "0.1.2" LLMMCTS = "0.1"

View File

@@ -149,7 +149,6 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
You should follow the following guidelines: You should follow the following guidelines:
- Do not create any table in the database - Do not create any table in the database
- Column name can be the same in different tables. Refer to column comments to get more details by using TABLEINFO function
- A junction table can be used to link tables together. Another use case is for filtering data. - A junction table can be used to link tables together. Another use case is for filtering data.
- If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer. - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
- If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query". - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
@@ -162,8 +161,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
- State your step by step reasoning about the current situation. - State your step by step reasoning about the current situation.
3) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific. 3) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
4) Action_name (Must be aligned with your plan): Can be one of the following functions: 4) Action_name (Must be aligned with your plan): Can be one of the following functions:
- TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"] - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
- GETDATA[SQL], which you can use to get the data from the database. "SQL" is a single SQL command to be executed against the database.
For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator. For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'. Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
5) Action_input: Input to the action 5) Action_input: Input to the action
@@ -224,18 +222,31 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
""" """
<|start_header_id|>assistant<|end_header_id|> <|start_header_id|>assistant<|end_header_id|>
""" """
try
response = text2textInstructLLM(prompt) response = text2textInstructLLM(prompt)
println("\nSQL decisionMaker() rawresponse: ", response) println("\nSQL decisionMaker() rawresponse: \n", response)
if occursin("NULL", response)
errornote = "\nSQL decisionMaker() NULL response is not allowed"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end
header = ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"] header = ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"]
# detect if there are more than 1 key per categories # detect if there are more than 1 key per categories
count = GeneralUtils.countGivenWords(response, header) count = GeneralUtils.countGivenWords(response, header)
if sum(count) > length(header) duplicateKeywordFlag = false
error("\nSQL decisionMaker() duplicated keywords", @__FILE__, " ", @__LINE__) for (i, v) in enumerate(count)
keyword = header[i]
keywordNumber = v
if keywordNumber > 1
errornote = "\nSQL query has duplicated keyword, $keyword"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
duplicateKeywordFlag = true
break
end end
end
duplicateKeywordFlag == true ? continue : nothing
# textToDict() search for action_input # textToDict() search for action_input
responsedict = GeneralUtils.textToDict(response, header, responsedict = GeneralUtils.textToDict(response, header,
@@ -257,18 +268,24 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
toollist = ["TABLEINFO", "GETDATA"] toollist = ["TABLEINFO", "GETDATA"]
if responsedict[:action_name] ∉ toollist if responsedict[:action_name] ∉ toollist
error("SQL decisionMaker() didn't use the given functions ", @__FILE__, " ", @__LINE__) errornote = "\nYou must only use the given functions"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end end
for i in toollist for i in toollist
if occursin(i, responsedict[:action_input]) if occursin(i, responsedict[:action_input])
error("Action_name is in action_input which is not allowed.") errornote = "\n action_name is in action_input which is not allowed."
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end end
end end
for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input] for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
if length(JSON3.write(responsedict[i])) == 0 if length(JSON3.write(responsedict[i])) == 0
error("$i is empty ", @__FILE__, " ", @__LINE__) errornote = "\n $i is empty"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end end
end end
@@ -276,22 +293,15 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function,
for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input] for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input]
matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i) matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i)
if length(matchkeys) > 1 if length(matchkeys) > 1
error("DecisionMaker has more than one key per categories") errornote = "\n $i has more than one key"
println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
continue
end end
end end
state[:decisionMaker] = responsedict state[:decisionMaker] = responsedict
return responsedict return responsedict
catch e
io = IOBuffer()
showerror(io, e)
errorMsg = String(take!(io))
st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
println("")
println("\n~~~ SQLLLM decisionMaker() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, " ", @__LINE__)
println("")
end
end end
error("DecisionMaker failed to generate a thought ", response) error("DecisionMaker failed to generate a thought ", response)
@@ -462,8 +472,9 @@ function evaluator(state::T1, text2textInstructLLM::Function;
"reasoning" is agent's step-by-step reasoning about the current situation "reasoning" is agent's step-by-step reasoning about the current situation
"plan" is agent's plan to complete the task from the current situation "plan" is agent's plan to complete the task from the current situation
"action_name" is the name of the action taken, which can be one of the following functions: "action_name" is the name of the action taken, which can be one of the following functions:
- TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"] - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database.
- GETDATA[SQL], which you can use to get the data from the database. "SQL" is the single SQL command to be executed against the database. For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
"action_input" is the input to the action "action_input" is the input to the action
"observation" is result of the preceding immediate action "observation" is result of the preceding immediate action
@@ -797,7 +808,7 @@ function transition(state::T, args::NamedTuple
# so that other simulation start from this same node is not contaminated with actioninput # so that other simulation start from this same node is not contaminated with actioninput
listAllTable_json(executeSQL) listAllTable_json(executeSQL)
elseif thoughtDict[:action_name] == "TABLEINFO" elseif thoughtDict[:action_name] == "TABLEINFO"
input = copy(JSON3.read(thoughtDict[:action_input])) input = thoughtDict[:action_input] # BUG thoughtDict[:action_input] = "\"wine\""
tableinfo(executeSQL, input) tableinfo(executeSQL, input)
elseif thoughtDict[:action_name] == "GETDATA" elseif thoughtDict[:action_name] == "GETDATA"
response = SQLexecution(executeSQL, thoughtDict[:action_input]) response = SQLexecution(executeSQL, thoughtDict[:action_input])
@@ -967,6 +978,10 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function;
insertSQLVectorDB(resultState[:thoughtHistory][:question], sql) insertSQLVectorDB(resultState[:thoughtHistory][:question], sql)
end end
if extracted === nothing #BUG
println("query() return nothing")
end
return (text=extracted, rawresponse=resultState[:rawresponse]) return (text=extracted, rawresponse=resultState[:rawresponse])
end end

View File

@@ -520,65 +520,6 @@ julia> response = SQLLLM.SQLexecution(executeSQL, sql)
# Signature # Signature
""" """
# function SQLexecution(executeSQL::Function, sql::T
# )::NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}} where {T<:AbstractString}
# println("\n~~~ 1-01 ", @__FILE__, " ", @__LINE__)
# #XXX dummy SQL. use for testing
# # sql = "SELECT w.wine_name FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id JOIN food f ON wf.food_id = f.food_id WHERE f.\"food_name\" = 'lamb';"
# # sql = " SELECT w.wine_name FROM wine w JOIN food f ON f.food_name = 'lamb' JOIN wine_food wf ON w.wine_id = wf.wine_id AND f.food_id = wf.food_id GROUP BY w.wine_name ORDER BY COUNT(DISTINCT w.wine_id) DESC;"
# # sql = " SELECT COUNT(DISTINCT wf.wine_id) FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id JOIN food f ON wf.food_id = f.food_id WHERE f.food_name ILIKE '%lamb%'"
# #XXX use for package testing, remove when done
# # ans = "1.schilfwein zweigelt 2.cabernet sauvignon reserve limited edition"
# # ans = "There are 1500 wines that can be paired with lamb."
# # ans = "1500"
# # return (response=ans, errormsg=nothing, reward=1, isterminal=true)
# # add LIMIT to the SQL to prevent loading large data
# sql = strip(sql)
# println("\n~~~ SQL 1", @__FILE__, " ", @__LINE__)
# println(sql)
# println("\n~~~ 1-02 ", @__FILE__, " ", @__LINE__)
# if sql[end] != ';'
# errorMsg = "Error, SQL execution failed because it does not ended with ';'"
# return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false)
# end
# println("\n~~~ 1-03 ", @__FILE__, " ", @__LINE__)
# if !occursin("LIMIT", sql)
# # sql = sql[1:end-1] * " LIMIT 100;"
# sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;"
# end
# println("\n~~~ SQL 2", @__FILE__, " ", @__LINE__)
# println(sql)
# println("\n~~~ 1-1 ", @__FILE__, " ", @__LINE__)
# result = executeSQL(sql)
# println("\n~~~ 1-2 ", @__FILE__, " ", @__LINE__)
# df = DataFrame(result)
# println("\n~~~ raw df ", df)
# tablesize = size(df)
# println("\n~~~ df size ", tablesize)
# println("\n~~~ 6 ", @__FILE__, " ", @__LINE__)
# row = tablesize[1]
# println("\n~~~ 7 ", @__FILE__, " ", @__LINE__)
# if row == 0 # if 0 row
# errorMsg = "The resulting table has 0 row. Possible causes: 1) SQL is incorrect 2) There is no data that match your search criteria."
# return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false)
# end
# println("\n~~~ 8 ", @__FILE__, " ", @__LINE__)
# df1 =
# if row > 2
# # ramdom row to pick
# df[sample(1:nrow(df), 2, replace=false), :] # random select 2 rows from df
# else
# df
# end
# println("\n~~~ SQLexecution result ", @__FILE__, " ", @__LINE__)
# println(df1)
# return (result=df1, success=true, errormsg=nothing, reward=1, isterminal=true)
# end
function SQLexecution(executeSQL::Function, sql::T function SQLexecution(executeSQL::Function, sql::T
) where {T<:AbstractString} ) where {T<:AbstractString}
@@ -596,9 +537,12 @@ function SQLexecution(executeSQL::Function, sql::T
# add LIMIT to the SQL to prevent loading large data # add LIMIT to the SQL to prevent loading large data
sql = strip(sql) sql = strip(sql)
# remove DISTINCT keyword because it is incompatible with RANDOM()
sql = replace(sql, "DISTINCT" => "")
if sql[end] == ';' if sql[end] == ';'
if !occursin("LIMIT", sql) if !occursin("LIMIT", sql)
# sql = sql[1:end-1] * " LIMIT 100;"
sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;" sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;"
end end
else else