diff --git a/Project.toml b/Project.toml index 2b85257..763aca5 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "SQLLLM" uuid = "2ebc79c7-cc10-4a3a-9665-d2e1d61e63d3" authors = ["narawat lamaiin "] -version = "0.2.1" +version = "0.2.2" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" @@ -23,5 +23,5 @@ URIs = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" [compat] -GeneralUtils = "0.1.0" -LLMMCTS = "0.1.2" +GeneralUtils = "0.1, 0.2" +LLMMCTS = "0.1" diff --git a/src/interface.jl b/src/interface.jl index a37a10c..520a706 100644 --- a/src/interface.jl +++ b/src/interface.jl @@ -149,7 +149,6 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, You should follow the following guidelines: - Do not create any table in the database - - Column name can be the same in different tables. Refer to column comments to get more details by using TABLEINFO function - A junction table can be used to link tables together. Another use case is for filtering data. - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer. - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query". @@ -162,8 +161,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, - State your step by step reasoning about the current situation. 3) Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific. 4) Action_name (Must be aligned with your plan): Can be one of the following functions: - - TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"] - - GETDATA[SQL], which you can use to get the data from the database. "SQL" is a single SQL command to be executed against the database. + - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database. For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator. Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'. 5) Action_input: Input to the action @@ -224,75 +222,87 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, """ <|start_header_id|>assistant<|end_header_id|> """ + response = text2textInstructLLM(prompt) + println("\nSQL decisionMaker() rawresponse: \n", response) - try - response = text2textInstructLLM(prompt) - println("\nSQL decisionMaker() rawresponse: ", response) - - header = ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"] - - # detect if there are more than 1 key per categories - count = GeneralUtils.countGivenWords(response, header) - if sum(count) > length(header) - error("\nSQL decisionMaker() duplicated keywords", @__FILE__, " ", @__LINE__) - end - - # textToDict() search for action_input - responsedict = GeneralUtils.textToDict(response, header, - rightmarker=":", symbolkey=true, lowercasekey=true) - - delete!(responsedict, :observation) - - # remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String}) - if occursin("```", responsedict[:action_input]) - sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1] - if sql[1:4] == "sql\n" - sql = sql[5:end] - end - sql = split(sql, ';') # some time there are comments in the sql - sql = sql[1] * ';' - - responsedict[:action_input] = sql - end - - toollist = ["TABLEINFO", "GETDATA"] - if responsedict[:action_name] ∉ toollist - error("SQL decisionMaker() didn't use the given functions ", @__FILE__, " ", @__LINE__) - end - - for i in toollist - if occursin(i, responsedict[:action_input]) - error("Action_name is in action_input which is not allowed.") - end - end - - for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input] - if length(JSON3.write(responsedict[i])) == 0 - error("$i is empty ", @__FILE__, " ", @__LINE__) - end - end - - # check if there are more than 1 key per categories - for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input] - matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i) - if length(matchkeys) > 1 - error("DecisionMaker has more than one key per categories") - end - end - - state[:decisionMaker] = responsedict - - return responsedict - catch e - io = IOBuffer() - showerror(io, e) - errorMsg = String(take!(io)) - st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace())) - println("") - println("\n~~~ SQLLLM decisionMaker() Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, " ", @__LINE__) - println("") + if occursin("NULL", response) + errornote = "\nSQL decisionMaker() NULL response is not allowed" + println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__) + continue end + header = ["Understanding", "Reasoning", "Plan", "Action_name", "Action_input", "Observation"] + + # detect if there are more than 1 key per categories + count = GeneralUtils.countGivenWords(response, header) + duplicateKeywordFlag = false + for (i, v) in enumerate(count) + keyword = header[i] + keywordNumber = v + if keywordNumber > 1 + errornote = "\nSQL query has duplicated keyword, $keyword" + println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__) + duplicateKeywordFlag = true + break + end + end + duplicateKeywordFlag == true ? continue : nothing + + # textToDict() search for action_input + responsedict = GeneralUtils.textToDict(response, header, + rightmarker=":", symbolkey=true, lowercasekey=true) + + delete!(responsedict, :observation) + + # remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String}) + if occursin("```", responsedict[:action_input]) + sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1] + if sql[1:4] == "sql\n" + sql = sql[5:end] + end + sql = split(sql, ';') # some time there are comments in the sql + sql = sql[1] * ';' + + responsedict[:action_input] = sql + end + + toollist = ["TABLEINFO", "GETDATA"] + if responsedict[:action_name] ∉ toollist + errornote = "\nYou must only use the given functions" + println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__) + continue + end + + for i in toollist + if occursin(i, responsedict[:action_input]) + errornote = "\n action_name is in action_input which is not allowed." + println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__) + continue + end + end + + for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input] + if length(JSON3.write(responsedict[i])) == 0 + errornote = "\n $i is empty" + println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__) + continue + end + end + + # check if there are more than 1 key per categories + for i ∈ [:understanding, :reasoning, :plan, :action_name, :action_input] + matchkeys = GeneralUtils.findMatchingDictKey(responsedict, i) + if length(matchkeys) > 1 + errornote = "\n $i has more than one key" + println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__) + continue + end + end + + state[:decisionMaker] = responsedict + + return responsedict + end error("DecisionMaker failed to generate a thought ", response) end @@ -462,8 +472,9 @@ function evaluator(state::T1, text2textInstructLLM::Function; "reasoning" is agent's step-by-step reasoning about the current situation "plan" is agent's plan to complete the task from the current situation "action_name" is the name of the action taken, which can be one of the following functions: - - TABLEINFO[list_of_table_name], which you can use to get the data type of a table column. "list_of_table_name" is a list of table name you want to get info. e.g. TABLEINFO["table name 1", "table name 2"] - - GETDATA[SQL], which you can use to get the data from the database. "SQL" is the single SQL command to be executed against the database. + - GETDATA, which you can use to get the data from the database. Action_input for this function must be a single SQL query to be executed against the database. + For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator. + Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'. "action_input" is the input to the action "observation" is result of the preceding immediate action @@ -797,7 +808,7 @@ function transition(state::T, args::NamedTuple # so that other simulation start from this same node is not contaminated with actioninput listAllTable_json(executeSQL) elseif thoughtDict[:action_name] == "TABLEINFO" - input = copy(JSON3.read(thoughtDict[:action_input])) + input = thoughtDict[:action_input] # BUG thoughtDict[:action_input] = "\"wine\"" tableinfo(executeSQL, input) elseif thoughtDict[:action_name] == "GETDATA" response = SQLexecution(executeSQL, thoughtDict[:action_input]) @@ -967,6 +978,10 @@ function query(query::T, executeSQL::Function, text2textInstructLLM::Function; insertSQLVectorDB(resultState[:thoughtHistory][:question], sql) end + if extracted === nothing #BUG + println("query() return nothing") + end + return (text=extracted, rawresponse=resultState[:rawresponse]) end diff --git a/src/llmfunction.jl b/src/llmfunction.jl index 261b9e4..035e85e 100644 --- a/src/llmfunction.jl +++ b/src/llmfunction.jl @@ -520,65 +520,6 @@ julia> response = SQLLLM.SQLexecution(executeSQL, sql) # Signature """ -# function SQLexecution(executeSQL::Function, sql::T -# )::NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}} where {T<:AbstractString} -# println("\n~~~ 1-01 ", @__FILE__, " ", @__LINE__) -# #XXX dummy SQL. use for testing -# # sql = "SELECT w.wine_name FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id JOIN food f ON wf.food_id = f.food_id WHERE f.\"food_name\" = 'lamb';" -# # sql = " SELECT w.wine_name FROM wine w JOIN food f ON f.food_name = 'lamb' JOIN wine_food wf ON w.wine_id = wf.wine_id AND f.food_id = wf.food_id GROUP BY w.wine_name ORDER BY COUNT(DISTINCT w.wine_id) DESC;" -# # sql = " SELECT COUNT(DISTINCT wf.wine_id) FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id JOIN food f ON wf.food_id = f.food_id WHERE f.food_name ILIKE '%lamb%'" - -# #XXX use for package testing, remove when done -# # ans = "1.schilfwein zweigelt 2.cabernet sauvignon reserve limited edition" -# # ans = "There are 1500 wines that can be paired with lamb." -# # ans = "1500" -# # return (response=ans, errormsg=nothing, reward=1, isterminal=true) - -# # add LIMIT to the SQL to prevent loading large data -# sql = strip(sql) -# println("\n~~~ SQL 1", @__FILE__, " ", @__LINE__) -# println(sql) -# println("\n~~~ 1-02 ", @__FILE__, " ", @__LINE__) - -# if sql[end] != ';' -# errorMsg = "Error, SQL execution failed because it does not ended with ';'" -# return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false) -# end -# println("\n~~~ 1-03 ", @__FILE__, " ", @__LINE__) -# if !occursin("LIMIT", sql) -# # sql = sql[1:end-1] * " LIMIT 100;" -# sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;" -# end - -# println("\n~~~ SQL 2", @__FILE__, " ", @__LINE__) -# println(sql) -# println("\n~~~ 1-1 ", @__FILE__, " ", @__LINE__) -# result = executeSQL(sql) -# println("\n~~~ 1-2 ", @__FILE__, " ", @__LINE__) -# df = DataFrame(result) -# println("\n~~~ raw df ", df) -# tablesize = size(df) -# println("\n~~~ df size ", tablesize) -# println("\n~~~ 6 ", @__FILE__, " ", @__LINE__) -# row = tablesize[1] -# println("\n~~~ 7 ", @__FILE__, " ", @__LINE__) -# if row == 0 # if 0 row -# errorMsg = "The resulting table has 0 row. Possible causes: 1) SQL is incorrect 2) There is no data that match your search criteria." -# return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false) -# end -# println("\n~~~ 8 ", @__FILE__, " ", @__LINE__) -# df1 = -# if row > 2 -# # ramdom row to pick -# df[sample(1:nrow(df), 2, replace=false), :] # random select 2 rows from df -# else -# df -# end - -# println("\n~~~ SQLexecution result ", @__FILE__, " ", @__LINE__) -# println(df1) -# return (result=df1, success=true, errormsg=nothing, reward=1, isterminal=true) -# end function SQLexecution(executeSQL::Function, sql::T ) where {T<:AbstractString} @@ -596,9 +537,12 @@ function SQLexecution(executeSQL::Function, sql::T # add LIMIT to the SQL to prevent loading large data sql = strip(sql) + + # remove DISTINCT keyword because it is incompatible with RANDOM() + sql = replace(sql, "DISTINCT" => "") + if sql[end] == ';' if !occursin("LIMIT", sql) - # sql = sql[1:end-1] * " LIMIT 100;" sql = sql[1:end-1] * " ORDER BY RANDOM() LIMIT 2;" end else