module llmfunction

export listAllTable_json, listAllTable_str, tableinfo, getdata, finalAnswerBox,
    getTableNameFromSQL, extractContent_dataframe, SQLexecution

using HTTP, JSON3, URIs, Random, PrettyPrinting, UUIDs, LibPQ, Tables, DataFrames, CSV,
    DataStructures, StatsBase
using GeneralUtils, LLMMCTS
using ..util

# ---------------------------------------------- 100 --------------------------------------------- #

# Query shared by `listAllTable_json` and `listAllTable_str`: one row per table in the
# `public` schema with its comment and a comma-separated "column (type)" list.
# `obj_description` expects the object OID, so `pg_class.oid` is used rather than
# `relfilenode` (the on-disk file id, which changes after TRUNCATE / VACUUM FULL / CLUSTER).
const LIST_TABLES_SQL = """
    SELECT
        table_name,
        obj_description(pg_class.oid, 'pg_class') AS table_comment,
        string_agg(column_name || ' (' || data_type || ')', ', ') AS columns
    FROM
        information_schema.columns
    JOIN
        pg_class ON table_name = relname
    WHERE
        table_schema = 'public'
    GROUP BY
        table_name, pg_class.oid
    ORDER BY
        table_name;
    """

# Maximum number of rows kept from a query result by `SQLexecution`.
const MAX_RESULT_ROWS = 2

# Render an exception and a backtrace as two strings: (error message, stack trace text).
function _exceptiontext(e, bt)
    errorMsg = sprint(showerror, e)
    st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(bt))
    return errorMsg, st
end

"""
    listAllTable_json(executeSQL::Function)

List all tables of the `public` schema and return them as a `DataFrame` with the columns
`table_name`, `table_comment` and `columns`.

# Arguments
    - `executeSQL::Function`
        A function that takes a SQL string, runs it against the Postgres database and
        returns a result that `DataFrame` can consume.

# Return
    - `NamedTuple{(:result, :success), Tuple{DataFrame, Bool}}`

# Example
```jldoctest
julia> using LibPQ, SQLLLM
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.listAllTable_json(executeSQL)
julia> result = response[:result]
```
"""
function listAllTable_json(executeSQL::Function
)::NamedTuple{(:result, :success),Tuple{DataFrame,Bool}}
    tablesinfo_df = DataFrame(executeSQL(LIST_TABLES_SQL))
    return (result=tablesinfo_df, success=true)
end

"""
    listAllTable_str(executeSQL::Function)

List all tables of the `public` schema as numbered text, one table per line in the format
`table name; table comment; table columns`.

# Arguments
    - `executeSQL::Function`
        A function that takes a SQL string, runs it against the Postgres database and
        returns a result that `DataFrame` can consume.

# Return
    - `NamedTuple{(:result, :success), Tuple{String, Bool}}`
"""
function listAllTable_str(executeSQL::Function
)::NamedTuple{(:result, :success),Tuple{String,Bool}}
    df = DataFrame(executeSQL(LIST_TABLES_SQL))

    io = IOBuffer()
    print(io, "Here are a list of available tables in the database (each row is in this format: table name; table comment; table columns): \n")
    for i in 1:nrow(df)
        table_name = df[i, 1]
        table_comment = df[i, 2]
        columns = df[i, 3]
        print(io, "$i. $table_name; $table_comment; $columns\n")
    end

    return (result=String(take!(io)), success=true)
end

"""
    tableinfo_str(executeSQL::Function, tablename::AbstractString)

Describe the columns of one table of the `public` schema as numbered text, one column per
line in the format `column name; data type; column comment`.

# Arguments
    - `executeSQL::Function`
        A function that takes a SQL string, runs it against the Postgres database and
        returns a result that `DataFrame` can consume.
    - `tablename::AbstractString`
        Name of the table to describe.

# Return
    - `NamedTuple{(:result, :success), Tuple{String, Bool}}`
"""
function tableinfo_str(executeSQL::Function, tablename::AbstractString
)::NamedTuple{(:result, :success),Tuple{String,Bool}}
    # The name ends up inside a single-quoted SQL literal: double any embedded quote so
    # it can neither break the statement nor inject SQL.
    safename = replace(tablename, "'" => "''")

    # %I quotes identifiers when needed, so the regclass cast also works for table names
    # that require quoting (e.g. mixed case).
    sql = """
        SELECT
            column_name,
            data_type,
            col_description(format('%I.%I', table_schema, table_name)::regclass::oid, ordinal_position) AS column_comment
        FROM
            information_schema.columns
        WHERE
            table_name = '$safename'
            AND table_schema = 'public';
        """
    df = DataFrame(executeSQL(sql))

    io = IOBuffer()
    print(io, "Here are info of table $tablename (each row is in this format: column name; data type; column comment):\n")
    for i in 1:nrow(df)
        column_name = df[i, 1]
        column_datatype = df[i, 2]
        column_comment = df[i, 3]
        print(io, "$i. $column_name; $column_datatype; $column_comment \n")
    end

    return (result=String(take!(io)), success=true)
end

"""
    tableinfo(executeSQL::Function, tablenames::AbstractVector)

Get the column descriptions of several tables as one text. If any requested table does not
exist in the `public` schema, the returned text is an error message listing the missing
tables instead.

# Arguments
    - `executeSQL::Function`
        A function that takes a SQL string, runs it against the Postgres database and
        returns a result that `DataFrame` can consume.
    - `tablenames<:AbstractVector`
        A list of table names to describe.

# Return
    - `NamedTuple{(:result,), Tuple{String}}`
        Text containing the info of every requested table, or an error message.

# Example
```jldoctest
julia> using SQLLLM, LibPQ
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.tableinfo(executeSQL, ["wine", "food"])
julia> result = response[:result]
```
"""
function tableinfo(executeSQL::Function, tablenames::T
)::NamedTuple{(:result,),Tuple{String}} where {T<:AbstractVector}
    # list all tables in the 'public' schema
    sql = """
        SELECT
            pg_namespace.nspname AS schema_name,
            relname AS table_name,
            pg_catalog.obj_description(pg_class.oid) AS comment
        FROM
            pg_class
        INNER JOIN
            pg_namespace ON pg_namespace.oid = pg_class.relnamespace
        WHERE
            pg_namespace.nspname = 'public'
            AND pg_class.relkind IN ('r', 't');
        """
    df = DataFrame(executeSQL(sql))
    tableNameList = collect(df.table_name)

    # check that every requested table exists before describing any of them
    notExistingTable = [name for name in tablenames if name ∉ tableNameList]
    if !isempty(notExistingTable)
        result = "Error, the following tables does not exist in the database: $(JSON3.write(notExistingTable))"
        return (result=result,)
    end

    io = IOBuffer()
    for name in tablenames
        print(io, tableinfo_str(executeSQL, name)[:result])
    end

    return (result=String(take!(io)),)
end

# NOTE: `getdata` is exported above but its only definition is the disabled one below.
#
# """ Convert a query process in English into SQL, execute and get the result from the database.
#
# # Arguments
#     - `query<:AbstractString`
#         A query to a database in SQL.
#     - `context::Union{Dict, Nothing}`
#         A context to be available at transition()
#     - `executeSQL::Function`
#         A connection object connected to the database
#     - `text2textInstructLLM::Function`
#         A function that handles communication to LLM service.
#
# # Return
#     - `NamedTuple{(:result, :errormsg, success), Tuple{String, String, Bool}}`
#
# # TODO
#     - [x] getdata directly using sql execute
#
# # Signature
# """
# function getdata(query::T, context::Union{Dict,Nothing}, executeSQL::Function,
#     text2textInstructLLM::Function;
#     ) where {T<:AbstractString}
#     response = SQLexecution(executeSQL, query)
#     if response[:success]
#         extracted = extractContent_dataframe(response[:result], context, text2textInstructLLM)
#         response_ = (result=extracted, errormsg=nothing, success=true)
#         return response_
#     else
#         response_ = (result=nothing, errormsg=response[:errormsg], success=false)
#         return response_
#     end
# end

"""
    getdata_evaluator(newstate, config)

Placeholder evaluator: ignores both arguments and always returns
`(evaluation="None", score=0)`.

# TODO
    - [] update docstring
    - [PENDING] implement the function
"""
function getdata_evaluator(newstate, config)
    return (evaluation="None", score=0)
end

"""
    getdata_transition(state::AbstractDict, args::NamedTuple)

State transition. When `state[:code]` is `nothing`, `state[:question]` is executed as SQL;
otherwise `getdata_decisionMaker` is asked for an improved SQL first. The SQL is executed
with `SQLexecution` and, on success, the resulting table is turned into text with
`extractContent_dataframe`. The input `state` is not modified; a deep copy is updated.

# Arguments
    - `state<:AbstractDict`
        A game state. Reads `:code` and `:question`.
    - `args::NamedTuple`
        Must provide `:context`, `:executeSQL` and `:text2textInstructLLM`.

# Return
    - `NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}}`
        `newstate` has `:code`, `:response`, `:errorexplain`, `:errormsg`, `:reward` and
        `:isterminal` updated. `progressvalue` is always 0.
"""
function getdata_transition(state::T, args::NamedTuple
)::NamedTuple{(:newNodeKey, :newstate, :progressvalue),Tuple{String,T,Integer}} where {T<:AbstractDict}
    context = args[:context]
    executeSQL::Function = args[:executeSQL]
    text2textInstructLLM::Function = args[:text2textInstructLLM]

    thought, sql =
        if state[:code] !== nothing
            decision = getdata_decisionMaker(state, context, text2textInstructLLM)
            decision[:thought], decision[:code]
        else
            nothing, state[:question]
        end

    # make new state
    newNodeKey = GeneralUtils.uuid4snakecase()
    newstate = deepcopy(state)

    execution =
        if sql !== nothing
            SQLexecution(executeSQL, sql)
        else
            (result=nothing, success=false,
                errormsg="SQL execution failed. An unexpected error occurred. Please try again.")
        end

    # `SQLexecution` reports only result/success/errormsg; reward and terminal flag are
    # derived here: a successful execution is worth 1 and ends the search.
    response = execution[:result]
    errormsg = execution[:errormsg]
    success = execution[:success]
    reward = success ? 1 : 0
    isterminal = success

    println("getdata_transition() 1 ", @__FILE__, " ", @__LINE__)
    newstate[:code] = sql
    newstate[:response] = response
    newstate[:errorexplain] = thought
    newstate[:errormsg] = errormsg
    newstate[:reward] = reward
    newstate[:isterminal] = isterminal

    if response !== nothing
        newstate[:response] = extractContent_dataframe(response, context, text2textInstructLLM)
    end
    println("getdata_transition() 2 ", @__FILE__, " ", @__LINE__)

    progressvalue = 0

    return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=progressvalue)
end

"""
    getdata_decisionMaker(state::AbstractDict, context::AbstractDict, text2textInstructLLM::Function)

Ask the LLM for an improved SQL statement, given the previously executed code and its
execution error. Up to 10 attempts are made; an attempt is rejected when the generated
code is too short, identical to the previous code, creates a table, contains a code
fence, does not end with `;` or contains more than one `;`.

# Arguments
    - `state::AbstractDict`
        A game state. Reads `:code` and `:errormsg`.
    - `context::AbstractDict`
        Additional context for the LLM. Reads `:mentionedTableInfo` and `:userintention`.
    - `text2textInstructLLM::Function`
        A function that handles communication to the LLM.

# Return
    - `NamedTuple{(:thought, :code, :success, :errormsg), Tuple{Union{String, Nothing}, Union{String, Nothing}, Bool, Union{String, Nothing}}}`
        On failure after all attempts, `thought` and `code` are `nothing` and `success` is
        `false`.
"""
function getdata_decisionMaker(state::AbstractDict, context::AbstractDict,
    text2textInstructLLM::Function
)::NamedTuple{(:thought, :code, :success, :errormsg),Tuple{Union{String,Nothing},Union{String,Nothing},Bool,Union{String,Nothing}}}

    systemmsg = """
        You are an assistant helping the user to execute SQL code from the user's query.

        At each round of conversation, the user will give you:
        Context: ...
        User intention: ...
        Code executed from the last round: ...
        Execution error: execution error of the last round code.

        You should consider the following guidelines:
        - Text information in the database is sometimes stored in lower case. If your search returns empty, try using lower case to search.

        You should then respond to the user with:
        1) Understanding:
            - State your understanding about the current situation.
        2) Reasoning:
            - State your step by step reasoning about the current situation.
        3) Plan: Step-by-step instructions of how to complete the task.
            - Focus on improving the code from the last round.
            - Do not create any table in the database.
        4) Code:
            - Write new improved code.
            - Do not wrap the code and no comment as it will be executed directly without any modification against the database.

        You should only respond in format as described below and nothing more:
        Understanding: ...
        Reasoning: ...
        Plan:
        1) ...
        2) ...
        ...
        Code: ...

        Let's begin!
        """

    noise = ""      # random text added after a failed attempt so the next prompt differs
    note_flag = ""  # extra instruction added once the LLM tried to create a table

    for attempt in 1:10
        usermsg = """
            Context: $(context[:mentionedTableInfo])
            User intention: $(context[:userintention])
            Code executed from the last round: $(state[:code])
            Execution error: $(state[:errormsg])
            $noise
            $note_flag
            """

        _prompt = [
            Dict(:name => "system", :text => systemmsg),
            Dict(:name => "user", :text => usermsg)
        ]

        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
        prompt *= """
            <|start_header_id|>assistant<|end_header_id|>
            """

        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response,
                ["Understanding", "Reasoning", "Plan", "Code"];
                rightmarker=":", symbolkey=true, lowercasekey=true)
            code = strip(responsedict[:code])

            if length(code) < 2
                error("No code available.")
            elseif code == state[:code]
                error("generated code is the same as earlier.")
            end

            # check code
            if occursin(r"create\s+table"i, code)
                note_flag = "Note: Create new table is not allowed."
                error("create table is not allowed")
            elseif occursin("```", code)
                error("Note: code contains backtick ` which is not allowed")
            elseif code[end] != ';'
                error("SQL does not ending with ';'")
            elseif count(==(';'), code) > 1
                error("Multiple SQL statement are not allowed")
            end

            println("\n~~~ getdata_decisionMaker() ", @__FILE__, " ", @__LINE__)
            pprintln(Dict(responsedict))

            return (thought=responsedict[:reasoning], code=String(code), success=true,
                errormsg=nothing)
        catch e
            errorMsg, st = _exceptiontext(e, catch_backtrace())
            print("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
            noise = GeneralUtils.randstrings(3, 5)
        end
    end

    return (thought=nothing, code=nothing, success=false,
        errormsg="Failed to generate SQL after numerous attempts.")
end

# Normalize a SQL statement so that it ends with exactly one ';' and returns few rows.
# A statement that already mentions LIMIT is left alone; one that already has an ORDER BY
# only gets a LIMIT (a second ORDER BY would be invalid SQL); anything else gets a random
# ordering plus LIMIT.
function _limitrows(sql::AbstractString)
    body = strip(sql)
    isempty(body) && throw(ArgumentError("SQL statement is empty."))
    if endswith(body, ";")
        # `chop` removes the last character safely even when a multibyte character
        # precedes the ';' (byte arithmetic like `sql[1:end-1]` throws there)
        body = rstrip(chop(body))
    end

    if occursin(r"\blimit\b"i, body)
        return body * ";"
    elseif occursin(r"\border\s+by\b"i, body)
        return body * " LIMIT $MAX_RESULT_ROWS;"
    else
        return body * " ORDER BY RANDOM() LIMIT $MAX_RESULT_ROWS;"
    end
end

# Print `errorMsg` and build the failure value returned by `SQLexecution`.
function _sqlfailure(errorMsg::AbstractString)
    println(errorMsg)
    return (result=nothing, success=false, errormsg=String(errorMsg))
end

"""
    SQLexecution(executeSQL::Function, sql::AbstractString)

Execute a given SQL statement and return at most $MAX_RESULT_ROWS rows of its result.
A row limit is added to the statement when it has none. Any exception raised while
executing is caught and reported through `errormsg`. An empty result and a result with
more than 30 columns are reported as failures too.

# Arguments
    - `executeSQL::Function`
        A function that takes a SQL string, runs it against the database and returns a
        result that `DataFrame` can consume.
    - `sql::T<:AbstractString`
        A SQL command.

# Return
    - `NamedTuple` with the fields `(:result, :success, :errormsg)`
        On success `result` is a `DataFrame` and `errormsg` is `nothing`; on failure
        `result` is `nothing` and `errormsg` is a `String`.

# Example
```jldoctest
julia> using LibPQ, SQLLLM
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.SQLexecution(executeSQL, sql)
```
"""
function SQLexecution(executeSQL::Function, sql::T) where {T<:AbstractString}
    try
        # add LIMIT to the SQL to prevent loading large data
        limitedsql = _limitrows(sql)

        println("\n~~~ SQLexecution() SQL: ", @__FILE__, " ", @__LINE__)
        println(limitedsql)

        df = DataFrame(executeSQL(limitedsql))
        row, column = size(df)

        if row == 0
            return _sqlfailure("The resulting table has 0 row. Possible causes: 1) Your search criteria might be too specific. Relaxing some conditions could yield better results. Remember, you can always refine your search later. 2) There could be a typo in your search query. 3) You might be searching in the wrong place.")
        elseif column > 30
            return _sqlfailure("SQL execution failed. An unexpected error occurred. Please try again.")
        end

        # the statement may carry its own, larger LIMIT: keep a random subset of rows
        df1 =
            if row > MAX_RESULT_ROWS
                df[sample(1:row, MAX_RESULT_ROWS, replace=false), :]
            else
                df
            end

        println("\n~~~ SQLexecution() result: ", @__FILE__, " ", @__LINE__)
        println(df1)

        return (result=df1, success=true, errormsg=nothing)
    catch e
        return _sqlfailure(sprint(showerror, e))
    end
end

"""
    extractContent_dataframe(df::DataFrame, text2textInstructLLM::Function)
    extractContent_dataframe(df::DataFrame, context, text2textInstructLLM::Function)

Extract content from a dataframe with the LLM: the table is rendered as text, the LLM is
asked to summarize it, and the summary is returned together with the table text. Up to 5
attempts are made; an error is raised when all of them fail.

# Arguments
    - `df::DataFrame`
        A dataframe to be read.
    - `context::Union{AbstractDict, Nothing}`
        Optional context for the LLM. When it has the key `:userintention`, that value is
        added to the prompt.
    - `text2textInstructLLM::Function`
        A function that handles communication to the LLM service.

# Return
    - `result::String`
"""
function extractContent_dataframe(df::DataFrame, context::Union{AbstractDict,Nothing},
    text2textInstructLLM::Function
)::String
    row = nrow(df)

    #[PENDING] Since the relevant columns depend on the question, there should be a way to
    # select columns on the fly. For now every column is passed to the LLM.
    dfstr = GeneralUtils.dfToString(df)

    systemmsg = """
        You are an assistant that readouts the resulting table after the user executing SQL command.

        At each round of conversation, the user will give you:
        - User intention: ...
        - Resulting table dimension: ...
        - Resulting table: The resulting table after executing the user's intention.

        You should then respond to the user with:
        - About_resulting_table:
            1) What is the resulting table represent?
        - Search_summary:
            1) Summarize the table's content based on the user intension in verbal English.
            Here are some example:
            Bad example (you are not Summarize the table content): there are 2 columns in the table i.e. "cash" and "number".
            2) Do not generate additional text.

        You should only respond in format as described below:
        About_resulting_table: ...
        Search_summary: ...

        Let's begin!
        """

    intention =
        if context !== nothing && haskey(context, :userintention)
            "User intention: $(context[:userintention])\n"
        else
            ""
        end
    usermsg = intention * "Resulting table: $dfstr\n"

    _prompt = [
        Dict(:name => "system", :text => systemmsg),
        Dict(:name => "user", :text => usermsg)
    ]

    # put in model format
    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
    prompt *= """
        <|start_header_id|>assistant<|end_header_id|>
        """

    for attempt in 1:5
        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response,
                ["About_resulting_table", "Search_summary"],
                rightmarker=":", symbolkey=true)

            result = """
                Summary: $(responsedict[:Search_summary])
                More details: $dfstr
                """

            if row > 2
                result *= "There are many more rows, but they are truncated because there are too many of them."
            end

            println("\n~~~ extractContent_dataframe() ", @__FILE__, " ", @__LINE__)
            println(result)

            return result
        catch e
            # a malformed LLM answer is worth another try
            errorMsg, st = _exceptiontext(e, catch_backtrace())
            println("")
            println("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
        end
    end
    error("extractContent_dataframe failed to summarize the resulting table")
end

function extractContent_dataframe(df::DataFrame, text2textInstructLLM::Function)::String
    return extractContent_dataframe(df, nothing, text2textInstructLLM)
end

"""
    getTableNameFromSQL(sql::AbstractString, text2textInstructLLM::Function)

Extract the database table names mentioned in a query, using the LLM. Up to 5 attempts
are made; an error is raised when all of them fail.

# Arguments
    - `sql<:AbstractString`
        SQL command
    - `text2textInstructLLM::Function`
        A function that handles communication to the LLM service

# Return
    - `tablename::Vector{String}`
        A list of table names

# Example
```jldoctest
julia> using SQLLLM, UUIDs, GeneralUtils
julia> sql = "Get all rows from the \"food\" table where the description contains the word \"lamb\". Then, join this result with the \"wine_food\" table on the \"food_id\" column to get a list of wines that can be paired with lamb. Finally, group the result by the \"wine_id\" column and count the number of unique wines."
julia> function text2textInstructLLM(prompt::String)
            config = Dict(
                :mqttServerInfo => Dict(
                    :description => "mqtt server info",
                    :port => 1883,
                    :broker => "mqtt.yiem.cc"
                ),
                :externalservice => Dict(
                    :text2textinstruct => Dict(
                        :mqtttopic => "/loadbalancer/requestingservice",
                        :description => "text to text service with instruct LLM",
                        :llminfo => Dict(:name => "llama3instruct")
                    ),
                )
            )

            # apply LLM specific instruct format
            externalService = config[:externalservice][:text2textinstruct]

            msgMeta = GeneralUtils.generate_msgMeta(
                externalService[:mqtttopic],
                senderName= "SQLLLM",
                senderId= string(uuid4()),
                receiverName= "text2textinstruct",
                mqttBroker= config[:mqttServerInfo][:broker],
                mqttBrokerPort= config[:mqttServerInfo][:port],
            )

            outgoingMsg = Dict(
                :msgMeta=> msgMeta,
                :payload=> Dict(
                    :text=> prompt,
                    :kwargs=> Dict(
                        :max_tokens=> 512,
                        :stop=> ["<|eot_id|>"],
                        :temperature=> 0.2,
                    )
                )
            )

            _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
            response = _response[:response][:text]

            return response
        end
julia> result = SQLLLM.getTableNameFromSQL(sql, text2textInstructLLM)
```
"""
function getTableNameFromSQL(sql::T, text2textInstructLLM::Function
)::Vector{String} where {T<:AbstractString}
    systemmsg = """
        Extract table name out of the user query.

        At each round of conversation, the user will give you:
        Query: ...

        You should then respond to the user with:
        - table_name: a list of table name that the user mentioned in the query.
            For example, ["color", "type"]

        You must only respond in format as described below:
        table_name: ["...", "...", ...]

        Let's begin!
        """

    usermsg = """
        Query: $sql
        """

    _prompt = [
        Dict(:name => "system", :text => systemmsg),
        Dict(:name => "user", :text => usermsg)
    ]

    # put in model format
    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
    prompt *= """
        <|start_header_id|>assistant<|end_header_id|>
        """

    for attempt in 1:5
        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response, ["table_name"],
                rightmarker=":", symbolkey=true)
            # the answer is expected to be a JSON array of strings
            return copy(JSON3.read(responsedict[:table_name]))
        catch e
            errorMsg, st = _exceptiontext(e, catch_backtrace())
            println("")
            println("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
        end
    end
    error("getTableNameFromSQL failed to generate a thought")
end

end # module llmfunction