update
This commit is contained in:
103
src/SQLLLM.jl
Normal file
103
src/SQLLLM.jl
Normal file
@@ -0,0 +1,103 @@
# Top-level module of the package. It loads every source file as a submodule and brings
# the names exported by each submodule into scope.
module SQLLLM

# export

# Order by dependencies of each file. The 1st included file must not depend on any other
# files and each file can only depend on the file included before it.
include("type.jl")
using .type

include("util.jl")
using .util

include("llmfunction.jl")
using .llmfunction

include("interface.jl")
using .interface

# ---------------------------------------------- 100 --------------------------------------------- #

end # module SQLLLM
|
||||
1342
src/interface.jl
Normal file
1342
src/interface.jl
Normal file
File diff suppressed because it is too large
Load Diff
955
src/llmfunction.jl
Normal file
955
src/llmfunction.jl
Normal file
@@ -0,0 +1,955 @@
|
||||
module llmfunction
|
||||
|
||||
export listAllTable_json, listAllTable_str, tableinfo, getdata, finalAnswerBox,
|
||||
getTableNameFromSQL, extractContent_dataframe, SQLexecution
|
||||
|
||||
using HTTP, JSON3, URIs, Random, PrettyPrinting, UUIDs, LibPQ, Tables, DataFrames, CSV,
|
||||
DataStructures, StatsBase
|
||||
using GeneralUtils, LLMMCTS
|
||||
using ..util
|
||||
|
||||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||||
|
||||
"""
    listAllTable_json(executeSQL::Function)

List every relation of the `public` schema together with its comment and its columns.

# Arguments
- `executeSQL::Function`
    A function that receives one SQL string, runs it against the Postgres database and
    returns a result that `DataFrame` can be constructed from.

# Return
- `NamedTuple{(:result, :success), Tuple{DataFrame, Bool}}`
    `result` holds one row per table with the columns `table_name`, `table_comment` and
    `columns`. `success` is `true` whenever the function returns.

# Example
```jldoctest
julia> using LibPQ, SQLLLM
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.listAllTable_json(executeSQL)
julia> result = response[:result]
```
"""
function listAllTable_json(executeSQL::Function
    )::NamedTuple{(:result, :success), Tuple{DataFrame, Bool}}

    # obj_description() expects the relation OID. relfilenode is the on-disk file node and
    # stops matching the OID once a table has been rewritten, so pg_class.oid is used.
    # The pg_namespace join keeps same-named relations of other schemas out of the result.
    sql =
        """
        SELECT
            c.table_name,
            obj_description(cls.oid, 'pg_class') AS table_comment,
            string_agg(c.column_name || ' (' || c.data_type || ')', ', '
                       ORDER BY c.ordinal_position) AS columns
        FROM
            information_schema.columns AS c
        JOIN
            pg_namespace AS ns ON ns.nspname = c.table_schema
        JOIN
            pg_class AS cls ON cls.relname = c.table_name AND cls.relnamespace = ns.oid
        WHERE
            c.table_schema = 'public'
        GROUP BY
            c.table_name, cls.oid
        ORDER BY
            c.table_name;
        """

    tablesinfo_df = DataFrame(executeSQL(sql))

    return (result=tablesinfo_df, success=true)
end
|
||||
|
||||
"""
    listAllTable_str(executeSQL::Function)

Describe every table of the `public` schema as numbered text lines.

The table list is obtained from `listAllTable_json`, so both functions always report the
same tables.

# Arguments
- `executeSQL::Function`
    A function that receives one SQL string, runs it against the Postgres database and
    returns a result that `DataFrame` can be constructed from.

# Return
- `NamedTuple{(:result, :success), Tuple{String, Bool}}`
    `result` is a header line followed by one line per table in the format
    `index. table name; table comment; table columns`.
"""
function listAllTable_str(executeSQL::Function
    )::NamedTuple{(:result, :success), Tuple{String, Bool}}

    tables_df = listAllTable_json(executeSQL)[:result]

    # Write into one buffer instead of re-allocating the string for every row.
    listing = sprint() do io
        print(io, "Here are a list of available tables in the database (each row is in this format: table name; table comment; table columns): \n")
        for (i, row) in enumerate(eachrow(tables_df))
            print(io, i, ". ", row[1], "; ", row[2], "; ", row[3], "\n")
        end
    end

    return (result=listing, success=true)
end
|
||||
|
||||
"""
    tableinfo_str(executeSQL::Function, tablename::String)

Describe the columns of one table of the `public` schema as numbered text lines.

# Arguments
- `executeSQL::Function`
    A function that receives one SQL string, runs it against the Postgres database and
    returns a result that `DataFrame` can be constructed from.
- `tablename::String`
    Name of the table to describe.

# Return
- `NamedTuple{(:result, :success), Tuple{String, Bool}}`
    `result` is a header line followed by one line per column in the format
    `index. column name; data type; column comment`.
"""
function tableinfo_str(executeSQL::Function, tablename::String
    )::NamedTuple{(:result, :success), Tuple{String, Bool}}

    # The name ends up inside a SQL string literal; doubling single quotes keeps a name
    # that contains a quote from terminating the literal.
    quotedname = replace(tablename, "'" => "''")

    # %I quotes the identifiers so that the regclass cast also resolves table names that
    # need quoting (upper-case letters, spaces, ...).
    sql =
        """
        SELECT
            column_name,
            data_type,
            col_description(format('%I.%I', table_schema, table_name)::regclass::oid, ordinal_position) AS column_comment
        FROM
            information_schema.columns
        WHERE
            table_name = '$quotedname'
            AND table_schema = 'public'
        ORDER BY
            ordinal_position;
        """

    columns_df = DataFrame(executeSQL(sql))

    description = sprint() do io
        print(io, "Here are info of table $tablename (each row is in this format: column name; data type; column comment):\n")
        for (i, row) in enumerate(eachrow(columns_df))
            print(io, i, ". ", row[1], "; ", row[2], "; ", row[3], " \n")
        end
    end

    return (result=description, success=true)
end
|
||||
|
||||
"""
    tableinfo(executeSQL::Function, tablenames::AbstractVector)

Get the column description of several tables as one text.

# Arguments
- `executeSQL::Function`
    A function that receives one SQL string, runs it against the Postgres database and
    returns a result that `DataFrame` can be constructed from.
- `tablenames<:AbstractVector`
    A list of table names to describe.

# Return
- `NamedTuple{(:result,), Tuple{String}}`
    The concatenated output of `tableinfo_str` for every requested table, or an error
    text listing the requested names that do not exist in the `public` schema.

# Example
```jldoctest
julia> using SQLLLM, LibPQ
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.tableinfo(executeSQL, ["wine", "food"])
julia> result = response[:result]
```
"""
function tableinfo(executeSQL::Function, tablenames::T
    )::NamedTuple{(:result,), Tuple{String}} where {T<:AbstractVector}

    # List the relations of the public schema. Besides ordinary tables ('r') and the
    # previously accepted 't', partitioned tables ('p'), views ('v') and foreign
    # tables ('f') are accepted, because information_schema.columns (used by the
    # listAllTable_* functions) reports those as well.
    sql =
        """
        SELECT pg_namespace.nspname AS schema_name,
               relname AS table_name,
               pg_catalog.obj_description(pg_class.oid, 'pg_class') AS comment
        FROM pg_class
        INNER JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace
        WHERE pg_namespace.nspname = 'public'
          AND pg_class.relkind IN ('r', 't', 'p', 'v', 'f');
        """

    alltable_df = DataFrame(executeSQL(sql))
    knowntables = collect(alltable_df.table_name)

    # Refuse the whole request when any requested table is unknown.
    unknowntables = [name for name in tablenames if name ∉ knowntables]
    if !isempty(unknowntables)
        message =
            "Error, the following tables does not exist in the database: $(JSON3.write(unknowntables))"
        return (result=message,)
    end

    description = join(first(tableinfo_str(executeSQL, name)) for name in tablenames)

    return (result=description,)
end
|
||||
|
||||
|
||||
|
||||
"""
    getdata(query, context, executeSQL, text2textInstructLLM)

Run `query` against the database through a short `LLMMCTS.runMCTS` search that uses
`getdata_transition` as its transition function.

# Arguments
- `query<:AbstractString`
    A query to a database in SQL.
- `context::Union{Dict, Nothing}`
    A context to be available at transition(). The key `:mentionedTableInfo` is written
    into it before the search starts.
- `executeSQL::Function`
    A function that receives one SQL string and runs it against the database.
- `text2textInstructLLM::Function`
    A function that handles communication to LLM service.

# Return
- `NamedTuple{(:result, :errormsg, :success), Tuple{String, Union{String, Nothing}, Bool}}`
    On success `result` is the `:response` of the second value returned by `runMCTS`.
    Otherwise `result` and `errormsg` are built from the `:errormsg` of the first value.
"""
function getdata(query::T, context::Union{Dict, Nothing}, executeSQL::Function,
    text2textInstructLLM::Function;
    )::NamedTuple{(:result, :errormsg, :success), Tuple{String, Union{String, Nothing}, Bool}} where {T<:AbstractString}

    # Table info is collected here because it is needed only once; inside
    # getdata_decisionMaker() it would be recomputed on every call.
    tablenames = getTableNameFromSQL(query, text2textInstructLLM)
    context[:mentionedTableInfo] = tableinfo(executeSQL, tablenames)[:result]

    initialstate = Dict{Symbol, Any}(
        :question => query,
        :code => nothing,
        :response => nothing,
        :reward => 0,
        :isterminal => false,
        :evaluation => nothing,
        :errormsg => nothing,
        :errorexplain => nothing,
    )

    transitionargs = (
        context=context,
        executeSQL=executeSQL,
        text2textInstructLLM=text2textInstructLLM,
    )

    firststate, secondstate = LLMMCTS.runMCTS(initialstate, getdata_transition, transitionargs;
        totalsample=1, maxdepth=3, maxiterations=1, explorationweight=1.0)

    if secondstate[:isterminal] == true
        # success=true finishes getdata()
        return (result=secondstate[:response], errormsg=nothing, success=true)
    end

    failure = firststate[:errormsg]
    return (result="Failed to get the data. $(failure)", errormsg=failure, success=false)
end
|
||||
|
||||
"""
    getdata_evaluator(newstate, config)

Placeholder evaluator: both arguments are ignored and a constant evaluation is returned.

# Arguments
- `newstate`
    not used
- `config`
    not used

# Return
- `(evaluation="None", score=0)`

# TODO
- [] update docstring
- [PENDING] implement the function
"""
getdata_evaluator(newstate, config) = (evaluation="None", score=0)
|
||||
|
||||
"""
    getdata_transition(state, args)

State transition: pick the SQL to run, execute it and return the resulting new state.

# Arguments
- `state<:AbstractDict`
    A game state. The keys `:code` and `:question` are read.
- `args::NamedTuple`
    Must provide `:context`, `:executeSQL` and `:text2textInstructLLM`.

# Return
- `NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}}`
    `newstate` is a deep copy of `state` with `:code`, `:response`, `:errorexplain`,
    `:errormsg`, `:reward` and `:isterminal` updated. `progressvalue` is always 0.
"""
function getdata_transition(state::T, args::NamedTuple
    )::NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}} where {T<:AbstractDict}

    context = args[:context]
    executeSQL::Function = args[:executeSQL]
    text2textInstructLLM::Function = args[:text2textInstructLLM]

    # While no code has been tried yet the question itself is executed as SQL.
    # Afterwards the LLM is asked for new code based on the previous attempt.
    if state[:code] === nothing
        thought, sql = nothing, state[:question]
    else
        decision = getdata_decisionMaker(state, context, text2textInstructLLM)
        thought, sql = decision[:thought], decision[:code]
    end

    # make new state
    newNodeKey = GeneralUtils.uuid4snakecase()
    newstate = deepcopy(state)

    outcome =
        if sql === nothing
            (result=nothing,
             success=false,
             errormsg="SQL execution failed. An unexpected error occurred. Please try again.",
             reward=0,
             isterminal=false)
        else
            SQLexecution(executeSQL, sql)
        end

    newstate[:code] = sql
    newstate[:errorexplain] = thought
    newstate[:errormsg] = outcome[:errormsg]
    newstate[:reward] = outcome[:reward]
    newstate[:isterminal] = outcome[:isterminal]

    # A returned table is handed to the LLM, which turns it into text.
    table = outcome[:result]
    newstate[:response] =
        table === nothing ? nothing : extractContent_dataframe(table, context, text2textInstructLLM)

    return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=0)
end
|
||||
|
||||
"""
    getdata_decisionMaker(state::Dict, context::Dict, text2textInstructLLM::Function)

Ask the LLM for improved SQL code, based on the code and the error of the last round.

Up to 10 attempts are made. An attempt is rejected when the answer has no code, repeats
the previous code, creates a table, contains a code fence, does not end with `;` or holds
more than one `;`.

# Arguments
- `state::Dict`
    A game state. The keys `:code` and `:errormsg` are read.
- `context::Dict`
    Additional context for LLM to use. The keys `:mentionedTableInfo` and
    `:userintention` are read.
- `text2textInstructLLM::Function`
    A function to handles communication to LLM

# Return
- `NamedTuple{(:thought, :code, :success, :errormsg), Tuple{Union{String, Nothing}, Union{String, Nothing}, Bool, Union{String, Nothing}}}`
    `thought` is the "Reasoning" part of the answer and `code` the stripped "Code" part.
    After 10 failed attempts `thought` and `code` are `nothing` and `success` is `false`.
"""
function getdata_decisionMaker(state::Dict, context::Dict, text2textInstructLLM::Function
    )::NamedTuple{(:thought, :code, :success, :errormsg), Tuple{Union{String, Nothing}, Union{String, Nothing}, Bool, Union{String, Nothing}}}

    systemmsg =
        """
        You are an assistant helping the user to execute SQL code from the user's query.

        At each round of conversation, the user will give you:
        Context: ...
        User intention: ...
        Code executed from the last round: ...
        Execution error: execution error of the last round code.

        You should consider the following guidelines:
        - Text information in the database is sometimes stored in lower case. If your search returns empty, try using lower case to search.

        You should then respond to the user with:
        1) Understanding:
        - State your understanding about the current situation.
        2) Reasoning:
        - State your step by step reasoning about the current situation.
        3) Plan: Step-by-step instructions of how to complete the task.
        - Focus on improving the code from the last round.
        - Do not create any table in the database.
        4) Code:
        - Write new improved code.
        - Do not wrap the code and no comment as it will be executed directly without any modification against the database.

        You should only respond in format as described below and nothing more:
        Understanding: ...
        Reasoning: ...
        Plan:
        1) ...
        2) ...
        ...
        Code: ...

        Let's begin!
        """

    note_flag = ""
    for attempt in 1:10
        # Rebuilt on every attempt because note_flag may have changed.
        usermsg =
            """
            Context:
            $(context[:mentionedTableInfo])
            User intention: $(context[:userintention])
            Code executed from the last round: $(state[:code])
            Execution error: $(state[:errormsg])
            $note_flag
            """

        _prompt =
            [
                Dict(:name=> "system", :text=> systemmsg),
                Dict(:name=> "user", :text=> usermsg)
            ]

        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
        prompt *=
            """
            <|start_header_id|>assistant<|end_header_id|>
            """

        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response,
                ["Understanding", "Reasoning", "Plan", "Code"];
                rightmarker=":", symbolkey=true, lowercasekey=true)
            code = strip(responsedict[:code])

            if length(code) < 2
                error("No code available.")
            elseif code == state[:code]
                error("generated code is the same as earlier.")
            end

            # check code
            # The CREATE TABLE guard ignores letter case and allows modifiers such as
            # TEMP or UNLOGGED between the two keywords.
            if occursin(r"\bcreate\s+(?:\w+\s+){0,3}table\b"i, code)
                note_flag = "Note: Create new table is not allowed."
                error("create table is not allowed")
            elseif occursin("```", code)
                error("Note: code contains backtick ` which is not allowed")
            elseif code[end] != ';'
                error("SQL does not ending with ';'")
            elseif count(==(';'), code) > 1
                # A predicate is used because count() with a Char pattern needs Julia 1.7.
                error("Multiple SQL statement are not allowed")
            end

            println("--> getdata_decisionMaker() ", @__FILE__, " ", @__LINE__)
            pprintln(Dict(responsedict))
            return (thought=responsedict[:reasoning], code=code, success=true, errormsg=nothing)
        catch e
            # Any failure (LLM call, parsing, rejected code) only costs one attempt.
            errorMsg = sprint(showerror, e)
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            print("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
        end
    end

    return (thought=nothing, code=nothing, success=false,
        errormsg="Failed to generate SQL after numerous attempts.")
end
|
||||
"""
    SQLexecution(executeSQL::Function, sql::AbstractString)

Execute a given SQL and return at most 2 rows of the resulting table.

A statement without a `LIMIT` clause is wrapped in a subquery that picks 2 random rows,
so that large tables are not loaded. Every failure is caught and reported through
`errormsg`: a statement that does not end with `;`, an error raised by `executeSQL`, a
result with 0 rows, or a result with more than 30 columns.

# Arguments
- `executeSQL::Function`
    A function that receives one SQL string, runs it against the database and returns
    a result that `DataFrame` can be constructed from.
- `sql<:AbstractString`
    A SQL command

# Return
- `NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}}`
    On success: the table, `success=true`, `errormsg=nothing`, `reward=1`,
    `isterminal=true`. On failure: `result=nothing`, `success=false`, the error text,
    `reward=0`, `isterminal=false`.

# Example
```jldoctest
julia> using LibPQ, SQLLLM
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.SQLexecution(executeSQL, "SELECT wine_name FROM wine;")
```
"""
function SQLexecution(executeSQL::Function, sql::T
    )::NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}} where {T<:AbstractString}

    try
        statement = strip(sql)
        endswith(statement, ';') ||
            error("Error, SQL execution failed because it does not ended with ';'")

        # add LIMIT to the SQL to prevent loading large data
        # The statement is wrapped instead of extended: appending ORDER BY to a statement
        # that already has ORDER BY or DISTINCT would produce invalid SQL. The LIMIT
        # check ignores letter case and only matches the whole word.
        if !occursin(r"\blimit\b"i, statement)
            inner = chop(statement)   # drop the trailing ';'
            statement = "SELECT * FROM (\n$inner\n) AS sqlllm_sample ORDER BY RANDOM() LIMIT 2;"
        end
        println("--> SQL ", @__FILE__, " ", @__LINE__)
        println(statement)

        df = DataFrame(executeSQL(statement))

        row, column = size(df)
        if row == 0 # if 0 row
            error("The resulting table has 0 row. Possible causes: 1) You might be searching in the wrong place 2) There could be a typo in your search query 3) No data matches your search criteria.")
        elseif column > 30
            error("SQL execution failed. An unexpected error occurred. Please try again.")
        end

        # A statement with its own LIMIT may still return many rows: keep 2 random rows.
        df1 = row > 2 ? df[sample(1:row, 2, replace=false), :] : df

        println("--> SQLexecution() ", @__FILE__, " ", @__LINE__)
        println(df1)
        return (result=df1, success=true, errormsg=nothing, reward=1, isterminal=true)
    catch e
        errorMsg = sprint(showerror, e)
        println(errorMsg)
        return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false)
    end
end
|
||||
|
||||
"""
    extractContent_dataframe(df::DataFrame, context::Dict, text2textInstructLLM::Function)

Extract content from a dataframe with LLM.

The table is converted to text with `dfToString`, the LLM is asked to summarize it with
respect to `context[:userintention]`, and the summary is returned together with the table
text. Up to 5 attempts are made before an error is raised.

# Arguments
- `df::DataFrame`
    A dataframe to be read.
- `context::Dict`
    A dictionary to give LLM more context. The key `:userintention` is read.
- `text2textInstructLLM::Function`
    A function that handles communication to LLM service

# Return
- `result::String`
"""
function extractContent_dataframe(df::DataFrame, context::Dict, text2textInstructLLM::Function
    )::String

    row, column = size(df)

    #[PENDING] Since selected column depend on the question, there should be a better way to select column on the fly, not hard coded like this.
    # assuming if columns > 10, agent is getting wine info but the info is too much
    selectedcolumn = ["wine_id",
                      "wine_name",
                      "brand",
                      "manufacturer",
                      "region",
                      "country",
                      "wine_type",
                      "grape_variety",
                      "serving_temperature",
                      "intensity",
                      "sweetness",
                      "tannin",
                      "acidity",
                      "fizziness",
                      "tasting_notes"]
    # Only columns that really exist are selected, so a wide table that is not the wine
    # table no longer raises an error; without any match the table is kept as it is.
    available = intersect(selectedcolumn, names(df))
    df1 = (column > 10 && !isempty(available)) ? df[:, available] : df

    dfstr = dfToString(df1)

    systemmsg =
        """
        You are an assistant that readouts the resulting table after the user executing SQL command.

        At each round of conversation, the user will give you:
        - User intention: ...
        - Resulting table dimension: ...
        - Resulting table: The resulting table after executing the user's intention.

        You should then respond to the user with:
        - About_resulting_table:
        1) What is the resulting table represent?
        - Search_summary:
        1) Summarize the table's content based on the user intension in verbal English.
        Here are some example:
        Bad example (you are not Summarize the table content): there are 2 columns in the table i.e. "cash" and "number".
        2) Do not generate additional text.

        You should only respond in format as described below:
        About_resulting_table: ...
        Search_summary: ...

        Let's begin!
        """

    usermsg =
        """
        User intention: $(context[:userintention])
        Resulting table: $dfstr
        """

    _prompt =
        [
            Dict(:name=> "system", :text=> systemmsg),
            Dict(:name=> "user", :text=> usermsg)
        ]

    # put in model format
    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
    prompt *=
        """
        <|start_header_id|>assistant<|end_header_id|>
        """

    for attempt in 1:5
        # Without the try block the first failing answer would end the loop, so the
        # remaining attempts could never run.
        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response, ["About_resulting_table", "Search_summary"],
                rightmarker=":", symbolkey=true)

            result =
                """
                Summary: $(responsedict[:Search_summary])
                More details: $dfstr
                """

            if row > 2
                result *= "There are many more rows, but they are truncated because there are too many of them."
            end

            println("--> extractContent_dataframe() ", @__FILE__, " ", @__LINE__)
            println(result)

            return result
        catch e
            errorMsg = sprint(showerror, e)
            println("Attempt $attempt. Error occurred: $errorMsg")
        end
    end
    error("extractContent_dataframe failed to summarize the resulting table.")
end
|
||||
|
||||
"""
    getTableNameFromSQL(sql::AbstractString, text2textInstructLLM::Function)

Extract a database's table name that mentioned in SQL

The LLM is asked to answer with a JSON list after the key `table_name`. That list is
parsed with `JSON3.read` and returned. Up to 5 attempts are made.

# Arguments
- `sql<:AbstractString`
    SQL command
- `text2textInstructLLM::Function`
    A function that handles communication to LLM service. It receives the complete
    prompt and returns the answer text.

# Return
- `tablename::Vector{String}`
    A list of table name

# Example
```jldoctest
julia> using SQLLLM
julia> sql = "SELECT w.wine_name FROM wine w JOIN wine_food wf ON w.wine_id = wf.wine_id;"
julia> text2textInstructLLM(prompt::String) = ...   # send the prompt to the LLM service
julia> result = SQLLLM.getTableNameFromSQL(sql, text2textInstructLLM)
```
"""
function getTableNameFromSQL(sql::T, text2textInstructLLM::Function)::Vector{String} where {T<:AbstractString}
    systemmsg =
        """
        Extract table name out of the user query.

        At each round of conversation, the user will give you:
        Query: ...

        You should then respond to the user with:
        - table_name: a list of table name that the user mentioned in the query.
        For example, ["color", "type"]

        You must only respond in format as described below:
        table_name: ["...", "...", ...]

        Let's begin!
        """

    usermsg =
        """
        Query: $sql
        """

    messages =
        [
            Dict(:name=> "system", :text=> systemmsg),
            Dict(:name=> "user", :text=> usermsg)
        ]

    # put in model format
    prompt = GeneralUtils.formatLLMtext(messages; formatname="llama3instruct")
    prompt *=
        """
        <|start_header_id|>assistant<|end_header_id|>
        """

    attempt = 1
    while attempt <= 5
        try
            answer = text2textInstructLLM(prompt)
            parsed = GeneralUtils.textToDict(answer,
                ["table_name"],
                rightmarker=":", symbolkey=true)
            return copy(JSON3.read(parsed[:table_name]))
        catch e
            # Report the failure and go on with the next attempt.
            errorMsg = sprint(showerror, e)
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            println("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
        end
        attempt += 1
    end
    error("getTableNameFromSQL failed to generate a thought")
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
end # module llmfunction
|
||||
81
src/type.jl
Normal file
81
src/type.jl
Normal file
@@ -0,0 +1,81 @@
# Submodule `type`: currently empty, it defines and exports nothing.
module type

end # module type
|
||||
116
src/util.jl
Normal file
116
src/util.jl
Normal file
@@ -0,0 +1,116 @@
|
||||
module util
|
||||
|
||||
export getDataFrameValue, dfRowtoString, dfToString
|
||||
|
||||
using DataFrames
|
||||
|
||||
"""
    getDataFrameValue(row::DataFrameRow, key::Symbol)

Return the value that `row` holds in the column named `key`.
"""
function getDataFrameValue(row::DataFrameRow, key::Symbol)
    return getproperty(row, key)
end
|
||||
|
||||
"""
    dfRowtoString(row::DataFrameRow)

Convert a dataframe row into one `key: value, key: value` string.

The pieces are joined with `", "`. The earlier version cut the trailing separator off by
byte position, which raised a `StringIndexError` when the last value ended with a
multi-byte character (for example an accented letter).
"""
function dfRowtoString(row::DataFrameRow)::String
    return join(("$key: $(getDataFrameValue(row, key))" for key in keys(row)), ", ")
end
|
||||
|
||||
"""
    dfToString(df::DataFrame)

Convert a dataframe into text with one numbered line per row. Every line has the form
`i) ` followed by the output of `dfRowtoString` and a newline.
"""
function dfToString(df::DataFrame)
    lines = ["$i) $(dfRowtoString(row))\n" for (i, row) in enumerate(eachrow(df))]
    return join(lines)
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
end # module util
|
||||
Reference in New Issue
Block a user