958 lines
28 KiB
Julia
958 lines
28 KiB
Julia
module llmfunction
|
|
|
|
export listAllTable_json, listAllTable_str, tableinfo, getdata, finalAnswerBox,
|
|
getTableNameFromSQL, extractContent_dataframe, SQLexecution
|
|
|
|
using HTTP, JSON3, URIs, Random, PrettyPrinting, UUIDs, LibPQ, Tables, DataFrames, CSV,
|
|
DataStructures, StatsBase
|
|
using GeneralUtils, LLMMCTS
|
|
using ..util
|
|
|
|
# ---------------------------------------------- 100 --------------------------------------------- #
|
|
|
|
|
"""
    listAllTable_json(executeSQL::Function)

List all tables of the `public` schema and return them as a `DataFrame`.

# Arguments
- `executeSQL::Function`
    A function that receives a SQL string, runs it against the Postgres database and
    returns a result that can be passed to `DataFrame`.

# Return
- `NamedTuple{(:result, :success), Tuple{DataFrame, Bool}}`
    `result` holds one row per table with the columns `table_name`, `table_comment` and
    `columns` (a comma separated list of `column_name (data_type)` entries).
    `success` is always `true`; an exception raised by `executeSQL` is not caught here.

# Example
```jldoctest
julia> using LibPQ, SQLLLM
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.listAllTable_json(executeSQL)
julia> result = response[:result]
```
"""
function listAllTable_json(executeSQL::Function
    )::NamedTuple{(:result, :success), Tuple{DataFrame, Bool}}

    # obj_description() expects the table OID. relfilenode is the on-disk file node and
    # stops matching the OID once the table has been rewritten, so pg_class.oid is used.
    # The pg_namespace join keeps a same-named relation in another schema from matching.
    sql =
        """
        SELECT
            table_name,
            obj_description(pg_class.oid, 'pg_class') AS table_comment,
            string_agg(column_name || ' (' || data_type || ')', ', '
                       ORDER BY ordinal_position) AS columns
        FROM
            information_schema.columns
        JOIN
            pg_namespace ON pg_namespace.nspname = table_schema
        JOIN
            pg_class ON pg_class.relname = table_name
                AND pg_class.relnamespace = pg_namespace.oid
        WHERE
            table_schema = 'public'
        GROUP BY
            table_name, pg_class.oid
        ORDER BY
            table_name;
        """

    tablesinfo_df = DataFrame(executeSQL(sql))

    return (result=tablesinfo_df, success=true)
end
|
|
|
|
|
|
"""
    listAllTable_str(executeSQL::Function)

List all tables of the `public` schema and return them as numbered text.

# Arguments
- `executeSQL::Function`
    A function that receives a SQL string, runs it against the Postgres database and
    returns a result that can be passed to `DataFrame`.

# Return
- `NamedTuple{(:result, :success), Tuple{String, Bool}}`
    `result` is a header line followed by one line per table in the form
    `i. table name; table comment; table columns`.
    `success` is always `true`; an exception raised by `executeSQL` is not caught here.
"""
function listAllTable_str(executeSQL::Function
    )::NamedTuple{(:result, :success), Tuple{String, Bool}}

    # obj_description() expects the table OID. relfilenode is the on-disk file node and
    # stops matching the OID once the table has been rewritten, so pg_class.oid is used.
    # The pg_namespace join keeps a same-named relation in another schema from matching.
    sql =
        """
        SELECT
            table_name,
            obj_description(pg_class.oid, 'pg_class') AS table_comment,
            string_agg(column_name || ' (' || data_type || ')', ', '
                       ORDER BY ordinal_position) AS columns
        FROM
            information_schema.columns
        JOIN
            pg_namespace ON pg_namespace.nspname = table_schema
        JOIN
            pg_class ON pg_class.relname = table_name
                AND pg_class.relnamespace = pg_namespace.oid
        WHERE
            table_schema = 'public'
        GROUP BY
            table_name, pg_class.oid
        ORDER BY
            table_name;
        """

    df = DataFrame(executeSQL(sql))

    # Accumulate into a buffer instead of re-allocating the string on every row.
    io = IOBuffer()
    print(io, "Here are a list of available tables in the database (each row is in this format: table name; table comment; table columns): \n")
    for (i, r) in enumerate(eachrow(df))
        table_name = r[1]
        table_comment = r[2]
        columns = r[3]
        print(io, "$i. $table_name; $table_comment; $columns\n")
    end

    return (result=String(take!(io)), success=true)
end
|
|
|
|
|
"""
    tableinfo_str(executeSQL::Function, tablename::AbstractString)

Describe the columns (name, data type, comment) of one table in the `public` schema.

# Arguments
- `executeSQL::Function`
    A function that receives a SQL string, runs it against the Postgres database and
    returns a result that can be passed to `DataFrame`.
- `tablename::AbstractString`
    Name of the table to describe.

# Return
- `NamedTuple{(:result, :success), Tuple{String, Bool}}`
    `result` is a header line followed by one line per column in the form
    `i. column name; data type; column comment`. A table that does not exist yields the
    header line only. `success` is always `true`.
"""
function tableinfo_str(executeSQL::Function, tablename::AbstractString
    )::NamedTuple{(:result, :success), Tuple{String, Bool}}

    # tablename is spliced into the SQL text as a string literal. Doubling single quotes
    # keeps it one literal (this relies on standard_conforming_strings being on, the
    # Postgres default).
    safename = replace(tablename, "'" => "''")

    # %I quotes identifiers when needed, so mixed-case or otherwise unusual table names
    # still resolve through ::regclass (%s would leave them unquoted).
    sql =
        """
        SELECT
            column_name,
            data_type,
            col_description(format('%I.%I', table_schema, table_name)::regclass::oid, ordinal_position) AS column_comment
        FROM
            information_schema.columns
        WHERE
            table_name = '$safename'
            AND table_schema = 'public'
        ORDER BY
            ordinal_position;
        """

    df = DataFrame(executeSQL(sql))

    io = IOBuffer()
    print(io, "Here are info of table $tablename (each row is in this format: column name; data type; column comment):\n")
    for (i, r) in enumerate(eachrow(df))
        column_name = r[1]
        column_datatype = r[2]
        column_comment = r[3]
        print(io, "$i. $column_name; $column_datatype; $column_comment \n")
    end

    return (result=String(take!(io)), success=true)
end
|
|
|
|
|
"""
    tableinfo(executeSQL::Function, tablenames::AbstractVector)

Collect the column descriptions of several tables into one text.

# Arguments
- `executeSQL::Function`
    A function that receives a SQL string, runs it against the Postgres database and
    returns a result that can be passed to `DataFrame`.
- `tablenames<:AbstractVector`
    Names of the tables to describe.

# Return
- `NamedTuple{(:result,), Tuple{String}}`
    The concatenated output of `tableinfo_str` for every requested table, in the given
    order. If at least one requested name is not among the tables of the `public`
    schema, `result` is an error text listing the unknown names instead.

# Example
```jldoctest
julia> using SQLLLM, LibPQ
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.tableinfo(executeSQL, ["wine", "food"])
julia> result = response[:result]
```
"""
function tableinfo(executeSQL::Function, tablenames::T
    )::NamedTuple{(:result,), Tuple{String}} where {T<:AbstractVector}

    # every table that exists in the schema
    catalogquery =
        """
        SELECT pg_namespace.nspname AS schema_name,
        relname AS table_name,
        pg_catalog.obj_description(pg_class.oid) AS comment
        FROM pg_class
        INNER JOIN pg_namespace ON pg_namespace.oid = pg_class.relnamespace
        WHERE pg_namespace.nspname = 'public' -- Replace 'public' with your desired schema
        AND pg_class.relkind IN ('r', 't');
        """

    catalog = DataFrame(executeSQL(catalogquery))[:, [:table_name, :comment]]
    knowntables = collect(catalog.table_name)

    # refuse the whole request as soon as one requested name is unknown
    unknowntables = [name for name in tablenames if name ∉ knowntables]
    if !isempty(unknowntables)
        message =
            "Error, the following tables does not exist in the database: $(JSON3.write(unknowntables))"
        return (result=message,)
    end

    descriptions = map(name -> tableinfo_str(executeSQL, name)[:result], tablenames)

    return (result=join(descriptions),)
end
|
|
|
|
|
|
|
|
|
"""
    getdata(query, context, executeSQL, text2textInstructLLM)

Run `query` against the database through a small `LLMMCTS.runMCTS` search that uses
`getdata_transition` as its transition function, and report the outcome.

# Arguments
- `query<:AbstractString`
    The query to run. It is stored as `:question` in the initial state and is also the
    text from which the mentioned table names are extracted.
- `context::Union{Dict, Nothing}`
    Context made available to the transition function. The key `:mentionedTableInfo`
    is written into it by this function.
    NOTE(review): the signature admits `nothing`, but `context` is indexed right away,
    so `nothing` cannot work as written - confirm whether callers ever pass it.
- `executeSQL::Function`
    A function that receives a SQL string and runs it against the database.
- `text2textInstructLLM::Function`
    A function that handles communication with the LLM service.

# Return
- `NamedTuple{(:result, :errormsg, :success), Tuple{String, Union{String, Nothing}, Bool}}`
    On success `result` is the `:response` of the second value returned by `runMCTS`,
    `errormsg` is `nothing` and `success` is `true`. Otherwise `result` is a failure
    text built from the `:errormsg` of the first returned value and `success` is `false`.
"""
function getdata(query::T, context::Union{Dict, Nothing}, executeSQL::Function,
    text2textInstructLLM::Function;
    )::NamedTuple{(:result, :errormsg, :success), Tuple{String, Union{String, Nothing}, Bool}} where {T<:AbstractString}

    # Table info is resolved here so it is fetched only once; doing it inside
    # getdata_decisionMaker() would repeat the lookup on every call.
    tablesInQuery = getTableNameFromSQL(query, text2textInstructLLM)
    context[:mentionedTableInfo] = tableinfo(executeSQL, tablesInQuery)[:result]

    rootstate = Dict{Symbol, Any}(
        :question => query,
        :code => nothing,
        :response => nothing,

        :reward => 0,
        :isterminal => false,
        :evaluation => nothing,
        :errormsg => nothing,
        :errorexplain => nothing,
    )

    searchargs = (
        context=context,
        executeSQL=executeSQL,
        text2textInstructLLM=text2textInstructLLM,
    )

    result_1, result_2 = LLMMCTS.runMCTS(rootstate, getdata_transition, searchargs;
        totalsample=1, maxdepth=3, maxiterations=1, explorationweight=1.0)

    if !(result_2[:isterminal] == true)
        failure = result_1[:errormsg]
        return (result="Failed to get the data. $failure", errormsg=failure, success=false)
    end

    # success=true tells the caller that getdata() is finished
    return (result=result_2[:response], errormsg=nothing, success=true)
end
|
|
|
|
|
"""
    getdata_evaluator(newstate, config)

Placeholder evaluator. Neither argument is inspected.

# Arguments
- `newstate`
    Ignored.
- `config`
    Ignored.

# Return
- `NamedTuple{(:evaluation, :score)}`
    Always `(evaluation="None", score=0)`.

# TODO
- [PENDING] implement the function
"""
function getdata_evaluator(newstate, config)
    evaluation = "None"
    score = 0
    return (evaluation=evaluation, score=score)
end
|
|
|
|
|
"""
    getdata_transition(state, args)

Produce the next state: pick the SQL to run, execute it and store the outcome in a
deep copy of `state`.

# Arguments
- `state<:AbstractDict`
    Current state. The keys `:code` and `:question` are read.
- `args::NamedTuple`
    Must provide `:context`, `:executeSQL` and `:text2textInstructLLM`.

# Return
- `NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}}`
    `newNodeKey` comes from `GeneralUtils.uuid4snakecase()`, `newstate` is the updated
    copy of `state` and `progressvalue` is always `0`.
"""
function getdata_transition(state::T, args::NamedTuple
    )::NamedTuple{(:newNodeKey, :newstate, :progressvalue), Tuple{String, T, Integer}} where {T<:AbstractDict}

    context = args[:context]
    executeSQL::Function = args[:executeSQL]
    text2textInstructLLM::Function = args[:text2textInstructLLM]

    # First visit (no code recorded yet): the question itself is executed as SQL.
    # Later visits: the LLM is asked for a corrected statement.
    if isnothing(state[:code])
        thought, sql = nothing, state[:question]
    else
        decision = getdata_decisionMaker(state, context, text2textInstructLLM)
        thought, sql = decision[:thought], decision[:code]
    end

    newNodeKey = GeneralUtils.uuid4snakecase()
    newstate = deepcopy(state)

    outcome =
        if isnothing(sql)
            (result=nothing,
             success=false,
             errormsg="SQL execution failed. An unexpected error occurred. Please try again.",
             reward=0,
             isterminal=false)
        else
            SQLexecution(executeSQL, sql)
        end

    newstate[:code] = sql
    newstate[:response] = outcome.result
    newstate[:errorexplain] = thought
    newstate[:errormsg] = outcome.errormsg
    newstate[:reward] = outcome.reward
    newstate[:isterminal] = outcome.isterminal

    # a successful execution is replaced by its LLM-written readout
    if outcome.result !== nothing
        newstate[:response] =
            extractContent_dataframe(outcome.result, context, text2textInstructLLM)
    end

    return (newNodeKey=newNodeKey, newstate=newstate, progressvalue=0)
end
|
|
|
|
|
"""
    getdata_decisionMaker(state, context, text2textInstructLLM)

Ask the LLM for an improved SQL statement based on the last executed code and its
execution error. Up to 10 attempts are made; a reply is rejected when it has no code,
repeats the previous code, contains `CREATE TABLE` (in any letter case), contains a
code fence, does not end with `;` or holds more than one `;`.

# Arguments
- `state::Dict`
    Current state. The keys `:code` and `:errormsg` are read.
- `context::Dict`
    Additional context for the LLM. The keys `:mentionedTableInfo` and `:userintention`
    are read.
- `text2textInstructLLM::Function`
    A function that receives the prompt text and returns the LLM reply.

# Return
- `NamedTuple{(:thought, :code, :success, :errormsg), Tuple{Union{String, Nothing}, Union{String, Nothing}, Bool, Union{String, Nothing}}}`
    On success `thought` is the "Reasoning" part of the reply and `code` the accepted
    SQL. After 10 rejected attempts `thought` and `code` are `nothing`, `success` is
    `false` and `errormsg` explains the failure.
"""
function getdata_decisionMaker(state::Dict, context::Dict, text2textInstructLLM::Function
    )::NamedTuple{(:thought, :code, :success, :errormsg), Tuple{Union{String, Nothing}, Union{String, Nothing}, Bool, Union{String, Nothing}}}

    systemmsg =
        """
        You are an assistant helping the user to execute SQL code from the user's query.

        At each round of conversation, the user will give you:
        Context: ...
        User intention: ...
        Code executed from the last round: ...
        Execution error: execution error of the last round code.

        You should consider the following guidelines:
        - Text information in the database is sometimes stored in lower case. If your search returns empty, try using lower case to search.

        You should then respond to the user with:
        1) Understanding:
        - State your understanding about the current situation.
        2) Reasoning:
        - State your step by step reasoning about the current situation.
        3) Plan: Step-by-step instructions of how to complete the task.
        - Focus on improving the code from the last round.
        - Do not create any table in the database.
        4) Code:
        - Write new improved code.
        - Do not wrap the code and no comment as it will be executed directly without any modification against the database.

        You should only respond in format as described below and nothing more:
        Understanding: ...
        Reasoning: ...
        Plan:
        1) ...
        2) ...
        ...
        Code: ...

        Let's begin!
        """

    noise = ""       # random filler added after a failed attempt so the next prompt differs
    note_flag = ""   # extra instruction shown to the LLM after a rejected reply
    for attempt in 1:10
        usermsg =
            """
            Context:
            $(context[:mentionedTableInfo])
            User intention: $(context[:userintention])
            Code executed from the last round: $(state[:code])
            Execution error: $(state[:errormsg])
            $noise
            $note_flag
            """

        _prompt =
            [
                Dict(:name=> "system", :text=> systemmsg),
                Dict(:name=> "user", :text=> usermsg)
            ]

        # put in model format
        prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
        prompt *=
            """
            <|start_header_id|>assistant<|end_header_id|>
            """

        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response,
                ["Understanding", "Reasoning", "Plan", "Code"];
                rightmarker=":", symbolkey=true, lowercasekey=true)
            code = String(strip(responsedict[:code]))

            if length(code) < 2
                error("No code available.")
            elseif code == state[:code]
                error("generated code is the same as earlier.")
            end

            # SQL keywords are case-insensitive, so the guard has to be as well;
            # a plain "CREATE TABLE" substring test lets "create table" through.
            if occursin(r"create\s+table"i, code)
                note_flag = "Note: Create new table is not allowed."
                error("create table is not allowed")
            elseif occursin("```", code)
                error("Note: code contains backtick ` which is not allowed")
            elseif code[end] != ';'
                error("SQL does not ending with ';'")
            elseif count(';', code) > 1
                error("Multiple SQL statement are not allowed")
            end

            println("--> getdata_decisionMaker() ", @__FILE__, " ", @__LINE__)
            pprintln(Dict(responsedict))
            return (thought=responsedict[:reasoning], code=code, success=true, errormsg=nothing)
        catch e
            # Any failure (LLM call, parsing, rejected code) is logged and retried.
            errorMsg = sprint(showerror, e)
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            print("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
            noise = GeneralUtils.randstrings(3, 5)
        end
    end

    return (thought=nothing, code=nothing, success=false,
        errormsg="Failed to generate SQL after numerous attempts.")
end
|
|
|
"""
    SQLexecution(executeSQL::Function, sql::AbstractString)

Execute a single SQL statement and return at most 2 rows of its result.

A statement without a `LIMIT` clause gets one appended so that large results are never
loaded. Every failure (including an empty result) is caught and reported through
`errormsg` instead of being thrown.

# Arguments
- `executeSQL::Function`
    A function that receives a SQL string, runs it against the database and returns a
    result that can be passed to `DataFrame`.
- `sql<:AbstractString`
    The SQL statement. It must end with `;`.

# Return
- `NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}}`
    On success: `result` is the (sampled) table, `success=true`, `errormsg=nothing`,
    `reward=1`, `isterminal=true`.
    On failure: `result=nothing`, `success=false`, `errormsg` holds the error text,
    `reward=0`, `isterminal=false`.

# Example
```jldoctest
julia> using LibPQ, SQLLLM
julia> function executeSQL(sql)
            DBconnection = LibPQ.Connection("host=192.168.88.122 port=5432 dbname=xyz user=zyx password=1234")
            result = LibPQ.execute(DBconnection, sql)
            close(DBconnection)
            return result
        end
julia> response = SQLLLM.SQLexecution(executeSQL, sql)
```
"""
function SQLexecution(executeSQL::Function, sql::T
    )::NamedTuple{(:result, :success, :errormsg, :reward, :isterminal), Tuple{Union{DataFrame, Nothing}, Bool, Union{String, Nothing}, Integer, Bool}} where {T<:AbstractString}

    try
        statement = String(strip(sql))

        # endswith() is also safe for an empty statement, where indexing [end] would throw
        if !endswith(statement, ';')
            error("Error, SQL execution failed because it does not ended with ';'")
        end

        # add LIMIT to the SQL to prevent loading large data
        # (keywords are matched case-insensitively and as whole words)
        if !occursin(r"\blimit\b"i, statement)
            body = chop(statement)   # drop the trailing ';' without byte arithmetic
            # A second ORDER BY, or ORDER BY RANDOM() on a DISTINCT / set-operation
            # query, is rejected by Postgres, so those statements only get the LIMIT.
            if occursin(r"\b(order\s+by|distinct|union|intersect|except)\b"i, body)
                statement = body * " LIMIT 2;"
            else
                statement = body * " ORDER BY RANDOM() LIMIT 2;"
            end
        end
        println("--> SQL ", @__FILE__, " ", @__LINE__)
        println(statement)

        df = DataFrame(executeSQL(statement))

        row, column = size(df)
        if row == 0
            error("The resulting table has 0 row. Possible causes: 1) You might be searching in the wrong place 2) There could be a typo in your search query 3) No data matches your search criteria.")
        elseif column > 30
            error("SQL execution failed. An unexpected error occurred. Please try again.")
        end

        # A statement that brought its own LIMIT may still return many rows;
        # keep 2 randomly chosen ones.
        df1 = row > 2 ? df[sample(1:row, 2; replace=false), :] : df

        println("--> SQLexecution() ", @__FILE__, " ", @__LINE__)
        println(df1)
        return (result=df1, success=true, errormsg=nothing, reward=1, isterminal=true)
    catch e
        # Failures are reported to the caller as data so that the error text can be
        # shown to the LLM in the next round.
        errorMsg = sprint(showerror, e)
        println(errorMsg)
        return (result=nothing, success=false, errormsg=errorMsg, reward=0, isterminal=false)
    end
end
|
|
|
|
|
"""
    extractContent_dataframe(df, context, text2textInstructLLM)

Let the LLM summarize a result table and return the summary together with the table
text produced by `dfToString`.

# Arguments
- `df::DataFrame`
    The table to be read out.
- `context::Dict`
    Additional context for the LLM. The key `:userintention` is read.
- `text2textInstructLLM::Function`
    A function that receives the prompt text and returns the LLM reply.

# Return
- `result::String`
    Text of the form `Summary: ...` / `More details: ...`, followed by a truncation
    note when `df` has more than 2 rows.

# Throws
- `ErrorException` when no usable reply was obtained after 5 attempts.
"""
function extractContent_dataframe(df::DataFrame, context::Dict, text2textInstructLLM::Function
    )::String

    row, column = size(df)

    #[PENDING] Since selected column depend on the question, there should be a better way to select column on the fly, not hard coded like this.
    df1 =
        if column > 10 # assuming if columns > 10, agent is getting wine info but the info is too much
            preferred = ["wine_id",
                         "wine_name",
                         "brand",
                         "manufacturer",
                         "region",
                         "country",
                         "wine_type",
                         "grape_variety",
                         "serving_temperature",
                         "intensity",
                         "sweetness",
                         "tannin",
                         "acidity",
                         "fizziness",
                         "tasting_notes"]
            # Keep only the preferred columns that exist, so that a wide table
            # without (all of) these columns does not raise on column selection.
            present = intersect(preferred, names(df))
            isempty(present) ? df : df[:, present]
        else
            df
        end

    dfstr = dfToString(df1)

    systemmsg =
        """
        You are an assistant that readouts the resulting table after the user executing SQL command.

        At each round of conversation, the user will give you:
        - User intention: ...
        - Resulting table dimension: ...
        - Resulting table: The resulting table after executing the user's intention.

        You should then respond to the user with:
        - About_resulting_table:
        1) What is the resulting table represent?
        - Search_summary:
        1) Summarize the table's content based on the user intension in verbal English.
        Here are some example:
        Bad example (you are not Summarize the table content): there are 2 columns in the table i.e. "cash" and "number".
        2) Do not generate additional text.

        You should only respond in format as described below:
        About_resulting_table: ...
        Search_summary: ...

        Let's begin!
        """

    usermsg =
        """
        User intention: $(context[:userintention])
        Resulting table: $dfstr
        """

    _prompt =
        [
            Dict(:name=> "system", :text=> systemmsg),
            Dict(:name=> "user", :text=> usermsg)
        ]

    # put in model format
    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
    prompt *=
        """
        <|start_header_id|>assistant<|end_header_id|>
        """

    maxattempt = 5
    for attempt in 1:maxattempt
        # Without the try/catch the loop could never reach a second iteration:
        # the first pass either returned or threw.
        try
            response = text2textInstructLLM(prompt)
            responsedict = GeneralUtils.textToDict(response,
                ["About_resulting_table", "Search_summary"],
                rightmarker=":", symbolkey=true)

            result =
                """
                Summary: $(responsedict[:Search_summary])
                More details: $dfstr
                """

            if row > 2
                result *= "There are many more rows, but they are truncated because there are too many of them."
            end

            println("--> extractContent_dataframe() ", @__FILE__, " ", @__LINE__)
            println(result)

            return result
        catch e
            errorMsg = sprint(showerror, e)
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            println("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
        end
    end
    error("Failed to summarize the resulting table after $maxattempt attempts.")
end
|
|
|
|
|
"""
    getTableNameFromSQL(sql, text2textInstructLLM)

Ask the LLM which database tables are mentioned in `sql`.

# Arguments
- `sql<:AbstractString`
    The query text to inspect.
- `text2textInstructLLM::Function`
    A function that receives the prompt text and returns the LLM reply.

# Return
- `tablename::Vector{String}`
    The table names parsed from the `table_name` part of the LLM reply.

# Throws
- `ErrorException` when no parsable reply was obtained after 5 attempts.

# Example
```jldoctest
julia> using SQLLLM, UUIDs, GeneralUtils
julia> sql = "Get all rows from the \"food\" table where the description contains the word \"lamb\". Then, join this result with the \"wine_food\" table on the \"food_id\" column to get a list of wines that can be paired with lamb. Finally, group the result by the \"wine_id\" column and count the number of unique wines."
julia> function text2textInstructLLM(prompt::String)
            config = Dict(
                :mqttServerInfo => Dict(
                    :description => "mqtt server info",
                    :port => 1883,
                    :broker => "mqtt.yiem.cc"
                ),
                :externalservice => Dict(
                    :text2textinstruct => Dict(
                        :mqtttopic => "/loadbalancer/requestingservice",
                        :description => "text to text service with instruct LLM",
                        :llminfo => Dict(:name => "llama3instruct")
                    ),
                )
            )

            # apply LLM specific instruct format
            externalService = config[:externalservice][:text2textinstruct]

            msgMeta = GeneralUtils.generate_msgMeta(
                externalService[:mqtttopic],
                senderName= "SQLLLM",
                senderId= string(uuid4()),
                receiverName= "text2textinstruct",
                mqttBroker= config[:mqttServerInfo][:broker],
                mqttBrokerPort= config[:mqttServerInfo][:port],
            )

            outgoingMsg = Dict(
                :msgMeta=> msgMeta,
                :payload=> Dict(
                    :text=> prompt,
                    :kwargs=> Dict(
                        :max_tokens=> 512,
                        :stop=> ["<|eot_id|>"],
                        :temperature=> 0.2,
                    )
                )
            )

            _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
            response = _response[:response][:text]

            return response
        end
julia> result = SQLLLM.getTableNameFromSQL(sql, text2textInstructLLM)
```
"""
function getTableNameFromSQL(sql::T, text2textInstructLLM::Function)::Vector{String} where {T<:AbstractString}
    systemmsg =
        """
        Extract table name out of the user query.

        At each round of conversation, the user will give you:
        Query: ...

        You should then respond to the user with:
        - table_name: a list of table name that the user mentioned in the query.
        For example, ["color", "type"]

        You must only respond in format as described below:
        table_name: ["...", "...", ...]

        Let's begin!
        """

    usermsg =
        """
        Query: $sql
        """

    messages = [
        Dict(:name=> "system", :text=> systemmsg),
        Dict(:name=> "user", :text=> usermsg),
    ]

    # put in model format, then open the assistant turn
    assistantheader =
        """
        <|start_header_id|>assistant<|end_header_id|>
        """
    prompt = GeneralUtils.formatLLMtext(messages; formatname="llama3instruct") * assistantheader

    for attempt in 1:5
        try
            reply = text2textInstructLLM(prompt)
            parts = GeneralUtils.textToDict(reply, ["table_name"],
                rightmarker=":", symbolkey=true)
            # copy() turns the JSON3 view into a plain Julia collection
            tablenames = copy(JSON3.read(parts[:table_name]))

            return tablenames
        catch e
            errorMsg = sprint(showerror, e)
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            println("Attempt $attempt. Error occurred: $errorMsg\n$st")
            println("")
        end
    end
    error("getTableNameFromSQL failed to generate a thought")
end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
end # module llmfunction |