This commit is contained in:
narawat lamaiin
2025-03-07 12:32:49 +07:00
parent 2f38f3cd0d
commit e9f9e431a9
15 changed files with 131 additions and 4083 deletions

View File

@@ -1,12 +1,29 @@
using Revise
using LibPQ, JSON3, PrettyPrinting, UUIDs, DataFrames, DataStructures, Base64
using LibPQ, Dates, JSON3, PrettyPrinting, UUIDs, DataFrames, DataStructures, Base64
using GeneralUtils, SQLLLM
config = copy(JSON3.read("config.json"))
config = copy(JSON3.read("/appfolder/mountvolume/config.json"))
function executeSQL(sql::T) where {T<:AbstractString}
DBconnection = LibPQ.Connection("host=192.168.88.12 port=10201 dbname=wineDB user=yiemtechnologies password=yiemtechnologies@Postgres_0.0")
host = config[:externalservice][:wineDB][:host]
port = config[:externalservice][:wineDB][:port]
dbname = config[:externalservice][:wineDB][:dbname]
user = config[:externalservice][:wineDB][:user]
password = config[:externalservice][:wineDB][:password]
DBconnection = LibPQ.Connection("host=$host port=$port dbname=$dbname user=$user password=$password")
result = LibPQ.execute(DBconnection, sql)
close(DBconnection)
return result
end
function executeSQLVectorDB(sql)
host = config[:externalservice][:SQLVectorDB][:host]
port = config[:externalservice][:SQLVectorDB][:port]
dbname = config[:externalservice][:SQLVectorDB][:dbname]
user = config[:externalservice][:SQLVectorDB][:user]
password = config[:externalservice][:SQLVectorDB][:password]
DBconnection = LibPQ.Connection("host=$host port=$port dbname=$dbname user=$user password=$password")
result = LibPQ.execute(DBconnection, sql)
close(DBconnection)
return result
@@ -14,11 +31,11 @@ end
function text2textInstructLLM(prompt::String)
msgMeta = GeneralUtils.generate_msgMeta(
config[:externalservice][:text2textinstruct][:mqtttopic];
config[:externalservice][:loadbalancer][:mqtttopic];
msgPurpose="inference",
senderName="yiemagent",
senderId=string(uuid4()),
receiverName="text2textinstruct",
senderId=sessionId,
receiverName="text2textinstruct_medium",
mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port],
)
@@ -28,7 +45,7 @@ function text2textInstructLLM(prompt::String)
:payload => Dict(
:text => prompt,
:kwargs => Dict(
:num_ctx => 20480,
:num_ctx => 16384,
:temperature => 0.2,
)
)
@@ -40,126 +57,104 @@ function text2textInstructLLM(prompt::String)
return response
end
function executeSQLVectorDB(sql)
DBconnection = LibPQ.Connection("host=192.168.88.12 port=10203 dbname=SQLVectorDB user=yiemtechnologies password=yiemtechnologies@Postgres_0.0")
result = LibPQ.execute(DBconnection, sql)
close(DBconnection)
return result
end
function addSQLVectorDB(state)
# get embedding of the query
query = [state[:thoughtHistory][:question]]
# get text embedding from a LLM service
function getEmbedding(text::T) where {T<:AbstractString}
msgMeta = GeneralUtils.generate_msgMeta(
config[:externalservice][:text2textinstruct][:mqtttopic];
msgPurpose= "embedding",
senderName= "yiemagent",
senderId= string(uuid4()),
receiverName= "text2textinstruct",
mqttBrokerAddress= config[:mqttServerInfo][:broker],
mqttBrokerPort= config[:mqttServerInfo][:port],
config[:externalservice][:loadbalancer][:mqtttopic];
msgPurpose="embedding",
senderName="yiemagent",
senderId=sessionId,
receiverName="text2textinstruct_medium",
mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port],
)
outgoingMsg = Dict(
:msgMeta=> msgMeta,
:payload=> Dict(
:text=> query
)
:msgMeta => msgMeta,
:payload => Dict(
:text => [text] # must be a vector of string
)
response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
embedding = response[:response][:embeddings][1]
)
response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
embedding = response[:response][:embeddings]
return embedding
end
function findSimilarTextFromVectorDB(text::T1, tablename::T2, embeddingColumnName::T3,
vectorDB::Function; limit::Integer=1
)::DataFrame where {T1<:AbstractString, T2<:AbstractString, T3<:AbstractString}
# get embedding from LLM service
embedding = getEmbedding(text)[1]
# check whether there is close enough vector already store in vectorDB. if no, add, else skip
sql =
"""
SELECT *, embedding <-> '$embedding' as distance
FROM sql_statement_repository
ORDER BY distance LIMIT 1;
"""
response = executeSQLVectorDB(sql)
sql = """
SELECT *, $embeddingColumnName <-> '$embedding' as distance
FROM $tablename
ORDER BY distance LIMIT $limit;
"""
response = vectorDB(sql)
df = DataFrame(response)
return df
end
function similarSQLVectorDB(query; maxdistance::Integer=100)
tablename = "sqlllm_decision_repository"
# get embedding of the query
df = findSimilarTextFromVectorDB(query, tablename,
"function_input_embedding", executeSQLVectorDB)
row, col = size(df)
# distance = row == 0 ? Inf : df[1, :distance]
distance = 100 # CHANGE this is for testing only
if row != 0 && distance < maxdistance
# if there is usable SQL, return it.
output_b64 = df[1, :function_output_base64] # pick the closest match
output_str = String(base64decode(output_b64))
rowid = df[1, :id]
println("\n~~~ found similar sql. row id $rowid, distance $distance ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
return (dict=output_str, distance=distance)
else
println("\n~~~ similar sql not found, max distance $maxdistance ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
return (dict=nothing, distance=nothing)
end
end
function insertSQLVectorDB(query::T1, SQL::T2; maxdistance::Integer=3) where {T1<:AbstractString, T2<:AbstractString}
tablename = "sqlllm_decision_repository"
# get embedding of the query
# query = state[:thoughtHistory][:question]
df = findSimilarTextFromVectorDB(query, tablename,
"function_input_embedding", executeSQLVectorDB)
row, col = size(df)
distance = row == 0 ? Inf : df[1, :distance]
if row == 0 || distance > 10 # no close enough SQL stored in the database
latestKey, _ = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], :action_input)
_sqlStatement = state[:thoughtHistory][latestKey]
if occursin("SELECT", _sqlStatement) # make sure it is an SQL statement before adding into DB
sqlStatementBase64 = base64encode(_sqlStatement)
sqlStatement = replace(_sqlStatement, "'"=>"")
sql =
"""
INSERT INTO sql_statement_repository (question, sql_statement, sql_statement_base64, embedding) VALUES ('$query', '$sqlStatement', '$sqlStatementBase64', '$embedding');
"""
_ = executeSQLVectorDB(sql)
println("--> added new SQL statement to vectorDB ", @__FILE__, " ", @__LINE__)
println(sqlStatement)
end
if row == 0 || distance > maxdistance # no close enough SQL stored in the database
query_embedding = getEmbedding(query)[1]
query = replace(query, "'" => "")
sql_base64 = base64encode(SQL)
sql_ = replace(SQL, "'" => "")
sql = """
INSERT INTO $tablename (function_input, function_output, function_output_base64, function_input_embedding) VALUES ('$query', '$sql_', '$sql_base64', '$query_embedding');
"""
# println("\n~~~ added new decision to vectorDB ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# println(sql)
_ = executeSQLVectorDB(sql)
end
end
function querySQLVectorDB(state)
# provide similarSQL at the first time thinking only
latestKey, _ = GeneralUtils.findHighestIndexKey(state[:thoughtHistory], :action_input)
if latestKey === nothing
# get embedding of the query
query = [state[:thoughtHistory][:question]]
msgMeta = GeneralUtils.generate_msgMeta(
config[:externalservice][:text2textinstruct][:mqtttopic];
msgPurpose= "embedding",
senderName= "yiemagent",
senderId= string(uuid4()),
receiverName= "text2textinstruct",
mqttBrokerAddress= config[:mqttServerInfo][:broker],
mqttBrokerPort= config[:mqttServerInfo][:port],
)
outgoingMsg = Dict(
:msgMeta=> msgMeta,
:payload=> Dict(
:text=> query
)
)
response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
embedding = response[:response][:embeddings][1]
# check whether there is close enough vector already store in vectorDB. if no, add, else skip
sql =
"""
SELECT *, embedding <-> '$embedding' as distance
FROM sql_statement_repository
ORDER BY distance LIMIT 1;
"""
response = executeSQLVectorDB(sql)
df = DataFrame(response)
row, col = size(df)
distance = row == 0 ? Inf : df[1, :distance]
if row != 0 && distance < 100
# if there is usable SQL, return it.
sqlStatementBase64 = df[1, :sql_statement_base64]
sqlStatement = String(base64decode(sqlStatementBase64))
return sqlStatement
else
return nothing
end
end
return nothing
end
sessionId = "555"
# query = Dict(:text=> "How many wines from France do you have that can be paired with lamb?")
# query = "How many wines are from United States?"
query = "retailer: Yiem, wine_type: red, sweetness: 1-2, intensity: 4-5, wine price: 20-40"
# query = "wine_type: white, country: United States, sweetness: 1-2, tannin: 3, food to be served with wine: pizza"
# query = "retailer: Yiem, wine_type: red, sweetness: 1-2, intensity: 4-5, wine price: 20-40"
query = "wine_type: white, country: United States, sweetness: 1-2, tannin: 3, food to be served with wine: pizza"
# query = "wine_type: white, country: Austria, food to be served with wine: pork"
# query = "wine price: less than 25, wine_type: rose, country: France, sweetness: 2, tannin: 3, food to be served with wine: pizza"
# query = Dict(:text=> "wine_type: white, country: France, sweetness: 1")
result = SQLLLM.query(query, executeSQL, text2textInstructLLM;
addSQLVectorDB=addSQLVectorDB,
querySQLVectorDB=querySQLVectorDB)
insertSQLVectorDB=insertSQLVectorDB,
similarSQLVectorDB=similarSQLVectorDB)
println(result)
error(555)