This commit is contained in:
narawat lamaiin
2025-05-04 20:56:55 +07:00
parent c8f5983620
commit 2541223bbb
2 changed files with 317 additions and 22 deletions

View File

@@ -141,7 +141,7 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
For your information: For your information:
- Observation: Result of the immediately preceding action - Observation: Result of the immediately preceding action
At each round of conversation, the user will give you the following: At each round of conversation, you will be given the following information:
User Query: ... User Query: ...
Example: ... Example: ...
Your Q&A: ... Your Q&A: ...
@@ -205,30 +205,37 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_) QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)
usermsg = assistantinfo =
""" """
<information>
$(context[:tablelist]) $(context[:tablelist])
User query: $(state[:thoughtHistory][:question]) User query: $(state[:thoughtHistory][:question])
Example: $similarSQL_ Similar SQL: $similarSQL_
Your Q&A: $QandA Your Q&A: $QandA
Your work progress: $workprogress Your work progress: $workprogress
Evaluation: $(state[:evaluation]) Evaluation: $(state[:evaluation])
Suggestion: $(state[:suggestion]) Suggestion: $(state[:suggestion])
Data specific guidelines:
- tasting_notes should not be used as search criteria.
P.S. $errornote P.S. $errornote
</information>
""" """
_prompt = unformatPrompt =
[ [
Dict(:name => "system", :text => systemmsg), Dict(:name => "system", :text => systemmsg),
Dict(:name=> "user", :text=> usermsg)
] ]
# put in model format # put in model format
prompt = GeneralUtils.formatLLMtext(_prompt, llmFormatName) prompt = GeneralUtils.formatLLMtext(unformatPrompt, llmFormatName)
# add info
prompt = prompt * assistantinfo
response = text2textInstructLLM(prompt; llmkwargs=llmkwargs) response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
response = GeneralUtils.deFormatLLMtext(response, llmFormatName) response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
think, response = GeneralUtils.extractthink(response) think, response = GeneralUtils.extractthink(response)
# [WORKING] TODO: check whether tasting_notes occurs AFTER the WHERE clause in the generated SQL
# LLM tends to generate observation given that it is in the input # LLM tends to generate observation given that it is in the input
response = response =
if occursin("observation:", response) if occursin("observation:", response)
@@ -328,11 +335,246 @@ function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFo
end end
state[:decisionMaker] = responsedict state[:decisionMaker] = responsedict
println("\nSQLLLM decisionMaker() ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
pprintln(Dict(responsedict))
return responsedict return responsedict
end end
error("SQLLLM DecisionMaker() failed to generate a thought \n", response) error("SQLLLM DecisionMaker() failed to generate a thought \n", response)
end end
# function decisionMaker(state::T1, context, text2textInstructLLM::Function, llmFormatName::String
# ; querySQLVectorDBF::Union{T2, Nothing}=nothing, maxattempt=10
# )::Dict{Symbol, Any} where {T1<:AbstractDict, T2<:Function}
# # lessonDict =
# # if isfile("lesson.json")
# # lessonDict = copy(JSON3.read("lesson.json"))
# # else
# # lessonDict = nothing
# # end
# # lessonDict = nothing
# # lesson =
# # if lessonDict === nothing
# # ""
# # else
# # """
# # You have attempted to help the user before and failed, either because your reasoning for the
# # recommendation was incorrect or your response did not exactly match the user expectation.
# # The following lesson(s) give a plan to avoid failing to help the user in the same way you
# # did previously. Use them to improve your strategy to help the user.
# # Here are some lessons in JSON format:
# # $(JSON3.write(lessonDict))
# # When providing the thought and action for the current trial, that into account these failed
# # trajectories and make sure not to repeat the same mistakes and incorrect answers.
# # """
# # end
# systemmsg =
# """
# You are a helpful assistant that find the data from a database to satisfy the user's query.
# You are also eager to improve your helpfulness.
# For your information:
# - Observation: Result of the immediately preceding action
# At each round of conversation, the user will give you the following:
# User Query: ...
# Example: ...
# Your Q&A: ...
# Your work progress: ...
# Evaluation: Evaluation of the immediately preceding action and observation
# Suggestion: Suggestion for the immediately preceding action and observation
# You must follow the following guidelines:
# - Keep SQL queries focused only on the provided information.
# You should follow the following guidelines:
# - Do not create any table in the database
# - A junction table can be used to link tables together. Another use case is for filtering data.
# - If you can't find a single table that can be used to answer the user's query, try joining multiple tables to see if you can obtain the answer.
# - If you are unable to find the requested information, kindly inform the user, "The current data in our database does not provide the specific answer to your query".
# - Text information in the database usually stored in lower case. If your search returns empty, try using lower case to search.
# You should then respond to the user with interleaving Comprehension, Plan, Action_name, Action_input:
# Plan: Given the current circumstances, outline a detailed, step-by-step plan to accomplish the task. Be specific.
# Action_name: (Typically corresponds to the execution of the first step in your plan)
# Can be one of the following function names:
# - RUNSQL, which you can use to execute SQL against the database. Action_input for this function must be a single SQL query to be executed against the database.
# For more effective text search, it's necessary to use case-insensitivity and the ILIKE operator.
# Do not wrap the SQL as it will be executed against the database directly and SQL must be ended with ';'.
# 4) Action_input: Input to the action
# You should only respond in format as described below:
# Plan: ...
# Action_name: ...
# Action_input: ...
# Let's begin!
# """
# workprogress = ""
# for (k, v) in state[:thoughtHistory]
# if k ∉ [:question]
# workprogress *= "$k: $v\n"
# end
# end
# response = nothing # store for show when error msg show up
# errornote = "N/A"
# # provide similar sql only for the first attempt
# similarSQL_ = "None"
# if length(state[:thoughtHistory]) == 1
# sql, distance = querySQLVectorDBF(state[:thoughtHistory][:question])
# similarSQL_ = sql !== nothing ? sql : "None"
# end
# header = ["Plan:", "Action_name:", "Action_input:"]
# dictkey = ["plan", "action_name", "action_input"]
# llmkwargs=Dict(
# :num_ctx => 32768,
# :temperature => 0.5,
# )
# for attempt in 1:maxattempt
# QandA = generatequestion(state, context, text2textInstructLLM, llmFormatName; similarSQL=similarSQL_)
# usermsg =
# """
# $(context[:tablelist])
# User query: $(state[:thoughtHistory][:question])
# Example: $similarSQL_
# Your Q&A: $QandA
# Your work progress: $workprogress
# Evaluation: $(state[:evaluation])
# Suggestion: $(state[:suggestion])
# P.S. $errornote
# """
# _prompt =
# [
# Dict(:name=> "system", :text=> systemmsg),
# Dict(:name=> "user", :text=> usermsg)
# ]
# # put in model format
# prompt = GeneralUtils.formatLLMtext(_prompt, llmFormatName)
# response = text2textInstructLLM(prompt; llmkwargs=llmkwargs)
# response = GeneralUtils.deFormatLLMtext(response, llmFormatName)
# think, response = GeneralUtils.extractthink(response)
# # LLM tends to generate observation given that it is in the input
# response =
# if occursin("observation:", response)
# string(split(response, "observation:")[1])
# elseif occursin("Observation:", response)
# string(split(response, "Observation:")[1])
# elseif occursin("observation_", response)
# string(split(response, "observation_")[1])
# elseif occursin("Observation_", response)
# string(split(response, "Observation_")[1])
# else
# response
# end
# # sometime LLM output something like **Comprehension**: which is not expected
# response = replace(response, "**"=>"")
# response = replace(response, "***"=>"")
# # some time LLM output Plan_1: so we need to detect and replace topic numbering
# regex = r"_[0-1000]+:"
# matches = collect(eachmatch(regex, response))
# for m in matches
# response = replace(response, string(m.match)=>":")
# end
# if occursin("NULL", response)
# errornote = "\nYour previous attempt was NULL. This is not allowed"
# println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# # # detect if there are more than 1 key per categories
# # wordcount = GeneralUtils.countGivenWords(response, header)
# # duplicateKeywordFlag = false
# # for (i, v) in enumerate(wordcount)
# # keyword = header[i]
# # keywordNumber = v
# # if keywordNumber > 1
# # errornote = "\nSQL query has duplicated keyword, $keyword"
# # println("Attempt $attempt $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# # duplicateKeywordFlag = true
# # break
# # end
# # end
# # duplicateKeywordFlag == true ? continue : nothing
# # check whether response has all header
# detected_kw = GeneralUtils.detect_keyword(header, response)
# if 0 ∈ values(detected_kw)
# errornote = "\nYour previous attempt did not have all points according to the required response format"
# println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# elseif sum(values(detected_kw)) > length(header)
# errornote = "\nYour previous attempt has duplicated points according to the required response format"
# println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# responsedict = GeneralUtils.textToDict(response, header;
# dictKey=dictkey, symbolkey=true)
# delete!(responsedict, :observation)
# # remove backticks Error occurred: MethodError: no method matching occursin(::String, ::Vector{String})
# if occursin("```", responsedict[:action_input])
# sql = GeneralUtils.extract_triple_backtick_text(responsedict[:action_input])[1]
# if sql[1:4] == "sql\n"
# sql = sql[5:end]
# end
# sql = split(sql, ';') # some time there are comments in the sql
# sql = sql[1] * ';'
# responsedict[:action_input] = sql
# end
# toollist = ["TABLEINFO", "RUNSQL"]
# if responsedict[:action_name] ∉ toollist
# errornote = "Your previous attempt has action_name that is not in the tool list"
# println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# for i in toollist
# if occursin(i, responsedict[:action_input])
# errornote = "Your previous attempt has action_name in action_input which is not allowed"
# println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# end
# for i ∈ Symbol.(dictkey)
# if length(JSON3.write(responsedict[i])) == 0
# errornote = "Your previous attempt has empty value for $i"
# println("\nERROR SQLLLM decisionMaker(). Attempt $attempt/$maxattempt. $errornote ", @__FILE__, ":", @__LINE__, " $(Dates.now())")
# continue
# end
# end
# state[:decisionMaker] = responsedict
# return responsedict
# end
# error("SQLLLM DecisionMaker() failed to generate a thought \n", response)
# end
""" Assigns a scalar value to each new child node to be used for selec- """ Assigns a scalar value to each new child node to be used for selec-
tion and backpropagation. This value effectively quantifies the agent's progress in task completion, tion and backpropagation. This value effectively quantifies the agent's progress in task completion,

View File

@@ -3,7 +3,7 @@ using LibPQ, Dates, JSON3, PrettyPrinting, UUIDs, DataFrames, DataStructures, Ba
using GeneralUtils, SQLLLM using GeneralUtils, SQLLLM
config = copy(JSON3.read("/appfolder/mountvolume/appdata/config.json")) config = JSON3.read("/appfolder/app/dev/YiemAgent/test/config.json")
function executeSQL(sql::T) where {T<:AbstractString} function executeSQL(sql::T) where {T<:AbstractString}
host = config[:externalservice][:wineDB][:host] host = config[:externalservice][:wineDB][:host]
@@ -29,13 +29,19 @@ function executeSQLVectorDB(sql)
return result return result
end end
function text2textInstructLLM(prompt::String; maxattempt=3) function text2textInstructLLM(prompt::String; maxattempt::Integer=3, modelsize::String="medium",
senderId=GeneralUtils.uuid4snakecase(), timeout=180,
llmkwargs=Dict(
:num_ctx => 32768,
:temperature => 0.5,
)
)
msgMeta = GeneralUtils.generate_msgMeta( msgMeta = GeneralUtils.generate_msgMeta(
config[:externalservice][:loadbalancer][:mqtttopic]; config[:externalservice][:loadbalancer][:mqtttopic];
msgPurpose="inference", msgPurpose="inference",
senderName="yiemagent", senderName="yiemagent",
senderId=sessionId, senderId=senderId,
receiverName="text2textinstruct_small", receiverName="text2textinstruct_$modelsize",
mqttBrokerAddress=config[:mqttServerInfo][:broker], mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port], mqttBrokerPort=config[:mqttServerInfo][:port],
) )
@@ -44,16 +50,13 @@ function text2textInstructLLM(prompt::String; maxattempt=3)
:msgMeta => msgMeta, :msgMeta => msgMeta,
:payload => Dict( :payload => Dict(
:text => prompt, :text => prompt,
:kwargs => Dict( :kwargs => llmkwargs
:num_ctx => 16384,
:temperature => 0.2,
)
) )
) )
response = nothing response = nothing
for attempts in 1:maxattempt for attempts in 1:maxattempt
_response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=300, maxattempt=2) _response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=timeout, maxattempt=maxattempt)
payload = _response[:response] payload = _response[:response]
if _response[:success] && payload[:text] !== nothing if _response[:success] && payload[:text] !== nothing
response = _response[:response][:text] response = _response[:response][:text]
@@ -76,7 +79,7 @@ function getEmbedding(text::T) where {T<:AbstractString}
msgPurpose="embedding", msgPurpose="embedding",
senderName="yiemagent", senderName="yiemagent",
senderId=sessionId, senderId=sessionId,
receiverName="text2textinstruct_small", receiverName="textembedding",
mqttBrokerAddress=config[:mqttServerInfo][:broker], mqttBrokerAddress=config[:mqttServerInfo][:broker],
mqttBrokerPort=config[:mqttServerInfo][:port], mqttBrokerPort=config[:mqttServerInfo][:port],
) )
@@ -87,7 +90,8 @@ function getEmbedding(text::T) where {T<:AbstractString}
:text => [text] # must be a vector of string :text => [text] # must be a vector of string
) )
) )
response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120, maxattempt=3)
embedding = response[:response][:embeddings] embedding = response[:response][:embeddings]
return embedding return embedding
end end
@@ -108,7 +112,6 @@ function findSimilarTextFromVectorDB(text::T1, tablename::T2, embeddingColumnNam
return df return df
end end
function similarSQLVectorDB(query; maxdistance::Integer=100) function similarSQLVectorDB(query; maxdistance::Integer=100)
tablename = "sqlllm_decision_repository" tablename = "sqlllm_decision_repository"
# get embedding of the query # get embedding of the query
@@ -131,7 +134,6 @@ function similarSQLVectorDB(query; maxdistance::Integer=100)
end end
end end
function insertSQLVectorDB(query::T1, SQL::T2; maxdistance::Integer=3) where {T1<:AbstractString, T2<:AbstractString} function insertSQLVectorDB(query::T1, SQL::T2; maxdistance::Integer=3) where {T1<:AbstractString, T2<:AbstractString}
tablename = "sqlllm_decision_repository" tablename = "sqlllm_decision_repository"
# get embedding of the query # get embedding of the query
@@ -155,6 +157,57 @@ function insertSQLVectorDB(query::T1, SQL::T2; maxdistance::Integer=3) where {T1
end end
end end
"""
    similarSommelierDecision(recentevents; maxdistance=3)

Look up a previously stored decision for `recentevents` in the
`sommelier_decision_repository` table.

The table is searched through `findSimilarTextFromVectorDB` on the
`function_input_embedding` column. Only the first returned row is considered
(presumably the nearest match; the ordering is decided by `findSimilarTextFromVectorDB`,
which is not shown here).

# Returns
- A copy of the JSON value decoded from the row's `:function_output_base64` field when
  at least one row is returned and its `:distance` is strictly less than `maxdistance`.
- `nothing` otherwise.
"""
function similarSommelierDecision(recentevents::T1; maxdistance::Integer=3
    )::Union{AbstractDict, Nothing} where {T1<:AbstractString}
    tablename = "sommelier_decision_repository"

    println("\n~~~ search vectorDB for this: $recentevents ", @__FILE__, " ", @__LINE__)
    matches = findSimilarTextFromVectorDB(recentevents, tablename,
        "function_input_embedding", executeSQLVectorDB)

    nrows = size(matches, 1)
    # An empty result is treated as infinitely far away.
    distance = nrows == 0 ? Inf : matches[1, :distance]

    # Guard clause: nothing stored, or the first row is not close enough.
    # `!(a < b)` is used instead of `a >= b` so NaN distances take this branch too.
    if nrows == 0 || !(distance < maxdistance)
        println("\n~~~ similar decision not found, max distance $maxdistance ", @__FILE__, " ", @__LINE__)
        return nothing
    end

    rowid = matches[1, :id]
    println("\n~~~ found similar decision. row id $rowid, distance $distance ", @__FILE__, " ", @__LINE__)

    # The stored output is base64 text wrapping a JSON document.
    decoded = String(base64decode(matches[1, :function_output_base64]))
    return copy(JSON3.read(decoded))
end
"""
    insertSommelierDecision(recentevents, decision; maxdistance=5)

Store `decision` for `recentevents` in the `sommelier_decision_repository` table unless
a sufficiently similar entry is already present.

The table is first searched through `findSimilarTextFromVectorDB` on the
`function_input_embedding` column. A new row is inserted when the search returns no
rows, or when the first row's `:distance` is greater than `maxdistance`.

The inserted row contains:
- `function_input`: `recentevents`
- `function_output`: `decision` serialized with `JSON3.write`
- `function_output_base64`: base64 of that same JSON text
- `function_input_embedding`: first element of `getEmbedding(recentevents)`

Single quotes in the two free-text columns are escaped by doubling (`'` -> `''`), so the
stored text matches the input exactly instead of having its apostrophes removed.

# Returns
The value of the executed branch: the result of `executeSQLVectorDB` when a row is
inserted, `nothing` when a similar entry was already cached.
"""
function insertSommelierDecision(recentevents::T1, decision::T2; maxdistance::Integer=5
    ) where {T1<:AbstractString, T2<:AbstractDict}
    tablename = "sommelier_decision_repository"

    matches = findSimilarTextFromVectorDB(recentevents, tablename,
        "function_input_embedding", executeSQLVectorDB)
    nrows = size(matches, 1)
    # An empty result is treated as infinitely far away.
    distance = nrows == 0 ? Inf : matches[1, :distance]

    if nrows == 0 || distance > maxdistance # no close enough decision stored in the database
        # The embedding is computed from the unescaped text.
        recentevents_embedding = getEmbedding(recentevents)[1]
        decision_json = JSON3.write(decision)
        decision_base64 = base64encode(decision_json)

        # Escape single quotes for use inside SQL string literals. Doubling keeps the
        # stored text faithful to the input; deleting the quotes would alter it.
        # NOTE(review): assumes the server treats backslashes literally
        # (PostgreSQL standard_conforming_strings = on) — confirm for the target DB.
        recentevents_sql = replace(recentevents, "'" => "''")
        decision_sql = replace(decision_json, "'" => "''")

        sql = """
        INSERT INTO $tablename (function_input, function_output, function_output_base64, function_input_embedding) VALUES ('$recentevents_sql', '$decision_sql', '$decision_base64', '$recentevents_embedding');
        """
        println("\n~~~ added new decision to vectorDB ", @__FILE__, " ", @__LINE__)
        println(sql)
        _ = executeSQLVectorDB(sql)
    else
        println("~~~ similar decision previously cached, distance $distance ", @__FILE__, " ", @__LINE__)
    end
end
sessionId = "555" sessionId = "555"