31 Commits

Author SHA1 Message Date
0e36b8db90 remove MQTT dependency 2025-08-01 06:04:17 +07:00
13fcf06503 update 2025-07-23 07:10:28 +07:00
narawat lamaiin
066d72553f update 2025-07-18 07:54:50 +07:00
narawat lamaiin
b3e8df7287 update 2025-07-17 11:48:16 +07:00
narawat lamaiin
c5f3fda2ba update 2025-07-14 13:49:04 +07:00
narawat lamaiin
adab61dca8 update 2025-07-14 08:54:46 +07:00
narawat lamaiin
09615a6909 mark new version 2025-06-10 10:49:11 +07:00
ton
92c5930e9a Merge pull request 'v0.3.0' (#6) from v0.3.0 into main
Reviewed-on: #6
2025-06-10 03:39:42 +00:00
narawat lamaiin
5b4c1c1471 update 2025-06-10 10:38:51 +07:00
narawat lamaiin
fc3edd7b8f update 2025-06-10 10:29:57 +07:00
narawat lamaiin
93aa0ee1ac update 2025-06-10 10:16:31 +07:00
narawat lamaiin
42378714a0 mark new version 2025-06-10 09:31:00 +07:00
ton
759f022c98 Merge pull request 'v0.2.4' (#5) from v0.2.4 into main
Reviewed-on: #5
2025-06-10 02:27:09 +00:00
narawat lamaiin
5af4d481f2 update 2025-06-10 09:25:41 +07:00
narawat lamaiin
221bb5beb7 update 2025-06-09 06:34:29 +07:00
narawat lamaiin
5a89e86120 update 2025-06-03 10:08:40 +07:00
narawat lamaiin
e351a92680 mark new version 2025-05-24 08:52:50 +07:00
ton
83cd0cfea3 Merge pull request 'v0.2.3' (#4) from v0.2.3 into main
Reviewed-on: #4
2025-05-24 01:47:53 +00:00
narawat lamaiin
9e29f611df update 2025-05-24 08:42:50 +07:00
narawat lamaiin
d8ea4b70a9 update 2025-05-04 20:56:36 +07:00
narawat lamaiin
150ddac2c0 add extractTextBetweenString 2025-04-30 12:59:14 +07:00
narawat lamaiin
5108ad1f6b update 2025-04-25 21:12:14 +07:00
narawat lamaiin
14766ae171 update 2025-04-13 21:45:47 +07:00
narawat lamaiin
ccd91a7b6f update 2025-04-07 05:20:05 +07:00
narawat lamaiin
a894ad85ba update 2025-04-04 15:04:19 +07:00
narawat lamaiin
1da05f5cae update 2025-03-31 21:30:29 +07:00
narawat lamaiin
562f528c01 update 2025-03-27 13:09:20 +07:00
narawat lamaiin
840b0e6205 update 2025-03-22 09:41:39 +07:00
cb4d01c612 update 2025-03-20 16:05:39 +07:00
e6344f1a92 mark new version 2025-03-17 09:54:32 +07:00
ton
3082c261c7 Merge pull request 'v0.2.2' (#3) from v0.2.2 into main
Reviewed-on: #3
2025-03-14 12:17:37 +00:00
15 changed files with 1509 additions and 903 deletions

View File

@@ -1,8 +1,8 @@
# This file is machine-generated - editing it directly is not advised
julia_version = "1.11.2"
julia_version = "1.11.5"
manifest_format = "2.0"
project_hash = "75c6a269a13b222c106479d2177b05facfa23f74"
project_hash = "a942446c2f26ef72d0c4b0ca522e0adcf709ce4e"
[[deps.AliasTables]]
deps = ["PtrArrays", "Random"]
@@ -22,12 +22,23 @@ version = "1.11.0"
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
version = "1.11.0"
[[deps.BufferedStreams]]
git-tree-sha1 = "6863c5b7fc997eadcabdbaf6c5f201dc30032643"
uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
version = "1.2.2"
[[deps.CSV]]
deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"]
git-tree-sha1 = "deddd8725e5e1cc49ee205a1964256043720a6c3"
uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
version = "0.10.15"
[[deps.CodecBase]]
deps = ["TranscodingStreams"]
git-tree-sha1 = "40956acdbef3d8c7cc38cba42b56034af8f8581a"
uuid = "6c391c72-fb7b-5838-ba82-7cfb1bcfecbf"
version = "0.3.4"
[[deps.CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"]
git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759"
@@ -81,11 +92,6 @@ deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
version = "1.11.0"
[[deps.Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
version = "1.11.0"
[[deps.Distributions]]
deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"]
git-tree-sha1 = "3101c32aab536e7a27b1763c0797dba151b899ad"
@@ -148,6 +154,11 @@ deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
version = "1.11.0"
[[deps.HashArrayMappedTries]]
git-tree-sha1 = "2eaa69a7cab70a52b9687c8bf950a5a93ec895ae"
uuid = "076d061b-32b6-4027-95e0-9a2c6f6d7e74"
version = "0.2.0"
[[deps.HypergeometricFunctions]]
deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"]
git-tree-sha1 = "b1c2585431c382e3fe5805874bda6aea90a95de9"
@@ -264,21 +275,17 @@ version = "0.3.28"
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
version = "1.11.0"
[[deps.MQTTClient]]
deps = ["Distributed", "Random", "Sockets"]
git-tree-sha1 = "f2597b290d4bf17b577346153cd2ddf9accb5c26"
uuid = "985f35cc-2c3d-4943-b8c1-f0931d5f0959"
version = "0.3.1"
weakdeps = ["PrecompileTools"]
[deps.MQTTClient.extensions]
PrecompileMQTT = "PrecompileTools"
[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
version = "1.11.0"
[[deps.MbedTLS]]
deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"]
git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf"
uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
version = "1.1.9"
[[deps.MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
@@ -298,6 +305,18 @@ version = "1.11.0"
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
version = "2023.12.12"
[[deps.NATS]]
deps = ["Base64", "BufferedStreams", "CodecBase", "Dates", "DocStringExtensions", "JSON3", "MbedTLS", "NanoDates", "Random", "ScopedValues", "Sockets", "Sodium", "StructTypes", "URIs"]
git-tree-sha1 = "d9d9a189fb9155a460e6b5e8966bf6a66737abf8"
uuid = "55e73f9c-eeeb-467f-b4cc-a633fde63d2a"
version = "0.1.0"
[[deps.NanoDates]]
deps = ["Dates", "Parsers"]
git-tree-sha1 = "850a0557ae5934f6e67ac0dc5ca13d0328422d1f"
uuid = "46f1a544-deae-4307-8689-c12aa3c955c6"
version = "1.0.3"
[[deps.NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
version = "1.2.0"
@@ -310,7 +329,7 @@ version = "0.3.27+1"
[[deps.OpenLibm_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
version = "0.8.1+2"
version = "0.8.5+0"
[[deps.OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
@@ -423,6 +442,12 @@ version = "0.5.1+0"
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"
[[deps.ScopedValues]]
deps = ["HashArrayMappedTries", "Logging"]
git-tree-sha1 = "1147f140b4c8ddab224c94efa9569fc23d63ab44"
uuid = "7e506255-f358-4e82-b7e4-beb19740aa63"
version = "1.3.0"
[[deps.SentinelArrays]]
deps = ["Dates", "Random"]
git-tree-sha1 = "d0553ce4031a081cc42387a9b9c8441b7d99f32d"
@@ -437,6 +462,12 @@ version = "1.11.0"
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
version = "1.11.0"
[[deps.Sodium]]
deps = ["Base64", "libsodium_jll"]
git-tree-sha1 = "907703e0d50846f300650d7225bdcab145b7bca9"
uuid = "4f5b5e99-b0ad-42cd-b47a-334e172ec8bd"
version = "1.1.2"
[[deps.SortingAlgorithms]]
deps = ["DataStructures"]
git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085"
@@ -544,6 +575,11 @@ git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742"
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.11.3"
[[deps.URIs]]
git-tree-sha1 = "24c1c558881564e2217dcf7840a8b2e10caeb0f9"
uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
version = "1.6.0"
[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
@@ -574,6 +610,12 @@ deps = ["Artifacts", "Libdl"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.11.0+0"
[[deps.libsodium_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "f76d682d87eefadd3f165d8d9fda436464213142"
uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8"
version = "1.0.20+3"
[[deps.nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"

View File

@@ -1,7 +1,7 @@
name = "GeneralUtils"
uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe"
authors = ["tonaerospace <tonaerospace.etc@gmail.com>"]
version = "0.2.2"
version = "0.3.1"
[deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
@@ -10,8 +10,11 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
MQTTClient = "985f35cc-2c3d-4943-b8c1-f0931d5f0959"
NATS = "55e73f9c-eeeb-467f-b4cc-a633fde63d2a"
PrettyPrinting = "54e16d92-306c-5ea0-a30b-337be88ac337"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[compat]
NATS = "0.1.0"

79
codesnippet/nats.jl Normal file
View File

@@ -0,0 +1,79 @@
# ---------------------------------------------------------------------------------------------- #
# Snippet 1: responder.
# Connects to the NATS server and registers a reply handler on subject "some_subject" in queue
# group "group1". The handler prints the decoded JSON payload and the message's reply subject,
# then returns a JSON string (presumably used by NATS.reply as the response body — confirm
# against the NATS.jl documentation).
# ---------------------------------------------------------------------------------------------- #
using NATS, JSON3
connection = NATS.connect("nats.yiem.cc:4222")
sub1 = NATS.reply(connection, "some_subject"; queue_group="group1") do msg
# copy(...) turns the JSON3 read result into a standalone collection before printing
payload = copy(JSON3.read(msg.payload))
println(payload)
println(msg.reply_to)
# publish(connection, msg.reply_to, "ACK")
return JSON3.write(Dict(:a=>"wassup"))
end
# ---------------------------------------------------------------------------------------------- #
# Snippet 2: requester for an LLM inference subject.
# Builds message metadata with GeneralUtils.generate_msgMeta, wraps the prompt and generation
# kwargs into a Dict, and sends it as a JSON request, waiting up to llmHttpTimeout seconds.
# ---------------------------------------------------------------------------------------------- #
using NATS, JSON3, GeneralUtils
connection = NATS.connect("nats.yiem.cc:4222")
msgMeta = GeneralUtils.generate_msgMeta(
"text2textinstruct_medium.inference.api.v1";
msgPurpose= "inference",
senderName= "yiemagent",
senderId= GeneralUtils.uuid4snakecase(),
receiverName= "text2textinstruct",
)
# the same value is sent inside the payload and used for the request timer below
llmHttpTimeout = 60
outgoingMsg = Dict(
:msgMeta=> msgMeta,
:payload=> Dict(
:text=> "Wassup buddy!",
:kwargs=> Dict(
:max_tokens=> 2048,
:stop=> ["<|im_end|>"],
:temperature=> 0.2,
),
:llmHttpTimeout=>llmHttpTimeout,
)
)
r = NATS.request(String, connection, "text2textinstruct_medium.inference.api.v1",
JSON3.write(outgoingMsg); timer=Timer(llmHttpTimeout))
# ---------------------------------------------------------------------------------------------- #
# Snippet 3: requester that sends a shell command string as the payload.
# The request subject is read back from msgMeta[:sendTopic]; the timer is 10 seconds.
# ---------------------------------------------------------------------------------------------- #
using NATS, JSON3, GeneralUtils
connection = NATS.connect("nats.yiem.cc:4222")
msgMeta = GeneralUtils.generate_msgMeta(
"tonpc.containerServices",
msgPurpose="reset container",
senderName= "",
)
outgoingMsg = Dict(
:msgMeta=> msgMeta,
:payload=> "docker container restart ollama-instance-2",
)
# NOTE(unverified): issuing a NATS request from inside a NATS reply handler may not work.
r = NATS.request(String, connection, msgMeta[:sendTopic], JSON3.write(outgoingMsg); timer=Timer(10))

View File

@@ -2,7 +2,7 @@ module GeneralUtils
export # struct
mqttClientInstance,
# mqttClientInstance,
# function
noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@ module dbUtil
export dictToPostgresKeyValueString, generateInsertSQL, generateUpdateSQL
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames,
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames,
SHA
using ..util

View File

@@ -6,9 +6,9 @@ export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue,
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax,
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
getDictPath
getDictPath, detectKeywordVariation, textToDict
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, CSV
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames, CSV
using ..util, ..communication
# ---------------------------------------------- 100 --------------------------------------------- #
@@ -1150,9 +1150,196 @@ end
"""
detectKeywordVariation(keywords::AbstractVector{String}, text::String) -> Dict{String, Union{Array, Nothing}}
Detects and collects all case-variant occurrences of multiple keywords in the text.
This function processes each keyword individually and returns an array of matched variations for each keyword.
# Arguments
- `keywords::AbstractVector{String}` Vector of keywords to search for
- `text::String` The text to search in
# Returns
- `Dict{String, Array}` Returns a dictionary mapping each keyword to an array of matched variations found in the text
# Examples
```jldoctest
julia> detectKeywordVariation(["test", "example", "cat"], "This is a Test EXAMPLE")
Dict{String, Array}("test" => ["Test"], "example" => ["EXAMPLE"], "cat" => nothing)
"""
function detectKeywordVariation(keywords::T, text::String)::Dict{String, Union{Array, Nothing}} where {T<:AbstractVector}
kw = Dict{String, Union{Array, Nothing}}()
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in keywords
ws = detectKeywordVariation.(keyword, text)
total = sum(issomething.(ws))
if total != 0
kw[keyword] = ws
else
kw[keyword] = nothing
end
end
return kw
end
"""
detectKeywordVariation(keyword::String, text::String) -> Union{Nothing, Array{String}}
Detects if a keyword exists in the text in different case variations (lowercase, uppercase first letter, or all uppercase).
# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in
# Returns:
- `Union{Nothing, Array{String}}` Returns an array of matched keyword variations if found, otherwise returns nothing
# Examples:
```jldoctest
julia> detectKeywordVariation("test", "This is a Test case")
["Test"]
julia> detectKeywordVariation("error", "NO ERRORS FOUND")
["ERRORS"]
julia> detectKeywordVariation("missing", "complete data")
nothing
"""
function detectKeywordVariation(keyword::String, text::String)::Union{Nothing, Array{String}}
# Define the keyword variations to search for
wordVariations = [uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)]
# wordVariations may duplicate keyword
keyword_variations = [keyword]
for i in wordVariations
i != keyword ? push!(keyword_variations, i) : nothing
end
_splittext = string.(strip.(split(text, " ")))
splittext = String[]
# remove . after a word
for i in _splittext
if length(i) != 0 && i[end] ['.']
word = string(i[1:end-1])
push!(splittext, word)
else
push!(splittext, i)
end
end
result = String[]
for variation in keyword_variations
# if length of both word is equals then it is a whole word otherwise it is part of part of other word
r = findIndex(splittext, variation)
if isempty(r[2])
# skip
else
# if variation > 1 add them all so this function detect duplicate keyword
variations = [variation for i in eachindex(r[2])]
result = vcat(result, variations)
end
end
return result
end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important
# Arguments
- `text::String`
A text to be converted.
- `keywords::Vector{String}`
A list of keywords to be used to slice the text.
These keywords also be the resulting dict keys.
# Keyword Arguments
- `rightmarker::String`
A maker used to make a word to be unique. Ex, A keyword "plan" with rightmarker ":",
the function will search for "plan:" otherwise the function will search for "plan".
The marker will not be in the resulting dict keys.
- `symbolkey::Bool`
If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
set resulting dict's key to be lowercase
# Return
- `d::OrderedDict`
# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Any, Any}(:thought => "what to do",
:plan => "wake up and going out",
:action => "1. wake up 2. eat 3. sleep")
```
# Signature
"""
function textToDict(text::String, detectKeywords::Vector{String};
dictKey::Union{Vector{String}, Nothing}=nothing,
symbolkey::Bool=false, lowercasekey::Bool=false
)::OrderedDict
# make sure this function detect variation of a work e.g. agent, Agent, AGENT
kw = []
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in detectKeywords
detected = detectKeywordVariation(keyword, text)
if detected !== nothing
push!(kw, detected)
else
error("Keyword $keyword not found in text: $text")
end
end
if typeof(kw[1]) <: AbstractArray
kw = reduce(vcat, kw)
end
od1, od2 =
if symbolkey
OrderedDict{Symbol, Any}(), OrderedDict{Symbol, Any}()
else
OrderedDict{String, Any}(), OrderedDict{String, Any}()
end
remainingtext = text
dictKey_ = reverse(dictKey)
# process text from back to front
rkw = reverse(kw)
for (i,keyword) in enumerate(rkw)
# Find the position of the keyword in the text
keywordidx = findlast(keyword, remainingtext)
dKey = dictKey_[i]
if keywordidx !== nothing
substr = remainingtext[keywordidx[end]+1:end]
str = string(strip(substr)) # Removes both leading and trailing whitespace.
_key = lowercasekey == true ? lowercase(dKey) : dKey
key = symbolkey == true ? Symbol(_key) : _key
od1[key] = str
remainingtext = remainingtext[1:keywordidx[1]-1]
else
error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
end
end
# correct the order
ks = reverse([i for i in keys(od1)])
for k in ks
k = symbolkey == true ? Symbol(k) : k
od2[k] = od1[k]
end
return od2
end

View File

@@ -1,6 +1,6 @@
module llmUtil
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext, extractthink
using UUIDs, JSON3, Dates
using GeneralUtils
@@ -43,7 +43,7 @@ julia> formattedtext = YiemAgent.formatLLMtext_llama3instruct(d[:name], d[:text]
Signature
"""
function formatLLMtext_llama3instruct(name::T, text::T;
assistantStarter::Bool=true) where {T<:AbstractString}
assistantStarter::Bool=false) where {T<:AbstractString}
formattedtext =
if name == "system"
"""
@@ -68,28 +68,10 @@ function formatLLMtext_llama3instruct(name::T, text::T;
return formattedtext
end
# function formatLLMtext_llama3instruct(name::T, text::T) where {T<:AbstractString}
# formattedtext =
# if name == "system"
# """
# <|begin_of_text|>
# <|start_header_id|>$name<|end_header_id|>
# $text
# <|eot_id|>
# """
# else
# """
# <|start_header_id|>$name<|end_header_id|>
# $text
# <|eot_id|>
# """
# end
# return formattedtext
# end
function formatLLMtext_qwen(name::T, text::T;
assistantStarter::Bool=true) where {T<:AbstractString}
assistantStarter::Bool=false) where {T<:AbstractString}
formattedtext =
if name == "system"
"""
@@ -116,14 +98,94 @@ function formatLLMtext_qwen(name::T, text::T;
end
""" Convert a chat messages in vector of dictionary into LLM model instruct format.
"""
    formatLLMtext_qwen3(name, text; assistantStarter=false) -> String

Wrap a single chat message in the `<|im_start|>` / `<|im_end|>` template. The same template
is used for the "system" role and for every other role.

# Arguments
- `name` Role of the message owner, e.g. "system", "user" or "assistant"
- `text` Message content

# Keyword Arguments
- `assistantStarter::Bool` When true, an opening `<|im_start|>assistant` line is appended
  after the message.
"""
function formatLLMtext_qwen3(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}

    message =
        """
        <|im_start|>$name
        $text
        <|im_end|>
        """

    if !assistantStarter
        return message
    end

    starter =
        """
        <|im_start|>assistant
        """
    return message * starter
end
"""
    formatLLMtext_phi4(name, text; assistantStarter=false) -> String

Wrap a single chat message in a role tag followed by the text and an `<|end|>` line. The
role tag is built from `name`, so a "user" message is tagged `<|user|>` and a "system"
message `<|system|>`.

# Arguments
- `name` Role of the message owner, e.g. "system", "user" or "assistant"
- `text` Message content

# Keyword Arguments
- `assistantStarter::Bool` When true, an opening `<|assistant|>` line is appended after
  the message.
"""
function formatLLMtext_phi4(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}

    # Tag the message with its own role. Tagging every non-system message as assistant
    # would present user turns to the model as its own output.
    formattedtext =
        """
        <|$name|>
        $text
        <|end|>
        """

    if assistantStarter
        formattedtext *=
            """
            <|assistant|>
            """
    end

    return formattedtext
end
"""
    formatLLMtext_granite3(name, text; assistantStarter=false) -> String

Wrap a single chat message in the `<|start_of_role|>` / `<|end_of_role|>` template with
the text enclosed in braces and terminated by `<|end_of_text|>`. The "system" role uses
the same template as every other role.

# Arguments
- `name` Role of the message owner, e.g. "system", "user" or "assistant"
- `text` Message content

# Keyword Arguments
- `assistantStarter::Bool` When true, an assistant role header followed by an opening
  brace is appended after the message.
"""
function formatLLMtext_granite3(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}

    message =
        """
        <|start_of_role|>$name<|end_of_role|>{$text}<|end_of_text|>
        """

    if !assistantStarter
        return message
    end

    # the opening brace is left unclosed on purpose
    starter =
        """
        <|start_of_role|>assistant<|end_of_role|>{
        """
    return message * starter
end
""" Convert a vector of chat message dictionaries into LLM model instruct format.
# Arguments
- `messages::Vector{Dict{Symbol, T}}`
message owner name e.f. "system", "user" or "assistant"
A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message).
- `formatname::T`
format name to be used
The name of the format to be used for converting the chat messages.
# Return
- `formattedtext::String`
text formatted to model format
@@ -140,12 +202,11 @@ julia> chatmessage = [
julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
"<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n"
```
# Signature
"""
function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct"
)::String where {T<:Any}
f = if formatname == "llama3instruct"
function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
)::String where {T<:AbstractString}
f =
if formatname == "llama3instruct"
formatLLMtext_llama3instruct
elseif formatname == "mistral"
# not define yet
@@ -153,18 +214,125 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll
# not define yet
elseif formatname == "qwen"
formatLLMtext_qwen
elseif formatname == "qwen3"
formatLLMtext_qwen3
elseif formatname == "phi4"
formatLLMtext_phi4
elseif formatname == "granite3"
formatLLMtext_granite3
else
error("$formatname template not define yet")
end
str = ""
for t in messages
for (i, t) in enumerate(messages)
if i < length(messages)
str *= f(t[:name], t[:text])
else
str *= f(t[:name], t[:text]; assistantStarter=true)
end
end
return str
end
""" Revert LLM-format response back into regular text.
# Arguments
- `text::String`
The LLM formatted string to be converted.
# Return
- `normalText::String`
The original plain text extracted from the given LLM-formatted string.
# Example
```jldoctest
julia> using Revise
julia> using YiemAgent
julia> response = "<|begin_of_text|>This is a sample system instruction.<|eot_id|>"
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
"This is a sample system instruction."
```
"""
function deFormatLLMtext(text::String, formatname::String; includethink::Bool=false
)::String
f =
if formatname == "granite3"
deFormatLLMtext_granite3
elseif formatname == "qwen3"
deFormatLLMtext_qwen3
else
error("$formatname template not define yet")
end
r = f(text)
result = r === nothing ? text : r
return result
end
""" Revert LLM-format response back into regular text for Granite 3 format.
# Arguments
- `text::String`
The LLM formatted string to be converted.
# Return
- `normalText::Union{Nothing, String}`
The original plain text extracted from the given LLM-formatted string.
Returns nothing if the text is not in Granite 3 format.
# Example
```jldoctest
julia> using Revise
julia> using YiemAgent
julia> response = "{This is a sample LLM response.}"
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
"This is a sample LLM response."
"""
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
# check if '{' and '}' are in the text because it's a special format for the LLM response
if contains(text, "<|im_start|>assistant")
# get the text between '{' and '}'
text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
return text_between_braces
elseif text[end] == '}'
text = "{$text"
text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
else
return nothing
end
end
# Qwen 3 responses need no unwrapping: the input text is handed back unchanged.
deFormatLLMtext_qwen3(text::String)::Union{Nothing, String} = text
# function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String}
# think = nothing
# str = nothing
# if occursin("<think>", text)
# r = GeneralUtils.extractTextBetweenString(text, "<think>", "</think>")
# if r[:success]
# think = r[:text]
# end
# str = string(split(text, "</think>")[2])
# end
# if includethink == true && occursin("<think>", text)
# result = "ModelThought: $think $str"
# return result
# elseif includethink == false && occursin("<think>", text)
# result = str
# return result
# else
# return text
# end
# end
""" Attemp to correct LLM response's incorrect JSON response.
@@ -255,7 +423,20 @@ function jsoncorrection(config::T1, input::T2, correctJsonExample::T3;
end
"""
    extractthink(text::String) -> Tuple

Separate the `<think>...</think>` section of an LLM response from the rest of the text.

# Arguments
- `text::String` The LLM response

# Returns
A tuple `(think, str)`:
- `think` The text between `<think>` and `</think>` as reported by
  `GeneralUtils.extractTextBetweenString`, or `nothing` when the text has no `<think>` tag
  or the extraction does not succeed.
- `str` Everything after the first `</think>`. When the text has no `<think>` tag, or the
  closing `</think>` is missing, the whole input text is returned.
"""
function extractthink(text::String)
    if !occursin("<think>", text)
        return (nothing, text)
    end

    think = nothing
    r = GeneralUtils.extractTextBetweenString(text, "<think>", "</think>")
    if r[:success]
        think = r[:text]
    end

    # limit=2 keeps everything after the first closing tag in one piece; a missing closing
    # tag yields a single piece, in which case the input is returned unchanged instead of
    # indexing past the end of the split result
    parts = split(text, "</think>"; limit=2)
    str = length(parts) == 2 ? string(parts[2]) : text

    return (think, str)
end

View File

@@ -1,14 +1,15 @@
module util
export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
findMatchingDictKey, textToDict, randstring, randstrings, timeout,
findMatchingDictKey, randstring, randstrings, timeout,
dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey,
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, issomething,
dictToString_numbering, extract_triple_backtick_text,
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter,
convertCamelSnakeKebabCase
countGivenWords, remove_french_accents,
extractTextBetweenCharacter, extractTextBetweenString,
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames
# ---------------------------------------------- 100 --------------------------------------------- #
@@ -42,6 +43,7 @@ function timedifference(starttime::DateTime, stoptime::DateTime, unit::String)::
diff = stoptime - starttime
unit = lowercase(unit)
# Check the unit and calculate the time difference accordingly
if unit == "milliseconds"
return diff.value
elseif unit == "seconds"
@@ -242,101 +244,6 @@ function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important
# Arguments
- `text::String`
A text to be converted.
- `keywords::Vector{String}`
A list of keywords to be used to slice the text.
These keywords also be the resulting dict keys.
# Keyword Arguments
- `rightmarker::String`
A maker used to make a word to be unique. Ex, A keyword "plan" with rightmarker ":",
the function will search for "plan:" otherwise the function will search for "plan".
The marker will not be in the resulting dict keys.
- `symbolkey::Bool`
If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
set resulting dict's key to be lowercase
# Return
- `d::OrderedDict`
# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Any, Any}(:thought => "what to do",
:plan => "wake up and going out",
:action => "1. wake up 2. eat 3. sleep")
```
# Signature
"""
function textToDict(text::String, detectKeywords::Vector{String};
dictKey::Union{Vector{String}, Nothing}=nothing,
symbolkey::Bool=false, lowercasekey::Bool=false
)::OrderedDict
# make sure this function detect variation of a work e.g. agent, Agent, AGENT
kw = []
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in detectKeywords
detected = detect_keyword(keyword, text)
if detected !== nothing
push!(kw, detected)
else
error("Keyword $keyword not found in text.")
end
end
od1, od2 =
if symbolkey
OrderedDict{Symbol, Any}(), OrderedDict{Symbol, Any}()
else
OrderedDict{String, Any}(), OrderedDict{String, Any}()
end
remainingtext = text
dictKey_ = reverse(dictKey)
# process text from back to front
for (i,keyword) in enumerate(reverse(kw))
# Find the position of the keyword in the text
keywordidx = findlast(keyword, remainingtext)
dKey = dictKey_[i]
if keywordidx !== nothing
substr = remainingtext[keywordidx[end]+1:end]
str = string(strip(substr)) # Removes both leading and trailing whitespace.
_key = lowercasekey == true ? lowercase(dKey) : dKey
key = symbolkey == true ? Symbol(_key) : _key
od1[key] = str
remainingtext = remainingtext[1:keywordidx[1]-1]
else
error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
end
end
# correct the order
ks = reverse([i for i in keys(od1)])
for k in ks
k = symbolkey == true ? Symbol(k) : k
od2[k] = od1[k]
end
return od2
end
""" Generate a random string
# Arguments
@@ -770,48 +677,19 @@ function extract_triple_backtick_text(input::String)::Vector{String}
end
"""
detect_keyword(keyword::String, text::String) -> Union{Nothing, String}
wordwindow(word::String, startindex::Integer)::UnitRange = startindex:startindex + length(word) -1
Detects if a keyword exists in the text in different case variations (lowercase, uppercase first letter, or all uppercase).
# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in
# Returns:
- `Union{Nothing, String}` Returns the matched keyword variation if found, otherwise returns nothing
# Examples:
```jldoctest
julia> detect_keyword("test", "This is a Test case")
"Test"
julia> detect_keyword("error", "NO ERRORS FOUND")
"ERRORS"
julia> detect_keyword("missing", "complete data")
nothing
```
# Signature
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
# Define the keyword variations to search for
keyword_variations = [keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)]
# Check if any of the keyword variations are in the text
for variation in keyword_variations
if occursin(variation, text)
return variation
end
end
# Return nothing if no variation is found
# Return `text[range]`, or `nothing` when either end of `range` lies beyond `length(text)`.
function cuttext(range, text)
    textlength = length(text)
    fits = range.start <= textlength && range.stop <= textlength
    return fits ? text[range] : nothing
end
"""
countGivenWords(text::String, words::Vector{String}) -> Dict{String, Int}
@@ -924,9 +802,11 @@ Extracts and returns the text that is enclosed between two specified characters
# Examples
```jldoctest
julia> text = "Hello [World]!"
julia> text = "Hello [World]! [Yay]"
julia> extracted_text = extractTextBetweenCharacter(text, '[', ']')
println(extracted_text) # Output: "World"
2-element Vector{Any}:
"World"
"Yay"
```
"""
function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char)
@@ -950,6 +830,29 @@ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Cha
end
"""
    extractTextBetweenString(text::String, startstr::String, endstr::String) -> NamedTuple

Extract the text enclosed between `startstr` and `endstr`. Each of the two markers must
occur exactly once in `text`, and `endstr` must come after `startstr`.

# Returns
- On success: `(success=true, error=nothing, errorcode=0, text=<extracted text>)`
- On failure: `(success=false, error=<message>, errorcode=<code>, result=nothing)` with
  - `1` no start string, `2` more than one start string,
  - `3` more than one end string, `4` no end string,
  - `5` the end string occurs before the start string.

Note that the success tuple carries the field `text` while failure tuples carry `result`.

# Examples
```julia
julia> extractTextBetweenString("a <think>hello</think> b", "<think>", "</think>").text
"hello"
```
"""
function extractTextBetweenString(text::String, startstr::String, endstr::String)
    # splitting on a marker yields (occurrences + 1) pieces
    isStartStr = split(text, startstr)
    if length(isStartStr) > 2
        return (success=false, error="There are more than one occurrences of the start string '$startstr' in the text. Text must has only one start string", errorcode=2, result=nothing)
    elseif length(isStartStr) == 1
        return (success=false, error="There are no start string '$startstr' in the text. Text must has only one start string", errorcode=1, result=nothing)
    end

    isEndStr = split(text, endstr)
    if length(isEndStr) > 2
        return (success=false, error="There are more than one occurrences of the end string '$endstr' in the text. Text must has only one end string", errorcode=3, result=nothing)
    elseif length(isEndStr) == 1
        # the end-string pieces must be tested here, not the start-string pieces
        return (success=false, error="There are no end string '$endstr' in the text. Text must has only one end string", errorcode=4, result=nothing)
    end

    # the single end string must lie in the part that follows the start string
    afterStart = split(isStartStr[2], endstr)
    if length(afterStart) == 1
        return (success=false, error="The end string '$endstr' occurs before the start string '$startstr' in the text", errorcode=5, result=nothing)
    end

    s = string(afterStart[1])
    return (success=true, error=nothing, errorcode=0, text=s)
end
"""
Determines if the given string follows camel case naming convention.
@@ -1075,6 +978,140 @@ function convertCamelSnakeKebabCase(text::T, tocase::Symbol)::String where {T<:A
end
""" Check if a value is not `nothing`.
# Arguments
- `x`: The value to check
# Returns
- `Bool`: `true` if `x` is not `nothing`, `false` otherwise
# Examples
```jldoctest
julia> issomething(1)
true
julia> issomething(nothing)
false
julia> issomething("test")
true
```
"""
function issomething(x)
    # `nothing` is a singleton, so an identity comparison is exact and always a Bool.
    return x !== nothing
end
""" Adjust a given range to fit within the bounds of a vector's length.
# Arguments
- `v::T1`
the input vector to check against
- `range::UnitRange`
the original range to be adjusted
# Return
- `adjusted_range::UnitRange`
a range that is constrained to the vector's length, preventing out-of-bounds indexing
# Example
julia> v = [1, 2, 3, 4, 5]
julia> fitrange(v, 3:10)
3:5
"""
function fitrange(v::T1, range::UnitRange) where {T1<:AbstractVector}
    # Constrain `range` so it can be used to index `v` (1-based indexing is assumed,
    # since the bounds are derived from `length(v)`).
    totalelements = length(v)

    # An empty vector has no valid index: return an empty range rather than 0:0,
    # which would throw a BoundsError when used for indexing.
    totalelements == 0 && return 1:0

    # Pull the start into 1:totalelements. A start beyond the end collapses onto the
    # last element; a start below 1 is raised to 1 so the result stays indexable.
    startind = clamp(first(range), 1, totalelements)

    # The stop only needs its upper bound limited; a stop below the start simply
    # produces an empty range.
    stopind = min(last(range), totalelements)

    return startind:stopind
end
""" Find a unit range for a vector given a number of the most recent elements of interest.
# Arguments
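- `vectorlength::Integer`
the length of the vector to generate range from
- `n::Integer`
the number of recent elements to include in range; must be greater than 0
# Keyword Arguments
- `includelatest::Bool=false`
when `true` the range ends at the last element; when `false` the last element is left out and the range covers up to `n` elements before it
# Return
- `UnitRange`
a range covering up to `n` of the most recent elements of a vector with length `vectorlength`; whether the last element is part of it is controlled by `includelatest`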
# Example
```jldoctest
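julia> a = [1, 2, 3, 4, 5];
julia> recentElementsIndex(length(a), 3)
2:4
julia> recentElementsIndex(length(a), 3; includelatest=true)
3:5
julia> recentElementsIndex(length(a), 0)
ERROR: n must be greater than 0
[...]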
```
"""
function recentElementsIndex(vectorlength::Integer, n::Integer; includelatest::Bool=false)
    # Build the index range of the most recent elements of a vector of `vectorlength`.
    #   includelatest = true  -> the last `n` elements, ending at `vectorlength`
    #   includelatest = false -> the `n` elements before the last one (last one left out)
    # The start is floored at 1, so asking for more elements than exist yields fewer.

    # Reject zero AND negative counts: the message promises "greater than 0", and a
    # negative n would otherwise fall through and silently give an empty range.
    if n <= 0
        error("n must be greater than 0")
    end

    if includelatest
        startind = max(1, vectorlength - n + 1)
        return startind:vectorlength
    else
        startind = max(1, vectorlength - n)
        endind = vectorlength - 1
        return startind:endind
    end
end
""" Find a unit range for a vector excluding the most recent elements.
# Arguments
- `vectorlength::Integer`
the length of the vector to generate range from
- `n::Integer`
the number of most recent elements to exclude from range
# Return
- `UnitRange`
a range representing the elements of the vector excluding the last `n` elements
# Example
```jldoctest
julia> a = [1, 2, 3, 4, 5]
julia> nonRecentElementsIndex(length(a), 3)
1:2
julia> nonRecentElementsIndex(length(a), 1)
1:4
julia> nonRecentElementsIndex(length(a), 0)
1:5
```
"""
function nonRecentElementsIndex(vectorlength::Integer, n::Integer)
    # A negative count of elements to drop is meaningless.
    n < 0 && error("n must be non-negative")
    # Dropping more elements than exist leaves nothing, i.e. the empty range 1:0;
    # otherwise keep everything up to the first dropped element.
    remaining = n > vectorlength ? 0 : vectorlength - n
    return 1:remaining
end

3
test/.vscode/settings.json vendored Normal file
View File

@@ -0,0 +1,3 @@
{
"julia.environmentPath": "/appfolder/app/dev/GeneralUtils/test"
}

41
test/Manifest.toml Normal file
View File

@@ -0,0 +1,41 @@
# This file is machine-generated - editing it directly is not advised
julia_version = "1.11.4"
manifest_format = "2.0"
project_hash = "71d91126b5a1fb1020e1098d9d492de2a4438fd2"
[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
version = "1.11.0"
[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
version = "1.11.0"
[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
version = "1.11.0"
[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
version = "1.11.0"
[[deps.Random]]
deps = ["SHA"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
version = "1.11.0"
[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0"
[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
version = "1.11.0"
[[deps.Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
version = "1.11.0"

2
test/Project.toml Normal file
View File

@@ -0,0 +1,2 @@
[deps]
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

0
test/etc.jl Normal file
View File

View File

@@ -1,44 +0,0 @@
using Revise
using GeneralUtils, MQTTClient, JSON3
# Topics handed to the MQTT client instance created further down.
mqttMsgReceiveTopic = ["/receivetopic_1", "/receivetopic_2"]
# Receive channels with capacities 8 and 32; onMsgCallback puts messages whose topic
# contains "topic_1" into ch1 and those containing "topic_2" into ch2.
mqttMsgReceiveChannel = (ch1=Channel(8), ch2=Channel(32))
# Capacity-8 channel; onMsgCallback puts messages whose topic contains "keepalive" here.
keepaliveChannel = Channel(8)
function onMsgCallback(topic, payload)
    # Decode the payload as JSON; copy() converts the JSON object into a julia
    # dictionary recursively.
    incomingMqttMsg = copy(JSON3.read(String(payload)))

    # Choose the destination channel from the topic text; the first match wins.
    destination =
        if occursin("topic_1", topic)
            mqttMsgReceiveChannel[:ch1]
        elseif occursin("topic_2", topic)
            mqttMsgReceiveChannel[:ch2]
        elseif occursin("keepalive", topic)
            keepaliveChannel
        else
            nothing
        end

    if destination === nothing
        # No known topic fragment matched: report where this happened.
        println("undefined condition ", @__FILE__, " ", @__LINE__)
    else
        put!(destination, incomingMqttMsg)
    end
end
# Build the MQTT client from the topics, channels and callback defined above.
# NOTE(review): the first argument is presumably the broker address — confirm against
# GeneralUtils.mqttClientInstance_v2, which is not visible here.
mqttInstance = GeneralUtils.mqttClientInstance_v2(
"mqtt.yiem.cc",
mqttMsgReceiveTopic,
mqttMsgReceiveChannel,
keepaliveChannel,
onMsgCallback
)
# The return value is discarded on purpose; only the call's effect on mqttInstance matters.
# NOTE(review): presumably verifies (and maybe re-establishes) the connection — confirm.
_ = GeneralUtils.checkMqttConnection!(mqttInstance)
println("GeneralUtils test done")

39
test/runtests.jl Normal file
View File

@@ -0,0 +1,39 @@
using Test
using GeneralUtils: detect_keyword
# Test suite for GeneralUtils.detect_keyword(keywords, text; mode, delimiter).
# Every case compares the returned value against a Dict of keyword => expected count.
@testset "detect_keyword tests" begin
# Default mode: a single keyword found once.
@test detect_keyword(["test"], "this is a test") == Dict("test" => 1)
# Default mode: repeated keywords are counted per occurrence.
@test detect_keyword(["hello", "world"], "hello world hello") == Dict("hello" => 2, "world" => 1)
# Default mode: a keyword embedded in a longer word ("cat" in "category") is expected to count.
@test detect_keyword(["cat"], "category") == Dict("cat" => 1)
# mode="individual": the same embedded keyword is expected NOT to count.
@test detect_keyword(["cat"], "category"; mode="individual") == Dict("cat" => 0)
# mode="individual" with a space delimiter: "dog" is expected not to match "dogs".
@test detect_keyword(["dog"], "dogs and cats"; mode="individual", delimiter=[' ']) == Dict("dog" => 0)
# mode="individual" with a '.' delimiter: "test" is expected to match the piece before the dot.
@test detect_keyword(["test"], "test.case"; mode="individual", delimiter=['.']) == Dict("test" => 1)
# Empty text: the keyword is still reported, with a count of 0.
@test detect_keyword(["word"], "") == Dict("word" => 0)
# Empty keyword list: an empty Dict{String, Integer} is expected.
@test detect_keyword(String[], "some text") == Dict{String, Integer}()
# Several delimiters supplied together in the default mode.
@test detect_keyword(["a", "b"], "a.b\nc"; delimiter=['.', '\n']) == Dict("a" => 1, "b" => 1)
# Multi-line input: occurrences on different lines are expected to be summed.
multiline_text = """
first line
second line
first word
"""
@test detect_keyword(["first"], multiline_text) == Dict("first" => 2)
# mode="individual": the text being exactly the keyword is expected to count once.
@test detect_keyword(["word"], "word"; mode="individual") == Dict("word" => 1)
# mode="individual" with '.' delimiter: only the exact piece "test" is expected to count,
# not "testing" or "tester".
@test detect_keyword(["test"], "testing.test.tester"; mode="individual", delimiter=['.']) == Dict("test" => 1)
end