8 Commits

Author SHA1 Message Date
0e36b8db90 remove MQTT dependency 2025-08-01 06:04:17 +07:00
13fcf06503 update 2025-07-23 07:10:28 +07:00
narawat lamaiin
066d72553f update 2025-07-18 07:54:50 +07:00
narawat lamaiin
b3e8df7287 update 2025-07-17 11:48:16 +07:00
narawat lamaiin
c5f3fda2ba update 2025-07-14 13:49:04 +07:00
narawat lamaiin
adab61dca8 update 2025-07-14 08:54:46 +07:00
narawat lamaiin
09615a6909 mark new version 2025-06-10 10:49:11 +07:00
ton
92c5930e9a Merge pull request 'v0.3.0' (#6) from v0.3.0 into main
Reviewed-on: #6
2025-06-10 03:39:42 +00:00
9 changed files with 999 additions and 942 deletions

View File

@@ -1,8 +1,8 @@
# This file is machine-generated - editing it directly is not advised # This file is machine-generated - editing it directly is not advised
julia_version = "1.11.4" julia_version = "1.11.5"
manifest_format = "2.0" manifest_format = "2.0"
project_hash = "75c6a269a13b222c106479d2177b05facfa23f74" project_hash = "a942446c2f26ef72d0c4b0ca522e0adcf709ce4e"
[[deps.AliasTables]] [[deps.AliasTables]]
deps = ["PtrArrays", "Random"] deps = ["PtrArrays", "Random"]
@@ -22,12 +22,23 @@ version = "1.11.0"
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
version = "1.11.0" version = "1.11.0"
[[deps.BufferedStreams]]
git-tree-sha1 = "6863c5b7fc997eadcabdbaf6c5f201dc30032643"
uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
version = "1.2.2"
[[deps.CSV]] [[deps.CSV]]
deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"] deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"]
git-tree-sha1 = "deddd8725e5e1cc49ee205a1964256043720a6c3" git-tree-sha1 = "deddd8725e5e1cc49ee205a1964256043720a6c3"
uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
version = "0.10.15" version = "0.10.15"
[[deps.CodecBase]]
deps = ["TranscodingStreams"]
git-tree-sha1 = "40956acdbef3d8c7cc38cba42b56034af8f8581a"
uuid = "6c391c72-fb7b-5838-ba82-7cfb1bcfecbf"
version = "0.3.4"
[[deps.CodecZlib]] [[deps.CodecZlib]]
deps = ["TranscodingStreams", "Zlib_jll"] deps = ["TranscodingStreams", "Zlib_jll"]
git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759" git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759"
@@ -81,11 +92,6 @@ deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
version = "1.11.0" version = "1.11.0"
[[deps.Distributed]]
deps = ["Random", "Serialization", "Sockets"]
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
version = "1.11.0"
[[deps.Distributions]] [[deps.Distributions]]
deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"] deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"]
git-tree-sha1 = "3101c32aab536e7a27b1763c0797dba151b899ad" git-tree-sha1 = "3101c32aab536e7a27b1763c0797dba151b899ad"
@@ -148,6 +154,11 @@ deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
version = "1.11.0" version = "1.11.0"
[[deps.HashArrayMappedTries]]
git-tree-sha1 = "2eaa69a7cab70a52b9687c8bf950a5a93ec895ae"
uuid = "076d061b-32b6-4027-95e0-9a2c6f6d7e74"
version = "0.2.0"
[[deps.HypergeometricFunctions]] [[deps.HypergeometricFunctions]]
deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"] deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"]
git-tree-sha1 = "b1c2585431c382e3fe5805874bda6aea90a95de9" git-tree-sha1 = "b1c2585431c382e3fe5805874bda6aea90a95de9"
@@ -264,21 +275,17 @@ version = "0.3.28"
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
version = "1.11.0" version = "1.11.0"
[[deps.MQTTClient]]
deps = ["Distributed", "Random", "Sockets"]
git-tree-sha1 = "f2597b290d4bf17b577346153cd2ddf9accb5c26"
uuid = "985f35cc-2c3d-4943-b8c1-f0931d5f0959"
version = "0.3.1"
weakdeps = ["PrecompileTools"]
[deps.MQTTClient.extensions]
PrecompileMQTT = "PrecompileTools"
[[deps.Markdown]] [[deps.Markdown]]
deps = ["Base64"] deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
version = "1.11.0" version = "1.11.0"
[[deps.MbedTLS]]
deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"]
git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf"
uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
version = "1.1.9"
[[deps.MbedTLS_jll]] [[deps.MbedTLS_jll]]
deps = ["Artifacts", "Libdl"] deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
@@ -298,6 +305,18 @@ version = "1.11.0"
uuid = "14a3606d-f60d-562e-9121-12d972cd8159" uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
version = "2023.12.12" version = "2023.12.12"
[[deps.NATS]]
deps = ["Base64", "BufferedStreams", "CodecBase", "Dates", "DocStringExtensions", "JSON3", "MbedTLS", "NanoDates", "Random", "ScopedValues", "Sockets", "Sodium", "StructTypes", "URIs"]
git-tree-sha1 = "d9d9a189fb9155a460e6b5e8966bf6a66737abf8"
uuid = "55e73f9c-eeeb-467f-b4cc-a633fde63d2a"
version = "0.1.0"
[[deps.NanoDates]]
deps = ["Dates", "Parsers"]
git-tree-sha1 = "850a0557ae5934f6e67ac0dc5ca13d0328422d1f"
uuid = "46f1a544-deae-4307-8689-c12aa3c955c6"
version = "1.0.3"
[[deps.NetworkOptions]] [[deps.NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
version = "1.2.0" version = "1.2.0"
@@ -310,7 +329,7 @@ version = "0.3.27+1"
[[deps.OpenLibm_jll]] [[deps.OpenLibm_jll]]
deps = ["Artifacts", "Libdl"] deps = ["Artifacts", "Libdl"]
uuid = "05823500-19ac-5b8b-9628-191a04bc5112" uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
version = "0.8.1+4" version = "0.8.5+0"
[[deps.OpenSpecFun_jll]] [[deps.OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
@@ -423,6 +442,12 @@ version = "0.5.1+0"
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
version = "0.7.0" version = "0.7.0"
[[deps.ScopedValues]]
deps = ["HashArrayMappedTries", "Logging"]
git-tree-sha1 = "1147f140b4c8ddab224c94efa9569fc23d63ab44"
uuid = "7e506255-f358-4e82-b7e4-beb19740aa63"
version = "1.3.0"
[[deps.SentinelArrays]] [[deps.SentinelArrays]]
deps = ["Dates", "Random"] deps = ["Dates", "Random"]
git-tree-sha1 = "d0553ce4031a081cc42387a9b9c8441b7d99f32d" git-tree-sha1 = "d0553ce4031a081cc42387a9b9c8441b7d99f32d"
@@ -437,6 +462,12 @@ version = "1.11.0"
uuid = "6462fe0b-24de-5631-8697-dd941f90decc" uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
version = "1.11.0" version = "1.11.0"
[[deps.Sodium]]
deps = ["Base64", "libsodium_jll"]
git-tree-sha1 = "907703e0d50846f300650d7225bdcab145b7bca9"
uuid = "4f5b5e99-b0ad-42cd-b47a-334e172ec8bd"
version = "1.1.2"
[[deps.SortingAlgorithms]] [[deps.SortingAlgorithms]]
deps = ["DataStructures"] deps = ["DataStructures"]
git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085"
@@ -544,6 +575,11 @@ git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742"
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
version = "0.11.3" version = "0.11.3"
[[deps.URIs]]
git-tree-sha1 = "24c1c558881564e2217dcf7840a8b2e10caeb0f9"
uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
version = "1.6.0"
[[deps.UUIDs]] [[deps.UUIDs]]
deps = ["Random", "SHA"] deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
@@ -574,6 +610,12 @@ deps = ["Artifacts", "Libdl"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.11.0+0" version = "5.11.0+0"
[[deps.libsodium_jll]]
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "f76d682d87eefadd3f165d8d9fda436464213142"
uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8"
version = "1.0.20+3"
[[deps.nghttp2_jll]] [[deps.nghttp2_jll]]
deps = ["Artifacts", "Libdl"] deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"

View File

@@ -1,7 +1,7 @@
name = "GeneralUtils" name = "GeneralUtils"
uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe" uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe"
authors = ["tonaerospace <tonaerospace.etc@gmail.com>"] authors = ["tonaerospace <tonaerospace.etc@gmail.com>"]
version = "0.3.0" version = "0.3.1"
[deps] [deps]
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
@@ -10,8 +10,11 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f" Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
MQTTClient = "985f35cc-2c3d-4943-b8c1-f0931d5f0959" NATS = "55e73f9c-eeeb-467f-b4cc-a633fde63d2a"
PrettyPrinting = "54e16d92-306c-5ea0-a30b-337be88ac337" PrettyPrinting = "54e16d92-306c-5ea0-a30b-337be88ac337"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce" SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[compat]
NATS = "0.1.0"

79
codesnippet/nats.jl Normal file
View File

@@ -0,0 +1,79 @@
using NATS, JSON3

connection = NATS.connect("nats.yiem.cc:4222")

# Handler for incoming requests: decode the JSON payload, print it together with the
# reply subject, and answer with a small JSON document.
function handleRequest(msg)
    payload = copy(JSON3.read(msg.payload))
    println(payload)
    println(msg.reply_to)
    # publish(connection, msg.reply_to, "ACK")
    return JSON3.write(Dict(:a=>"wassup"))
end

# Register the handler on "some_subject" as a member of queue group "group1".
sub1 = NATS.reply(handleRequest, connection, "some_subject"; queue_group="group1")
using NATS, JSON3, GeneralUtils

connection = NATS.connect("nats.yiem.cc:4222")

# The same subject is used for the message metadata and for the request itself.
subject = "text2textinstruct_medium.inference.api.v1"

msgMeta = GeneralUtils.generate_msgMeta(
    subject;
    msgPurpose="inference",
    senderName="yiemagent",
    senderId=GeneralUtils.uuid4snakecase(),
    receiverName="text2textinstruct",
)

# Seconds; sent inside the payload and also used as the local request timeout.
llmHttpTimeout = 60

inferenceKwargs = Dict(
    :max_tokens=> 2048,
    :stop=> ["<|im_end|>"],
    :temperature=> 0.2,
)

outgoingMsg = Dict(
    :msgMeta=> msgMeta,
    :payload=> Dict(
        :text=> "Wassup buddy!",
        :kwargs=> inferenceKwargs,
        :llmHttpTimeout=> llmHttpTimeout,
    ),
)

# Send the request and read the reply as a String.
r = NATS.request(
    String, connection, subject, JSON3.write(outgoingMsg);
    timer=Timer(llmHttpTimeout),
)
using NATS, JSON3, GeneralUtils

connection = NATS.connect("nats.yiem.cc:4222")

msgMeta = GeneralUtils.generate_msgMeta(
    "tonpc.containerServices";
    msgPurpose="reset container",
    senderName="",
)

# The payload is the shell command sent to the container service.
outgoingMsg = Dict(
    :msgMeta=> msgMeta,
    :payload=> "docker container restart ollama-instance-2",
)

# NOTE(review): it may not be possible to issue a NATS request from inside a NATS reply
# handler -- unverified.
r = NATS.request(
    String, connection, msgMeta[:sendTopic], JSON3.write(outgoingMsg);
    timer=Timer(10),
)

View File

@@ -2,7 +2,7 @@ module GeneralUtils
export # struct export # struct
mqttClientInstance, # mqttClientInstance,
# function # function
noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue

File diff suppressed because it is too large Load Diff

View File

@@ -2,7 +2,7 @@ module dbUtil
export dictToPostgresKeyValueString, generateInsertSQL, generateUpdateSQL export dictToPostgresKeyValueString, generateInsertSQL, generateUpdateSQL
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames,
SHA SHA
using ..util using ..util

View File

@@ -6,9 +6,9 @@ export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue,
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax, matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax,
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween, multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!, isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
getDictPath getDictPath, detectKeywordVariation, textToDict
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, CSV using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames, CSV
using ..util, ..communication using ..util, ..communication
# ---------------------------------------------- 100 --------------------------------------------- # # ---------------------------------------------- 100 --------------------------------------------- #
@@ -1150,9 +1150,196 @@ end
"""
    detectKeywordVariation(keywords::AbstractVector, text::String) -> Dict{String, Union{Array, Nothing}}

Collect, for every keyword in `keywords`, the case variations of that keyword found in `text`.

Each keyword is looked up individually with the single-keyword method
`detectKeywordVariation(keyword::String, text::String)`.

# Arguments
- `keywords::AbstractVector` Keywords to search for (each element is used as a `String` key)
- `text::String` The text to search in

# Returns
- `Dict{String, Union{Array, Nothing}}` Maps each keyword to the array of matched variations,
  or to `nothing` when no variation of that keyword was found

# Examples
```julia
julia> detectKeywordVariation(["test", "example", "cat"], "This is a Test EXAMPLE")
Dict{String, Union{Nothing, Array}}("test" => ["Test"], "example" => ["EXAMPLE"], "cat" => nothing)
```
"""
function detectKeywordVariation(keywords::T, text::String)::Dict{String, Union{Array, Nothing}} where {T<:AbstractVector}
    kw = Dict{String, Union{Array, Nothing}}()
    for keyword in keywords
        found = detectKeywordVariation(keyword, text)
        # "not found" may be reported either as `nothing` or as an empty array;
        # both are stored as `nothing` so callers have a single case to test for.
        kw[keyword] = (found === nothing || isempty(found)) ? nothing : found
    end
    return kw
end
"""
    detectKeywordVariation(keyword::String, text::String) -> Union{Nothing, Array{String}}

Find which case variations of `keyword` (as given, first letter uppercased, all uppercase,
all lowercase) occur among the words of `text`.

`text` is split on single spaces; every piece is stripped of surrounding whitespace and one
trailing `.` is removed. Each variation is then looked up in the resulting word list with
`findIndex`, and is reported once per index `findIndex` returns, so a keyword that occurs
several times appears several times in the result.

# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in

# Returns:
- `Union{Nothing, Array{String}}` Array of the matched keyword variations, or `nothing` when
  no variation was found

# Examples:
```julia
julia> detectKeywordVariation("test", "This is a Test case")
["Test"]

julia> detectKeywordVariation("missing", "complete data")
nothing
```
"""
function detectKeywordVariation(keyword::String, text::String)::Union{Nothing, Array{String}}
    # Candidate spellings, the caller's own spelling first. `unique` drops spellings that
    # coincide (e.g. for "x" both uppercasefirst and uppercase give "X"), otherwise the
    # same occurrence would be reported twice.
    keyword_variations = unique(
        [keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)])

    # Split into words and remove one trailing period from each word.
    splittext = String[]
    for piece in split(text, " ")
        word = strip(piece)
        # `chop` removes the last character safely even when the text is not ASCII
        push!(splittext, endswith(word, '.') ? String(chop(word)) : String(word))
    end

    result = String[]
    for variation in keyword_variations
        # second element returned by findIndex holds the matching positions
        hits = findIndex(splittext, variation)[2]
        # one entry per hit so duplicated keywords in the text are all reported
        append!(result, fill(variation, length(hits)))
    end

    return isempty(result) ? nothing : result
end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important:
`detectKeywords` (and `dictKey`) must be listed in the order the keywords appear in `text`.

Case variations of each keyword (e.g. agent, Agent, AGENT) are located with
`detectKeywordVariation`, and the text is then sliced from back to front.

# Arguments
- `text::String`
    A text to be converted.
- `detectKeywords::Vector{String}`
    A list of keywords to be used to slice the text.

# Keyword Arguments
- `dictKey::Union{Vector{String}, Nothing}`
    Keys of the resulting dict, one per detected keyword occurrence, in the same order.
    When `nothing` (default) `detectKeywords` is used as the keys.
- `symbolkey::Bool`
    If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
    set resulting dict's key to be lowercase

# Return
- `d::OrderedDict`

# Throws
- `ErrorException` when a keyword is not found in `text`, or when more keyword occurrences
    are detected than there are dict keys.

# Example
```julia
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought:", "plan:", "action:"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords;
            dictKey=["thought", "plan", "action"], symbolkey=true)
julia> println(resultdict)
OrderedDict{Symbol, Any}(:thought => "what to do",
                         :plan => "wake up and going out",
                         :action => "1. wake up 2. eat 3. sleep")
```
"""
function textToDict(text::String, detectKeywords::Vector{String};
    dictKey::Union{Vector{String}, Nothing}=nothing,
    symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Resolve every keyword to the exact spelling(s) present in the text.
    kw = String[]
    for keyword in detectKeywords
        detected = detectKeywordVariation(keyword, text)
        if detected === nothing || isempty(detected)
            error("Keyword $keyword not found in text: $text")
        end
        append!(kw, detected)
    end

    # Without explicit keys the keywords themselves name the sections.
    keynames = dictKey === nothing ? detectKeywords : dictKey
    if length(kw) > length(keynames)
        error("textToDict detected $(length(kw)) keyword occurrence(s) but only " *
              "$(length(keynames)) dict key(s) are available")
    end

    K = symbolkey ? Symbol : String
    backToFront = OrderedDict{K, Any}()

    remainingtext = text
    rkeys = reverse(keynames)
    # process text from back to front so each slice ends where the previous one started
    for (i, keyword) in enumerate(reverse(kw))
        keywordidx = findlast(keyword, remainingtext)
        if keywordidx === nothing
            error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
        end
        # nextind/prevind keep the slicing valid for multi-byte characters
        substr = remainingtext[nextind(remainingtext, last(keywordidx)):end]
        name = lowercasekey ? lowercase(rkeys[i]) : rkeys[i]
        backToFront[symbolkey ? Symbol(name) : name] = string(strip(substr))
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # correct the order
    ordered = OrderedDict{K, Any}()
    for k in reverse(collect(keys(backToFront)))
        ordered[k] = backToFront[k]
    end
    return ordered
end

View File

@@ -1,15 +1,15 @@
module util module util
export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys, export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
findMatchingDictKey, textToDict, randstring, randstrings, timeout, findMatchingDictKey, randstring, randstrings, timeout,
dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString, dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, issomething,
dictToString_numbering, extract_triple_backtick_text, dictToString_numbering, extract_triple_backtick_text,
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter, countGivenWords, remove_french_accents,
extractTextBetweenString, extractTextBetweenCharacter, extractTextBetweenString,
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames
# ---------------------------------------------- 100 --------------------------------------------- # # ---------------------------------------------- 100 --------------------------------------------- #
@@ -244,102 +244,6 @@ function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
end end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important:
`detectKeywords` (and `dictKey`) must be listed in the order the keywords appear in `text`.

The spelling of each keyword actually used in the text (e.g. agent, Agent, AGENT) is found
with `detect_keyword`, and the text is then sliced from back to front.

# Arguments
- `text::String`
    A text to be converted.
- `detectKeywords::Vector{String}`
    A list of keywords to be used to slice the text.

# Keyword Arguments
- `dictKey::Union{Vector{String}, Nothing}`
    Keys of the resulting dict, one per keyword, in the same order.
    When `nothing` (default) `detectKeywords` is used as the keys.
- `symbolkey::Bool`
    If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
    set resulting dict's key to be lowercase

# Return
- `d::OrderedDict`

# Throws
- `ErrorException` when a keyword is not found in `text`, or when fewer dict keys than
    keywords are given.

# Example
```julia
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought:", "plan:", "action:"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords;
            dictKey=["thought", "plan", "action"], symbolkey=true)
julia> println(resultdict)
OrderedDict{Symbol, Any}(:thought => "what to do",
                         :plan => "wake up and going out",
                         :action => "1. wake up 2. eat 3. sleep")
```
"""
function textToDict(text::String, detectKeywords::Vector{String};
    dictKey::Union{Vector{String}, Nothing}=nothing,
    symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Resolve every keyword to the exact spelling present in the text.
    kw = String[]
    for keyword in detectKeywords
        detected = detect_keyword(keyword, text)
        if detected === nothing
            error("Keyword $keyword not found in text.")
        end
        push!(kw, detected)
    end

    # Without explicit keys the keywords themselves name the sections.
    keynames = dictKey === nothing ? detectKeywords : dictKey
    if length(kw) > length(keynames)
        error("textToDict was given $(length(kw)) keyword(s) but only " *
              "$(length(keynames)) dict key(s)")
    end

    K = symbolkey ? Symbol : String
    backToFront = OrderedDict{K, Any}()

    remainingtext = text
    rkeys = reverse(keynames)
    # process text from back to front so each slice ends where the previous one started
    for (i, keyword) in enumerate(reverse(kw))
        keywordidx = findlast(keyword, remainingtext)
        if keywordidx === nothing
            error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
        end
        # nextind/prevind keep the slicing valid for multi-byte characters
        substr = remainingtext[nextind(remainingtext, last(keywordidx)):end]
        name = lowercasekey ? lowercase(rkeys[i]) : rkeys[i]
        backToFront[symbolkey ? Symbol(name) : name] = string(strip(substr))
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # correct the order
    ordered = OrderedDict{K, Any}()
    for k in reverse(collect(keys(backToFront)))
        ordered[k] = backToFront[k]
    end
    return ordered
end
""" Generate a random string """ Generate a random string
# Arguments # Arguments
@@ -784,152 +688,6 @@ function cuttext(range, text)
end end
end end
"""
    detect_keyword(keywords::AbstractVector, text::String) -> Dict{String, Integer}

Count, for each keyword, how many space-separated pieces of `text` contain the keyword in
one of its case variations (as given, uppercase first letter, all uppercase, all lowercase).

`text` is split on single spaces only, and every piece is checked with the single-keyword
method `detect_keyword(keyword::String, text::String)`, which matches substrings. A piece
such as "category" therefore counts as a hit for "cat".

# Arguments
- `keywords::AbstractVector` Vector of keywords to search for
- `text::String` The text to search in

# Returns
- `Dict{String, Integer}` Returns a dictionary mapping each keyword to its count in the text
  (0 if not found)

# Examples
```julia
julia> detect_keyword(["test", "example"], "This is a Test EXAMPLE")
Dict{String, Integer}("test" => 1, "example" => 1)

julia> detect_keyword(["cat"], "cats and category")
Dict{String, Integer}("cat" => 2)

julia> detect_keyword(["error"], "No ERRORS found!")
Dict{String, Integer}("error" => 1)
```
"""
function detect_keyword(keywords::T, text::String)::Dict{String, Integer} where {T<:AbstractVector}
    counts = Dict{String, Integer}()
    # split once; the same pieces are reused for every keyword
    pieces = string.(split(text, " "))
    for keyword in keywords
        # one entry per piece: the matched variation, or nothing when the piece has no match
        hits = detect_keyword.(keyword, pieces)
        counts[keyword] = sum(issomething.(hits))
    end
    return counts
end
"""
    detect_keyword(keyword::String, text::String) -> Union{Nothing, String}

Report which spelling of `keyword` occurs in `text`.

The spellings are tried in this order: the keyword as given, first letter uppercased,
all uppercase, all lowercase. The first one that is a substring of `text` is returned.

# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in

# Returns:
- `Union{Nothing, String}` The first spelling found in `text`, or `nothing` when none occurs

# Examples:
```julia
julia> detect_keyword("test", "This is a Test case")
"Test"

julia> detect_keyword("error", "NO ERRORS FOUND")
"ERROR"

julia> detect_keyword("missing", "complete data")
nothing
```
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
    # spellings in priority order
    candidates = [keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)]
    idx = findfirst(candidate -> occursin(candidate, text), candidates)
    return idx === nothing ? nothing : candidates[idx]
end
""" """

View File

@@ -1,7 +0,0 @@
python -> pandas -> dataframe -> csv
julia -> DataFrames -> dataframe -> csv
dict -> dataframe -> csv