Compare commits
13 Commits
v0.2.4
...
0e36b8db90
| Author | SHA1 | Date | |
|---|---|---|---|
| 0e36b8db90 | |||
| 13fcf06503 | |||
|
|
066d72553f | ||
|
|
b3e8df7287 | ||
|
|
c5f3fda2ba | ||
|
|
adab61dca8 | ||
|
|
09615a6909 | ||
|
|
92c5930e9a | ||
|
|
5b4c1c1471 | ||
|
|
fc3edd7b8f | ||
|
|
93aa0ee1ac | ||
|
|
42378714a0 | ||
|
|
759f022c98 |
@@ -1,8 +1,8 @@
|
||||
# This file is machine-generated - editing it directly is not advised
|
||||
|
||||
julia_version = "1.11.4"
|
||||
julia_version = "1.11.5"
|
||||
manifest_format = "2.0"
|
||||
project_hash = "75c6a269a13b222c106479d2177b05facfa23f74"
|
||||
project_hash = "a942446c2f26ef72d0c4b0ca522e0adcf709ce4e"
|
||||
|
||||
[[deps.AliasTables]]
|
||||
deps = ["PtrArrays", "Random"]
|
||||
@@ -22,12 +22,23 @@ version = "1.11.0"
|
||||
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.BufferedStreams]]
|
||||
git-tree-sha1 = "6863c5b7fc997eadcabdbaf6c5f201dc30032643"
|
||||
uuid = "e1450e63-4bb3-523b-b2a4-4ffa8c0fd77d"
|
||||
version = "1.2.2"
|
||||
|
||||
[[deps.CSV]]
|
||||
deps = ["CodecZlib", "Dates", "FilePathsBase", "InlineStrings", "Mmap", "Parsers", "PooledArrays", "PrecompileTools", "SentinelArrays", "Tables", "Unicode", "WeakRefStrings", "WorkerUtilities"]
|
||||
git-tree-sha1 = "deddd8725e5e1cc49ee205a1964256043720a6c3"
|
||||
uuid = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
version = "0.10.15"
|
||||
|
||||
[[deps.CodecBase]]
|
||||
deps = ["TranscodingStreams"]
|
||||
git-tree-sha1 = "40956acdbef3d8c7cc38cba42b56034af8f8581a"
|
||||
uuid = "6c391c72-fb7b-5838-ba82-7cfb1bcfecbf"
|
||||
version = "0.3.4"
|
||||
|
||||
[[deps.CodecZlib]]
|
||||
deps = ["TranscodingStreams", "Zlib_jll"]
|
||||
git-tree-sha1 = "bce6804e5e6044c6daab27bb533d1295e4a2e759"
|
||||
@@ -81,11 +92,6 @@ deps = ["Printf"]
|
||||
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.Distributed]]
|
||||
deps = ["Random", "Serialization", "Sockets"]
|
||||
uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.Distributions]]
|
||||
deps = ["AliasTables", "FillArrays", "LinearAlgebra", "PDMats", "Printf", "QuadGK", "Random", "SpecialFunctions", "Statistics", "StatsAPI", "StatsBase", "StatsFuns"]
|
||||
git-tree-sha1 = "3101c32aab536e7a27b1763c0797dba151b899ad"
|
||||
@@ -148,6 +154,11 @@ deps = ["Random"]
|
||||
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.HashArrayMappedTries]]
|
||||
git-tree-sha1 = "2eaa69a7cab70a52b9687c8bf950a5a93ec895ae"
|
||||
uuid = "076d061b-32b6-4027-95e0-9a2c6f6d7e74"
|
||||
version = "0.2.0"
|
||||
|
||||
[[deps.HypergeometricFunctions]]
|
||||
deps = ["LinearAlgebra", "OpenLibm_jll", "SpecialFunctions"]
|
||||
git-tree-sha1 = "b1c2585431c382e3fe5805874bda6aea90a95de9"
|
||||
@@ -264,21 +275,17 @@ version = "0.3.28"
|
||||
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.MQTTClient]]
|
||||
deps = ["Distributed", "Random", "Sockets"]
|
||||
git-tree-sha1 = "f2597b290d4bf17b577346153cd2ddf9accb5c26"
|
||||
uuid = "985f35cc-2c3d-4943-b8c1-f0931d5f0959"
|
||||
version = "0.3.1"
|
||||
weakdeps = ["PrecompileTools"]
|
||||
|
||||
[deps.MQTTClient.extensions]
|
||||
PrecompileMQTT = "PrecompileTools"
|
||||
|
||||
[[deps.Markdown]]
|
||||
deps = ["Base64"]
|
||||
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.MbedTLS]]
|
||||
deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"]
|
||||
git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf"
|
||||
uuid = "739be429-bea8-5141-9913-cc70e7f3736d"
|
||||
version = "1.1.9"
|
||||
|
||||
[[deps.MbedTLS_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
|
||||
@@ -298,6 +305,18 @@ version = "1.11.0"
|
||||
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
|
||||
version = "2023.12.12"
|
||||
|
||||
[[deps.NATS]]
|
||||
deps = ["Base64", "BufferedStreams", "CodecBase", "Dates", "DocStringExtensions", "JSON3", "MbedTLS", "NanoDates", "Random", "ScopedValues", "Sockets", "Sodium", "StructTypes", "URIs"]
|
||||
git-tree-sha1 = "d9d9a189fb9155a460e6b5e8966bf6a66737abf8"
|
||||
uuid = "55e73f9c-eeeb-467f-b4cc-a633fde63d2a"
|
||||
version = "0.1.0"
|
||||
|
||||
[[deps.NanoDates]]
|
||||
deps = ["Dates", "Parsers"]
|
||||
git-tree-sha1 = "850a0557ae5934f6e67ac0dc5ca13d0328422d1f"
|
||||
uuid = "46f1a544-deae-4307-8689-c12aa3c955c6"
|
||||
version = "1.0.3"
|
||||
|
||||
[[deps.NetworkOptions]]
|
||||
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
|
||||
version = "1.2.0"
|
||||
@@ -310,7 +329,7 @@ version = "0.3.27+1"
|
||||
[[deps.OpenLibm_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
|
||||
version = "0.8.1+4"
|
||||
version = "0.8.5+0"
|
||||
|
||||
[[deps.OpenSpecFun_jll]]
|
||||
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
|
||||
@@ -423,6 +442,12 @@ version = "0.5.1+0"
|
||||
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
version = "0.7.0"
|
||||
|
||||
[[deps.ScopedValues]]
|
||||
deps = ["HashArrayMappedTries", "Logging"]
|
||||
git-tree-sha1 = "1147f140b4c8ddab224c94efa9569fc23d63ab44"
|
||||
uuid = "7e506255-f358-4e82-b7e4-beb19740aa63"
|
||||
version = "1.3.0"
|
||||
|
||||
[[deps.SentinelArrays]]
|
||||
deps = ["Dates", "Random"]
|
||||
git-tree-sha1 = "d0553ce4031a081cc42387a9b9c8441b7d99f32d"
|
||||
@@ -437,6 +462,12 @@ version = "1.11.0"
|
||||
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
|
||||
version = "1.11.0"
|
||||
|
||||
[[deps.Sodium]]
|
||||
deps = ["Base64", "libsodium_jll"]
|
||||
git-tree-sha1 = "907703e0d50846f300650d7225bdcab145b7bca9"
|
||||
uuid = "4f5b5e99-b0ad-42cd-b47a-334e172ec8bd"
|
||||
version = "1.1.2"
|
||||
|
||||
[[deps.SortingAlgorithms]]
|
||||
deps = ["DataStructures"]
|
||||
git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085"
|
||||
@@ -544,6 +575,11 @@ git-tree-sha1 = "0c45878dcfdcfa8480052b6ab162cdd138781742"
|
||||
uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa"
|
||||
version = "0.11.3"
|
||||
|
||||
[[deps.URIs]]
|
||||
git-tree-sha1 = "24c1c558881564e2217dcf7840a8b2e10caeb0f9"
|
||||
uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4"
|
||||
version = "1.6.0"
|
||||
|
||||
[[deps.UUIDs]]
|
||||
deps = ["Random", "SHA"]
|
||||
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
@@ -574,6 +610,12 @@ deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
|
||||
version = "5.11.0+0"
|
||||
|
||||
[[deps.libsodium_jll]]
|
||||
deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"]
|
||||
git-tree-sha1 = "f76d682d87eefadd3f165d8d9fda436464213142"
|
||||
uuid = "a9144af2-ca23-56d9-984f-0d03f7b5ccf8"
|
||||
version = "1.0.20+3"
|
||||
|
||||
[[deps.nghttp2_jll]]
|
||||
deps = ["Artifacts", "Libdl"]
|
||||
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
name = "GeneralUtils"
|
||||
uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe"
|
||||
authors = ["tonaerospace <tonaerospace.etc@gmail.com>"]
|
||||
version = "0.2.4"
|
||||
version = "0.3.1"
|
||||
|
||||
[deps]
|
||||
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
|
||||
@@ -10,8 +10,11 @@ DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
|
||||
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
|
||||
Distributions = "31c24e10-a181-5473-b8eb-7969acd0382f"
|
||||
JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1"
|
||||
MQTTClient = "985f35cc-2c3d-4943-b8c1-f0931d5f0959"
|
||||
NATS = "55e73f9c-eeeb-467f-b4cc-a633fde63d2a"
|
||||
PrettyPrinting = "54e16d92-306c-5ea0-a30b-337be88ac337"
|
||||
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
|
||||
SHA = "ea8e919c-243c-51af-8825-aaa63cd721ce"
|
||||
UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
|
||||
|
||||
[compat]
|
||||
NATS = "0.1.0"
|
||||
|
||||
79
codesnippet/nats.jl
Normal file
79
codesnippet/nats.jl
Normal file
@@ -0,0 +1,79 @@
|
||||
|
||||
using NATS, JSON3
|
||||
# Open a connection to the NATS server used by these snippets.
connection = NATS.connect("nats.yiem.cc:4222")

# Register a responder on "some_subject" within queue group "group1".
# NOTE(review): presumably the handler's return value is sent back as the reply —
# confirm against the NATS.jl `reply` documentation.
sub1 = NATS.reply(connection, "some_subject"; queue_group = "group1") do msg
    # Decode the JSON body and copy it into plain Julia containers.
    decoded = copy(JSON3.read(msg.payload))
    println(decoded)
    println(msg.reply_to)
    # publish(connection, msg.reply_to, "ACK")
    return JSON3.write(Dict(:a => "wassup"))
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
using NATS, JSON3, GeneralUtils
|
||||
# Open a connection to the NATS server used by these snippets.
connection = NATS.connect("nats.yiem.cc:4222")

# Metadata envelope for an inference request; the first argument is the topic string
# that is also used as the request subject below.
msgMeta = GeneralUtils.generate_msgMeta(
    "text2textinstruct_medium.inference.api.v1";
    msgPurpose = "inference",
    senderName = "yiemagent",
    senderId = GeneralUtils.uuid4snakecase(),
    receiverName = "text2textinstruct",
)

# The same number of seconds is sent inside the payload and used for the request timer.
llmHttpTimeout = 60

outgoingMsg = Dict(
    :msgMeta => msgMeta,
    :payload => Dict(
        :text => "Wassup buddy!",
        :kwargs => Dict(
            :max_tokens => 2048,
            :stop => ["<|im_end|>"],
            :temperature => 0.2,
        ),
        :llmHttpTimeout => llmHttpTimeout,
    ),
)

# Send the JSON-encoded message and ask for the reply as a String.
r = NATS.request(
    String,
    connection,
    "text2textinstruct_medium.inference.api.v1",
    JSON3.write(outgoingMsg);
    timer = Timer(llmHttpTimeout),
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
using NATS, JSON3, GeneralUtils
|
||||
# Open a connection to the NATS server used by these snippets.
connection = NATS.connect("nats.yiem.cc:4222")

# Metadata envelope for a container-service request.
msgMeta = GeneralUtils.generate_msgMeta(
    "tonpc.containerServices";
    msgPurpose = "reset container",
    senderName = "",
)

# The payload is the shell command string carried to the receiver.
outgoingMsg = Dict(
    :msgMeta => msgMeta,
    :payload => "docker container restart ollama-instance-2",
)

# may be I can't use NATS request inside NATS reply??
# The request subject is read back from the generated metadata (`:sendTopic`).
r = NATS.request(
    String,
    connection,
    msgMeta[:sendTopic],
    JSON3.write(outgoingMsg);
    timer = Timer(10),
)
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ module GeneralUtils
|
||||
|
||||
|
||||
export # struct
|
||||
mqttClientInstance,
|
||||
# mqttClientInstance,
|
||||
# function
|
||||
noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue
|
||||
|
||||
|
||||
1319
src/communication.jl
1319
src/communication.jl
File diff suppressed because it is too large
Load Diff
@@ -2,7 +2,7 @@ module dbUtil
|
||||
|
||||
export dictToPostgresKeyValueString, generateInsertSQL, generateUpdateSQL
|
||||
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames,
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames,
|
||||
SHA
|
||||
using ..util
|
||||
|
||||
|
||||
191
src/interface.jl
191
src/interface.jl
@@ -6,9 +6,9 @@ export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue,
|
||||
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax,
|
||||
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
|
||||
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
|
||||
getDictPath
|
||||
getDictPath, detectKeywordVariation, textToDict
|
||||
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, CSV
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames, CSV
|
||||
using ..util, ..communication
|
||||
|
||||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||||
@@ -1150,9 +1150,196 @@ end
|
||||
|
||||
|
||||
|
||||
"""
    detectKeywordVariation(keywords::AbstractVector, text::String) -> Dict{String, Union{Array, Nothing}}

Collect, for every keyword in `keywords`, the case variants of that keyword found in `text`.

Each keyword is handed to the single-keyword method
`detectKeywordVariation(keyword::String, text::String)` and its result is stored under
the keyword.

# Arguments
- `keywords::AbstractVector`: keywords to search for.
- `text::String`: the text to search in.

# Returns
- `Dict{String, Union{Array, Nothing}}`: maps each keyword to the array returned by the
  single-keyword method, or to `nothing` when that method reports no match (it returned
  `nothing` or an empty array).

# Examples
```julia
julia> detectKeywordVariation(["test", "example", "cat"], "This is a Test EXAMPLE")
# expected entries (Dict order is unspecified):
#   "test" => ["Test"], "example" => ["EXAMPLE"], "cat" => nothing
```
"""
function detectKeywordVariation(keywords::T, text::String)::Dict{String, Union{Array, Nothing}} where {T<:AbstractVector}
    kw = Dict{String, Union{Array, Nothing}}()

    for keyword in keywords
        found = detectKeywordVariation(keyword, text)
        # Treat both "no result" forms (nothing / empty array) as "not found".
        kw[keyword] = (found === nothing || isempty(found)) ? nothing : found
    end
    return kw
end
|
||||
|
||||
|
||||
"""
    detectKeywordVariation(keyword::String, text::String) -> Union{Nothing, Array{String}}

Detect occurrences of `keyword` in `text` in different case variations: the keyword as
given, `uppercasefirst(keyword)`, `uppercase(keyword)` and `lowercase(keyword)`.

`text` is split on single spaces; every token is stripped of surrounding whitespace and of
one trailing `.` before it is compared. The comparison itself is delegated to `findIndex`.
NOTE(review): `findIndex` is assumed to return, in its second slot, the positions of the
tokens that equal the variation — confirm against its definition.

# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in

# Returns:
- `Union{Nothing, Array{String}}` An array holding the matched variation once per
  occurrence (so repeated keywords are reported repeatedly), or `nothing` when no
  variation occurs in the text.

# Examples:
```julia
julia> detectKeywordVariation("test", "This is a Test case")
# expected: ["Test"]

julia> detectKeywordVariation("missing", "complete data")
# expected: nothing
```
"""
function detectKeywordVariation(keyword::String, text::String)::Union{Nothing, Array{String}}
    # Candidate spellings; `unique` keeps the first-seen order and removes duplicates
    # (e.g. keyword "a" yields "A" from both uppercasefirst and uppercase), which would
    # otherwise make every occurrence count twice.
    keyword_variations = unique([keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)])

    splittext = String[]
    for token in split(text, " ")
        word = strip(token)
        # Remove a single trailing '.'; `chop` respects character boundaries, unlike
        # byte arithmetic such as `word[1:end-1]`, which throws on multibyte characters.
        push!(splittext, endswith(word, '.') ? string(chop(word)) : string(word))
    end

    result = String[]
    for variation in keyword_variations
        r = findIndex(splittext, variation)
        # One entry per reported position so duplicate keywords are detected.
        for _ in eachindex(r[2])
            push!(result, variation)
        end
    end

    # Honour the documented contract: `nothing` (not an empty array) when nothing matched.
    return isempty(result) ? nothing : result
end
|
||||
|
||||
|
||||
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important:
the keywords must be listed in the order in which they appear in the text.

# Arguments
- `text::String`
    A text to be converted.
- `detectKeywords::Vector{String}`
    A list of keywords used to slice the text. Each keyword is looked up with
    `detectKeywordVariation`, and the first variation it reports is the marker that is
    searched for in the text.

# Keyword Arguments
- `dictKey::Union{Vector{String}, Nothing}`
    Keys of the resulting dict, in the same order as `detectKeywords`. Must hold at least
    one key per keyword; keys are paired with keywords starting from the last entry.
    Defaults to `detectKeywords` when `nothing`.
- `symbolkey::Bool`
    If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
    set resulting dict's key to be lowercase

# Return
- `d::OrderedDict`
    `OrderedDict{Symbol, Any}` when `symbolkey` is true, otherwise `OrderedDict{String, Any}`;
    entries follow the order of the keywords in the text.

# Throws
- `ArgumentError` when `dictKey` has fewer entries than `detectKeywords`.
- `ErrorException` when a keyword cannot be found in the text.

# Example
```julia
text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
resultdict = GeneralUtils.textToDict(text, ["thought:", "plan:", "action:"];
    dictKey=["thought", "plan", "action"], symbolkey=true)
# expected (assuming detectKeywordVariation finds each marker as a whole word):
#   :thought => "what to do"
#   :plan    => "wake up and going out"
#   :action  => "1. wake up 2. eat 3. sleep"
```
"""
function textToDict(text::String, detectKeywords::Vector{String};
        dictKey::Union{Vector{String}, Nothing}=nothing,
        symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Without explicit keys the keywords themselves become the dict keys.
    keynames = dictKey === nothing ? detectKeywords : dictKey
    length(keynames) >= length(detectKeywords) || throw(ArgumentError(
        "dictKey has $(length(keynames)) entries but $(length(detectKeywords)) keywords were given"))

    # make sure this function detects variations of a word e.g. agent, Agent, AGENT.
    # Exactly one marker is kept per keyword so markers stay aligned with the keys.
    kw = String[]
    for keyword in detectKeywords
        detected = detectKeywordVariation(keyword, text)
        if detected === nothing || isempty(detected)
            error("Keyword $keyword not found in text: $text")
        end
        push!(kw, first(detected))
    end

    od1 = symbolkey ? OrderedDict{Symbol, Any}() : OrderedDict{String, Any}()

    remainingtext = text
    dictKey_ = reverse(keynames)

    # process text from back to front
    for (i, keyword) in enumerate(reverse(kw))
        # Find the position of the keyword in the text
        keywordidx = findlast(keyword, remainingtext)
        if keywordidx === nothing
            error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
        end

        dKey = dictKey_[i]
        _key = lowercasekey ? lowercase(dKey) : dKey
        key = symbolkey ? Symbol(_key) : _key

        # nextind/prevind step over whole characters; plain +1/-1 on byte indices can land
        # inside a multibyte character and throw StringIndexError.
        substr = remainingtext[nextind(remainingtext, last(keywordidx)):end]
        od1[key] = string(strip(substr)) # Removes both leading and trailing whitespace.
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # correct the order (od1 was filled from the last keyword to the first)
    od2 = typeof(od1)()
    for k in reverse(collect(keys(od1)))
        od2[k] = od1[k]
    end

    return od2
end
|
||||
|
||||
|
||||
|
||||
|
||||
252
src/util.jl
252
src/util.jl
@@ -1,15 +1,15 @@
|
||||
module util
|
||||
|
||||
export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
|
||||
findMatchingDictKey, textToDict, randstring, randstrings, timeout,
|
||||
findMatchingDictKey, randstring, randstrings, timeout,
|
||||
dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
|
||||
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey,
|
||||
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, issomething,
|
||||
dictToString_numbering, extract_triple_backtick_text,
|
||||
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter,
|
||||
extractTextBetweenString,
|
||||
countGivenWords, remove_french_accents,
|
||||
extractTextBetweenCharacter, extractTextBetweenString,
|
||||
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
|
||||
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, DataFrames
|
||||
|
||||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||||
|
||||
@@ -244,102 +244,6 @@ function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
|
||||
end
|
||||
|
||||
|
||||
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important:
the keywords must be listed in the order in which they appear in the text.

# Arguments
- `text::String`
    A text to be converted.
- `detectKeywords::Vector{String}`
    A list of keywords used to slice the text. Each keyword is resolved with
    `detect_keyword(keyword, text)` to the case variation that is searched for.

# Keyword Arguments
- `dictKey::Union{Vector{String}, Nothing}`
    Keys of the resulting dict, in the same order as `detectKeywords`. Must hold at least
    one key per keyword; keys are paired with keywords starting from the last entry.
    Defaults to `detectKeywords` when `nothing`.
- `symbolkey::Bool`
    If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
    set resulting dict's key to be lowercase

# Return
- `d::OrderedDict`
    `OrderedDict{Symbol, Any}` when `symbolkey` is true, otherwise `OrderedDict{String, Any}`;
    entries follow the order of the keywords in the text.

# Throws
- `ArgumentError` when `dictKey` has fewer entries than `detectKeywords`.
- `ErrorException` when a keyword cannot be found in the text.

# Example
```julia
text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
resultdict = GeneralUtils.textToDict(text, ["thought:", "plan:", "action:"];
    dictKey=["thought", "plan", "action"], symbolkey=true)
# expected:
#   :thought => "what to do"
#   :plan    => "wake up and going out"
#   :action  => "1. wake up 2. eat 3. sleep"
```
"""
function textToDict(text::String, detectKeywords::Vector{String};
        dictKey::Union{Vector{String}, Nothing}=nothing,
        symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Without explicit keys the keywords themselves become the dict keys.
    keynames = dictKey === nothing ? detectKeywords : dictKey
    length(keynames) >= length(detectKeywords) || throw(ArgumentError(
        "dictKey has $(length(keynames)) entries but $(length(detectKeywords)) keywords were given"))

    # make sure this function detects variations of a word e.g. agent, Agent, AGENT
    kw = String[]
    for keyword in detectKeywords
        detected = detect_keyword(keyword, text)
        if detected === nothing
            error("Keyword $keyword not found in text.")
        end
        push!(kw, detected)
    end

    od1 = symbolkey ? OrderedDict{Symbol, Any}() : OrderedDict{String, Any}()

    remainingtext = text
    dictKey_ = reverse(keynames)

    # process text from back to front
    for (i, keyword) in enumerate(reverse(kw))
        # Find the position of the keyword in the text
        keywordidx = findlast(keyword, remainingtext)
        if keywordidx === nothing
            error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
        end

        dKey = dictKey_[i]
        _key = lowercasekey ? lowercase(dKey) : dKey
        key = symbolkey ? Symbol(_key) : _key

        # nextind/prevind step over whole characters; plain +1/-1 on byte indices can land
        # inside a multibyte character and throw StringIndexError.
        substr = remainingtext[nextind(remainingtext, last(keywordidx)):end]
        od1[key] = string(strip(substr)) # Removes both leading and trailing whitespace.
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # correct the order (od1 was filled from the last keyword to the first)
    od2 = typeof(od1)()
    for k in reverse(collect(keys(od1)))
        od2[k] = od1[k]
    end

    return od2
end
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
""" Generate a random string
|
||||
|
||||
# Arguments
|
||||
@@ -784,152 +688,6 @@ function cuttext(range, text)
|
||||
end
|
||||
end
|
||||
|
||||
"""
    detect_keyword(keywords::AbstractVector, text::String) -> Dict{String, Integer}

Count, for each keyword, how many space-separated tokens of `text` contain the keyword in
one of its case variations (as given, uppercase first letter, all uppercase, all lowercase).

A token is counted when the single-keyword method `detect_keyword(keyword, token)` finds a
variation inside it, so substrings count as well ("cats" counts for "cat").

# Arguments
- `keywords::AbstractVector` Vector of keywords to search for
- `text::String` The text to search in

# Returns
- `Dict{String, Integer}` Returns a dictionary mapping each keyword to its count in the text (0 if not found)

# Examples
```julia
julia> detect_keyword(["test", "example"], "This is a Test EXAMPLE")
# "test" => 1, "example" => 1

julia> detect_keyword(["cat"], "cats and category")
# "cat" => 2
```
"""
function detect_keyword(keywords::T, text::String)::Dict{String, Integer} where {T<:AbstractVector}
    kw = Dict{String, Integer}()
    splittext = string.(split(text, " "))
    for keyword in keywords
        # Number of tokens in which some variation of the keyword occurs.
        kw[keyword] = count(token -> detect_keyword(keyword, token) !== nothing, splittext)
    end
    return kw
end


"""
    detect_keyword(keyword::String, text::String) -> Union{Nothing, String}

Detects if a keyword exists in the text in different case variations (as given, uppercase
first letter, all uppercase, or all lowercase). The search is a substring search.

# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in

# Returns:
- `Union{Nothing, String}` Returns the first keyword variation that occurs in the text
  (tried in the order listed above), otherwise returns nothing

# Examples:
```jldoctest
julia> detect_keyword("test", "This is a Test case")
"Test"

julia> detect_keyword("error", "NO ERRORS FOUND")
"ERROR"

julia> detect_keyword("missing", "complete data") === nothing
true
```
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
    # Variations are tried in priority order; the first one present in the text wins.
    for variation in (keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword))
        occursin(variation, text) && return variation
    end

    # Return nothing if no variation is found
    return nothing
end
|
||||
|
||||
|
||||
"""
|
||||
|
||||
@@ -1,7 +0,0 @@
|
||||
python -> pandas -> dataframe -> csv
|
||||
|
||||
|
||||
|
||||
julia -> DataFrames -> dataframe -> csv
|
||||
|
||||
dict -> dataframe -> csv
|
||||
Reference in New Issue
Block a user