v0.3.1 #1

Merged
ton merged 10 commits from v0.3.1 into main 2025-12-17 05:43:32 +00:00
2 changed files with 189 additions and 247 deletions
Showing only changes of commit b3e8df7287 - Show all commits

View File

@@ -6,7 +6,7 @@ export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue,
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax,
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
getDictPath
getDictPath, detectKeywordVariation, textToDict
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, CSV
using ..util, ..communication
@@ -1150,9 +1150,193 @@ end
"""
detectKeywordVariation(keywords::AbstractVector{String}, text::String) -> Dict{String, Union{Array, Nothing}}
Detects and collects all case-variant occurrences of multiple keywords in the text.
This function processes each keyword individually and returns an array of matched variations for each keyword.
# Arguments
- `keywords::AbstractVector{String}` Vector of keywords to search for
- `text::String` The text to search in
# Returns
- `Dict{String, Array}` Returns a dictionary mapping each keyword to an array of matched variations found in the text
# Examples
```jldoctest
julia> detectKeywordVariation(["test", "example", "cat"], "This is a Test EXAMPLE")
Dict{String, Array}("test" => ["Test"], "example" => ["EXAMPLE"], "cat" => nothing)
"""
function detectKeywordVariation(keywords::T, text::String)::Dict{String, Union{Array, Nothing}} where {T<:AbstractVector}
kw = Dict{String, Union{Array, Nothing}}()
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in keywords
ws = detectKeywordVariation.(keyword, text)
total = sum(issomething.(ws))
if total != 0
kw[keyword] = ws
else
kw[keyword] = nothing
end
end
return kw
end
"""
detectKeywordVariation(keyword::String, text::String) -> Union{Nothing, Array{String}}
Detects if a keyword exists in the text in different case variations (lowercase, uppercase first letter, or all uppercase).
# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in
# Returns:
- `Union{Nothing, Array{String}}` Returns an array of matched keyword variations if found, otherwise returns nothing
# Examples:
```jldoctest
julia> detectKeywordVariation("test", "This is a Test case")
["Test"]
julia> detectKeywordVariation("error", "NO ERRORS FOUND")
["ERRORS"]
julia> detectKeywordVariation("missing", "complete data")
nothing
"""
function detectKeywordVariation(keyword::String, text::String)::Union{Nothing, Array{String}}
# Define the keyword variations to search for
wordVariations = [uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)]
# wordVariations may duplicate keyword
keyword_variations = [keyword]
for i in wordVariations
i != keyword ? push!(keyword_variations, i) : nothing
end
_splittext = string.(strip.(split(text, " ")))
splittext = String[]
# remove . after a word
for i in _splittext
if length(i) != 0 && i[end] ['.']
word = string(i[1:end-1])
push!(splittext, word)
else
push!(splittext, i)
end
end
result = String[]
for variation in keyword_variations
# if length of both word is equals then it is a whole word otherwise it is part of part of other word
r = findIndex(splittext, variation)
if isempty(r[2])
# skip
else
# if variation > 1 add them all so this function detect duplicate keyword
variations = [variation for i in eachindex(r[2])]
result = vcat(result, variations)
end
end
return result
end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important
# Arguments
- `text::String`
A text to be converted.
- `keywords::Vector{String}`
A list of keywords to be used to slice the text.
These keywords also be the resulting dict keys.
# Keyword Arguments
- `rightmarker::String`
A maker used to make a word to be unique. Ex, A keyword "plan" with rightmarker ":",
the function will search for "plan:" otherwise the function will search for "plan".
The marker will not be in the resulting dict keys.
- `symbolkey::Bool`
If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
set resulting dict's key to be lowercase
# Return
- `d::OrderedDict`
# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Any, Any}(:thought => "what to do",
:plan => "wake up and going out",
:action => "1. wake up 2. eat 3. sleep")
```
# Signature
"""
function textToDict(text::String, detectKeywords::Vector{String};
dictKey::Union{Vector{String}, Nothing}=nothing,
symbolkey::Bool=false, lowercasekey::Bool=false
)::OrderedDict
# make sure this function detect variation of a work e.g. agent, Agent, AGENT
kw = []
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in detectKeywords
detected = detectKeywordVariation(keyword, text)
if detected !== nothing
push!(kw, detected)
else
error("Keyword $keyword not found in text: $text")
end
end
od1, od2 =
if symbolkey
OrderedDict{Symbol, Any}(), OrderedDict{Symbol, Any}()
else
OrderedDict{String, Any}(), OrderedDict{String, Any}()
end
remainingtext = text
dictKey_ = reverse(dictKey)
# process text from back to front
rkw = reverse(kw)
for (i,keyword) in enumerate(rkw)
# Find the position of the keyword in the text
keywordidx = findlast(keyword, remainingtext)
dKey = dictKey_[i]
if keywordidx !== nothing
substr = remainingtext[keywordidx[end]+1:end]
str = string(strip(substr)) # Removes both leading and trailing whitespace.
_key = lowercasekey == true ? lowercase(dKey) : dKey
key = symbolkey == true ? Symbol(_key) : _key
od1[key] = str
remainingtext = remainingtext[1:keywordidx[1]-1]
else
error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
end
end
# correct the order
ks = reverse([i for i in keys(od1)])
for k in ks
k = symbolkey == true ? Symbol(k) : k
od2[k] = od1[k]
end
return od2
end

View File

@@ -1,12 +1,12 @@
module util
export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
findMatchingDictKey, textToDict, randstring, randstrings, timeout,
findMatchingDictKey, randstring, randstrings, timeout,
dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey,
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, issomething,
dictToString_numbering, extract_triple_backtick_text,
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter,
extractTextBetweenString,
countGivenWords, remove_french_accents,
extractTextBetweenCharacter, extractTextBetweenString,
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
@@ -244,102 +244,6 @@ function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important
# Arguments
- `text::String`
A text to be converted.
- `keywords::Vector{String}`
A list of keywords to be used to slice the text.
These keywords also be the resulting dict keys.
# Keyword Arguments
- `rightmarker::String`
A maker used to make a word to be unique. Ex, A keyword "plan" with rightmarker ":",
the function will search for "plan:" otherwise the function will search for "plan".
The marker will not be in the resulting dict keys.
- `symbolkey::Bool`
If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
set resulting dict's key to be lowercase
# Return
- `d::OrderedDict`
# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Any, Any}(:thought => "what to do",
:plan => "wake up and going out",
:action => "1. wake up 2. eat 3. sleep")
```
# Signature
"""
function textToDict(text::String, detectKeywords::Vector{String};
dictKey::Union{Vector{String}, Nothing}=nothing,
symbolkey::Bool=false, lowercasekey::Bool=false
)::OrderedDict
# make sure this function detect variation of a work e.g. agent, Agent, AGENT
kw = []
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in detectKeywords
detected = detect_keyword(keyword, text)
if detected !== nothing
push!(kw, detected)
else
error("Keyword $keyword not found in text.")
end
end
od1, od2 =
if symbolkey
OrderedDict{Symbol, Any}(), OrderedDict{Symbol, Any}()
else
OrderedDict{String, Any}(), OrderedDict{String, Any}()
end
remainingtext = text
dictKey_ = reverse(dictKey)
# process text from back to front
rkw = reverse(kw)
for (i,keyword) in enumerate(rkw)
# Find the position of the keyword in the text
keywordidx = findlast(keyword, remainingtext)
dKey = dictKey_[i]
if keywordidx !== nothing
substr = remainingtext[keywordidx[end]+1:end]
str = string(strip(substr)) # Removes both leading and trailing whitespace.
_key = lowercasekey == true ? lowercase(dKey) : dKey
key = symbolkey == true ? Symbol(_key) : _key
od1[key] = str
remainingtext = remainingtext[1:keywordidx[1]-1]
else
error("""keyword "$keyword" not found in the provided text: $text </end of error note>""")
end
end
# correct the order
ks = reverse([i for i in keys(od1)])
for k in ks
k = symbolkey == true ? Symbol(k) : k
od2[k] = od1[k]
end
return od2
end
""" Generate a random string
# Arguments
@@ -784,152 +688,6 @@ function cuttext(range, text)
end
end
"""
detect_keyword(keywords::AbstractVector{String}, text::String; mode::Union{String, Nothing}=nothing, delimiter::AbstractVector=[' ', '\n', '.']) -> Dict{String, Integer}
Detects and counts occurrences of multiple keywords in the text in different case variations (lowercase, uppercase first letter, or all uppercase).
# Arguments
- `keywords::AbstractVector{String}` Vector of keywords to search for
- `text::String` The text to search in
# Keyword Arguments
- `mode::Union{String, Nothing}` When set to "individual", only counts matches that are individual words (default: nothing)
- `delimiter::AbstractVector` Characters used to determine word boundaries when mode="individual" (default: [' ', '\n', '.'])
# Returns
- `Dict{String, Integer}` Returns a dictionary mapping each keyword to its count in the text (0 if not found)
# Examples
```jldoctest
julia> detect_keyword(["test", "example"], "This is a Test EXAMPLE")
Dict{String, Integer}("test" => 1, "example" => 1)
julia> detect_keyword(["cat"], "cats and category", mode="individual")
Dict{String, Integer}("cat" => 0)
julia> detect_keyword(["error"], "No ERRORS found!")
Dict{String, Integer}("error" => 1)
```
# Signature
"""
# function detect_keyword(keywords::T1, text::String;
# mode::Union{String, Nothing}=nothing, delimiter::T2=[' ', '\n', '.']
# )::Dict{String, Integer} where {T1<:AbstractVector, T2<:AbstractVector}
# # Initialize dictionary to store keyword counts
# kwdict = Dict{String, Integer}()
# for i in keywords
# kwdict[i] = 0
# end
# startindex = 1
# # Iterate through each keyword and search for matches in text
# for kw in keywords
# # Check each possible starting position in the text
# for startindex in 1:1:length(text)
# # Get the window range for current keyword at current position
# wordwindows = wordwindow(kw, startindex)
# # Extract the text slice for comparison
# cuttexts = cuttext(wordwindows, text)
# if cuttexts !== nothing
# # Try to detect keyword in current text slice
# detected_kw = detect_keyword(kw, cuttexts)
# if detected_kw !== nothing && mode === nothing
# # Increment count if keyword found and no mode restrictions
# kwdict[kw] +=1
# elseif detected_kw !== nothing && mode === "individual"
# # For individual word mode, check word boundaries
# # Check if character before keyword is a delimiter or start of text
# checkbefore =
# if wordwindows.start > 1 &&
# text[wordwindows.start-1] ∈ delimiter
# true
# elseif wordwindows.start == 1
# true
# else
# false
# end
# # Check if character after keyword is a delimiter or end of text
# checkafter =
# if wordwindows.stop < length(text) &&
# text[wordwindows.stop+1] ∈ delimiter
# true
# elseif wordwindows.stop == length(text)
# true
# else
# false
# end
# # Only count keyword if it's a complete word
# if checkbefore && checkafter
# kwdict[kw] +=1
# end
# end
# end
# end
# end
# return kwdict
# end
function detect_keyword(keywords::T, text::String)::Dict{String, Integer} where {T<:AbstractVector}
kw = Dict{String, Integer}()
splittext = string.(split(text, " "))
# use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list
for keyword in keywords
ws = detect_keyword.(keyword, splittext)
total = sum(issomething.(ws))
if total != 0
kw[keyword] = total
else
kw[keyword] = 0
end
end
return kw
end
"""
detect_keyword(keyword::String, text::String) -> Union{Nothing, String}
Detects if a keyword exists in the text in different case variations (lowercase, uppercase first letter, or all uppercase).
# Arguments:
- `keyword::String` The keyword to search for
- `text::String` The text to search in
# Returns:
- `Union{Nothing, String}` Returns the matched keyword variation if found, otherwise returns nothing
# Examples:
```jldoctest
julia> detect_keyword("test", "This is a Test case")
"Test"
julia> detect_keyword("error", "NO ERRORS FOUND")
"ERRORS"
julia> detect_keyword("missing", "complete data")
nothing
```
# Signature
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
# Define the keyword variations to search for
keyword_variations = [keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)]
# Check if any of the keyword variations are in the text
for variation in keyword_variations
if occursin(variation, text)
return variation
end
end
# Return nothing if no variation is found
return nothing
end
"""