update
This commit is contained in:
186
src/interface.jl
186
src/interface.jl
@@ -6,7 +6,7 @@ export noNegative!, randomWithProb, randomChoiceWithProb, findIndex, limitvalue,
|
||||
matMul_3Dto4D_batchwise, isNotEqual, linearToCartesian, vectorMax, findMax,
|
||||
multiply_last, multiplyRandomElements, replaceElements, replaceElements!, isBetween,
|
||||
isLess, allTrue, getStringBetweenCharacters, JSON3read_stringKey, mkDictPath!,
|
||||
getDictPath
|
||||
getDictPath, detectKeywordVariation, textToDict
|
||||
|
||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames, CSV
|
||||
using ..util, ..communication
|
||||
@@ -1150,9 +1150,193 @@ end
|
||||
|
||||
|
||||
|
"""
    detectKeywordVariation(keywords::AbstractVector, text::String) -> Dict{String, Union{Array, Nothing}}

Search `text` for every keyword in `keywords` and collect, per keyword, the spellings
reported by the single-keyword method `detectKeywordVariation(keyword::String, text::String)`.

# Arguments
- `keywords::AbstractVector`: keywords to search for. Each element is handed to the
  single-keyword method and is also used as a key of the returned dictionary.
- `text::String`: the text to search in.

# Returns
- `Dict{String, Union{Array, Nothing}}`: maps each keyword to the array returned by the
  single-keyword method, or to `nothing` when that method reports no match (it returned
  `nothing` or an empty array).

# Examples
Expected result, assuming the single-keyword method matches whole words:
```julia
julia> detectKeywordVariation(["test", "example", "cat"], "This is a Test EXAMPLE")
Dict{String, Union{Nothing, Array}} with 3 entries:
  "test"    => ["Test"]
  "example" => ["EXAMPLE"]
  "cat"     => nothing
```
"""
function detectKeywordVariation(
    keywords::T, text::String
)::Dict{String, Union{Array, Nothing}} where {T<:AbstractVector}
    kw = Dict{String, Union{Array, Nothing}}()

    for keyword in keywords
        found = detectKeywordVariation(keyword, text)
        # "No match" is normalised to `nothing`, whether the single-keyword method
        # signalled it with `nothing` or with an empty array.
        kw[keyword] = (found === nothing || isempty(found)) ? nothing : found
    end

    return kw
end
|
||||
|
||||
|
"""
    detectKeywordVariation(keyword::String, text::String) -> Union{Nothing, Array{String}}

Detect which case variations of `keyword` occur in `text`.

The spellings tried are `keyword` itself, `uppercasefirst(keyword)`, `uppercase(keyword)`
and `lowercase(keyword)`, in that order and without repeats. `text` is split on single
spaces, each piece is stripped of surrounding whitespace, and one trailing `.` is removed
from each piece. The pieces are then searched with `findIndex`.

# Arguments
- `keyword::String`: the keyword to search for.
- `text::String`: the text to search in.

# Returns
- `Union{Nothing, Array{String}}`: the matched spellings, one entry per match reported by
  `findIndex` (so a spelling that occurs twice is listed twice), or `nothing` when no
  spelling is found.

# Examples
Expected results, assuming `findIndex` matches whole words only (TODO confirm):
```julia
julia> detectKeywordVariation("test", "This is a Test case")
["Test"]

julia> detectKeywordVariation("error", "An error. Another ERROR")
["error", "ERROR"]

julia> detectKeywordVariation("missing", "complete data")
nothing
```
"""
function detectKeywordVariation(keyword::String, text::String)::Union{Nothing, Array{String}}
    # Original spelling first, then the derived ones. `unique` removes spellings that
    # coincide with the keyword or with each other (e.g. "aB" -> "AB" twice), so one
    # occurrence in the text is never counted twice.
    keyword_variations = unique([
        keyword, uppercasefirst(keyword), uppercase(keyword), lowercase(keyword)
    ])

    # Tokenise the text and drop a single trailing '.' from each token.
    splittext = String[]
    for piece in split(text, " ")
        word = strip(piece)
        if !isempty(word) && last(word) == '.'
            # `chop` removes the last *character*; byte arithmetic such as
            # `word[1:end-1]` throws when the preceding character is multi-byte.
            word = chop(word)
        end
        push!(splittext, String(word))
    end

    result = String[]
    for variation in keyword_variations
        r = findIndex(splittext, variation)
        # r[2] holds the match positions; emit one entry per match so repeated
        # occurrences of the same spelling are all reported.
        append!(result, fill(variation, length(r[2])))
    end

    # An empty result means "not found", which is reported as `nothing`.
    return isempty(result) ? nothing : result
end
|
||||
|
||||
|
"""
    textToDict(text::String, detectKeywords::Vector{String};
               dictKey=nothing, symbolkey=false, lowercasekey=false) -> OrderedDict

Slice `text` into sections using `detectKeywords` and return them as an ordered
dictionary. The text is viewed as `KW1 kw1_text KW2 kw2_text KW3 kw3_text`; each section
is the whitespace-stripped text between one keyword and the next.

Keywords are located through `detectKeywordVariation`, so the case variations it reports
(e.g. `agent`, `Agent`, `AGENT`) are all accepted. The text is processed from the last
keyword to the first, and each keyword is searched only to the left of the previously
matched one. WARNING: `detectKeywords` must therefore be listed in the order the keywords
appear in `text`. Any text before the first keyword is discarded.

# Arguments
- `text::String`: the text to convert.
- `detectKeywords::Vector{String}`: keywords used to slice the text, in order of
  appearance.

# Keyword Arguments
- `dictKey::Union{Vector{String}, Nothing}`: keys of the resulting dictionary, one per
  entry of `detectKeywords`. Defaults to `detectKeywords` itself when `nothing`.
- `symbolkey::Bool`: if `true` the resulting keys are `Symbol`s, otherwise `String`s.
- `lowercasekey::Bool`: if `true` the resulting keys are lowercased.

# Return
- `OrderedDict{Symbol, Any}` when `symbolkey` is `true`, otherwise
  `OrderedDict{String, Any}`, with entries in the order of `detectKeywords`.

# Throws
- `ArgumentError` if `dictKey` is given and its length differs from `detectKeywords`.
- `ErrorException` if a keyword is not found in the text.

# Example
Expected result, assuming `detectKeywordVariation` matches whole space-separated words:
```julia
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"

julia> textToDict(text, ["thought:", "plan:", "action:"];
                  dictKey=["thought", "plan", "action"], symbolkey=true)
OrderedDict{Symbol, Any} with 3 entries:
  :thought => "what to do"
  :plan    => "wake up and going out"
  :action  => "1. wake up 2. eat 3. sleep"
```
"""
function textToDict(text::String, detectKeywords::Vector{String};
    dictKey::Union{Vector{String}, Nothing}=nothing,
    symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Without explicit keys, the keywords themselves name the sections.
    keynames = dictKey === nothing ? detectKeywords : dictKey
    if length(keynames) != length(detectKeywords)
        throw(ArgumentError(
            "dictKey has $(length(keynames)) entries but detectKeywords has " *
            "$(length(detectKeywords)); they must have the same length"))
    end

    # For every keyword, the distinct spellings that actually occur in the text
    # (e.g. agent, Agent, AGENT).
    spellings = Vector{String}[]
    for keyword in detectKeywords
        detected = detectKeywordVariation(keyword, text)
        if detected === nothing || isempty(detected)
            error("Keyword $keyword not found in text: $text")
        end
        push!(spellings, unique(detected))
    end

    # Walk the keywords from last to first, cutting the matched section off the end
    # of the remaining text each time.
    sections = Tuple{String, String}[]
    remainingtext = text
    for i in reverse(eachindex(detectKeywords))
        # Among the spellings of this keyword, use the right-most occurrence.
        keywordidx = nothing
        for spelling in spellings[i]
            hit = findlast(spelling, remainingtext)
            if hit !== nothing && (keywordidx === nothing || first(hit) > first(keywordidx))
                keywordidx = hit
            end
        end

        if keywordidx === nothing
            error("""keyword "$(detectKeywords[i])" not found in the provided text: $text </end of error note>""")
        end

        # nextind/prevind step over whole characters, so slicing stays valid when the
        # text around the keyword contains multi-byte characters.
        section = remainingtext[nextind(remainingtext, last(keywordidx)):end]
        push!(sections, (keynames[i], string(strip(section))))
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # Sections were collected back to front; insert them front to back.
    od = symbolkey ? OrderedDict{Symbol, Any}() : OrderedDict{String, Any}()
    for (name, value) in Iterators.reverse(sections)
        _key = lowercasekey ? lowercase(name) : name
        od[symbolkey ? Symbol(_key) : _key] = value
    end

    return od
end
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user