906 lines
22 KiB
Julia
906 lines
22 KiB
Julia
module util
|
||
|
||
export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
|
||
findMatchingDictKey, textToDict, randstring, randstrings, timeout,
|
||
dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
|
||
dfToString, dataframe_to_json_list, dict_to_string, extract_triple_backtick_text,
|
||
countGivenWords, remove_french_accents
|
||
|
||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
|
||
|
||
# ---------------------------------------------- 100 --------------------------------------------- #
|
||
|
||
""" Compute the time difference between a start time and a stop time in a given unit.
    Unit can be "milliseconds", "seconds", "minutes", "hours" (case-insensitive).

The elapsed milliseconds are reduced with integer division (`÷`), so a partial unit is
truncated toward zero rather than rounded.

# Arguments
    - `starttime::DateTime`
        start time
    - `stoptime::DateTime`
        stop time
    - `unit::String`
        unit of the returned time difference

# Return
    - time difference in the given unit

# Throws
    - `ErrorException` when `unit` is not one of the supported names

# Example
```jldoctest
julia> using GeneralUtils, Dates
julia> a = Dates.now()
julia> b = a + Dates.Day(5)    # add 5 days
julia> GeneralUtils.timedifference(a, b, "hours")
120
```
"""
function timedifference(starttime::DateTime, stoptime::DateTime, unit::String)::Integer
    # Subtracting two DateTimes yields a Millisecond period; `.value` is its integer count.
    elapsed_ms = (stoptime - starttime).value
    requested = lowercase(unit)

    requested == "milliseconds" && return elapsed_ms
    requested == "seconds" && return elapsed_ms ÷ 1000
    requested == "minutes" && return elapsed_ms ÷ (1000 * 60)
    requested == "hours" && return elapsed_ms ÷ (1000 * 60 * 60)

    error("Invalid unit specified. Please choose from: milliseconds, seconds, minutes, hours")
end
|
||
|
||
|
||
""" Run a function, capturing any error and its stacktrace instead of letting it propagate.

When `f` throws, the error message and a rendered stacktrace are logged with `@warn`
and returned to the caller.

# Arguments
    - `f::Function`
        a function that might throw an error
    - `args`
        positional arguments forwarded to `f`

# Return
    - `outcome::NamedTuple`
        `(success, result, errormsg, st)` where
        `success` is `true` when `f` returned normally,
        `result` is the value returned by `f` (`nothing` on failure),
        `errormsg` is the `showerror` text of the exception (`nothing` on success),
        `st` is the stacktrace rendered as a `String` (`nothing` on success).

# Example
```jldoctest
julia> using GeneralUtils
julia> testf(a, b) = a + b
julia> success, result, errormsg, st = GeneralUtils.showstracktrace(testf, 5, "6")
julia> success
false
julia> print(st)
16-element Vector{Base.StackTraces.StackFrame}:
 testf(a::Int64, b::String) at REPL[12]:1
 ...
```
"""
function showstracktrace(f::Function, args...)::NamedTuple
    # Plain locals: the outcome of one call must not leak into module-level globals,
    # where concurrent calls could overwrite each other's results.
    st = nothing        # rendered stacktrace
    errorMsg = nothing
    success = false
    fResult = nothing

    try
        fResult = f(args...)
        success = true
    catch e
        errorMsg = sprint(showerror, e)
        # Render with the "text/plain" MIME so the trace reads like the REPL display.
        st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
        @warn "Error occurred: $errorMsg\n$st"
    end

    return (success=success, result=fResult, errormsg=errorMsg, st=st)
end
|
||
|
||
""" Find every key of a dictionary whose name contains a given text.

Both the key and `text` are converted with `string` and compared with `occursin`, so the
match is a case-sensitive substring test.

# Arguments
    - `d<:AbstractDict`
        The dictionary whose keys are searched.
    - `text::Union{String, Symbol}`
        The text to look for inside each key.

# Returns
    - `result::Vector{Symbol}`
        The matching keys (empty when nothing matches).

# Examples
```jldoctest
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_12 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findMatchingDictKey(d, "key_1")
2-element Vector{Symbol}:
 :key_1
 :key_12
```
"""
function findMatchingDictKey(d::T, text::Union{String, Symbol}
    )::Vector{Symbol} where {T<:AbstractDict}

    needle = string(text)
    hits = filter(keys(d)) do candidate
        occursin(needle, string(candidate))
    end

    # `filter` over a key set yields a set; callers expect an indexable vector.
    return collect(hits)
end
|
||
|
||
|
||
"""
Find the key in a dictionary `d` with the highest index value that matches a given `text`.

The index of a key is the integer formed by all decimal digits in its name
(e.g. `:key_12` has index 12). Keys without any digit carry no index and are ignored
when looking for the maximum.

# Arguments
    - `d<:AbstractDict`
        The dictionary to search for keys.
    - `text<:Union{String, Symbol}`
        The text to match against the keys.

# Returns
    - `NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}}`
        - `(result=nothing, maxindice=nothing)` when no key matches;
        - `(result=Symbol(text), maxindice=nothing)` when the only match is `text` itself;
        - `(result=<first match>, maxindice=nothing)` when keys match but none has an index;
        - otherwise the matching key with the highest index, and that index.

# Examples
```jldoctest
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_2 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findHighestIndexKey(d, "key")
(result = :key_3, maxindice = 3)
```
"""
function findHighestIndexKey(d::T, text::Union{String, Symbol}
    )::NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}} where {T<:AbstractDict}

    matching_keys = findMatchingDictKey(d, text)

    if isempty(matching_keys)
        return (result=nothing, maxindice=nothing)
    elseif length(matching_keys) == 1 && matching_keys[1] == Symbol(text)
        return (result=Symbol(text), maxindice=nothing)
    end

    # Keep only the digits of each key. `tryparse` gives `nothing` for a key with no
    # digits, where `parse` would throw on the empty string.
    indices = [tryparse(Int, replace(string(k), r"[^\d]" => "")) for k in matching_keys]
    indexed = findall(!isnothing, indices)

    if isempty(indexed)
        # Keys matched, but none of them carries a numeric index.
        return (result=first(matching_keys), maxindice=nothing)
    end

    values = Int[indices[i] for i in indexed]
    best = indexed[argmax(values)]    # argmax keeps the first key on ties
    return (result=matching_keys[best], maxindice=indices[best])
end
|
||
|
||
|
||
|
||
""" Generate a random UUID (version 4) written with underscores instead of hyphens.

# Return
    - `uuid4::String`
        the UUID text with every "-" replaced by "_"

# Example
```jldoctest
julia> using GeneralUtils
julia> GeneralUtils.uuid4snakecase()
"0f6e4f5c_568c_4df4_8c79_1d7a58072f4a"
```
"""
function uuid4snakecase()::String
    return replace(string(uuid4()), "-" => "_")
end
|
||
|
||
|
||
""" Build a copy of a dictionary in which selected keys are renamed.

The input dictionary is not modified. Keys that do not appear in `replacementMap` are
carried over unchanged.

# Arguments
    - `d::Dict`
        The input dictionary whose keys should be renamed
    - `replacementMap::Dict`
        A dictionary that maps old keys to new keys

# Return
    - `newDict::Dict`
        a new `Dict{Any, Any}` holding the same values under the renamed keys

# Example
```jldoctest
julia> using GeneralUtils
julia> d = Dict(:a => 1, :b => 2, :c => 3)
julia> replacement_map = Dict(:a => :x, :b => :y)
julia> new_dict = GeneralUtils.replaceDictKeys(d, replacement_map)
Dict{Any, Any} with 3 entries:
  :y => 2
  :c => 3
  :x => 1
```
"""
function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
    renamed = Dict()
    foreach(d) do (oldkey, value)
        # Fall back to the original key when no replacement is registered for it.
        renamed[get(replacementMap, oldkey, oldkey)] = value
    end
    return renamed
end
|
||
|
||
|
||
""" Convert text into a dictionary using the given keywords. The keywords slice the text
    into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
    Any text to the left of the first keyword is discarded. WARNING, ordering is important:
    the keywords must be listed in the order in which they appear in the text.

Each keyword is matched in one of three case variations (as given, first letter
uppercased, all uppercase), whichever `detect_keyword` finds in the text.

# Arguments
    - `text::String`
        A text to be converted.
    - `keywords::Vector{String}`
        A list of keywords used to slice the text.
        These keywords also become the keys of the resulting dict.

# Keyword Arguments
    - `rightmarker::String`
        A marker appended to each keyword to make it unique. Ex, with keyword "plan" and
        rightmarker ":", the function searches for "plan:", otherwise it searches for "plan".
        The marker is not part of the resulting dict keys.
    - `symbolkey::Bool`
        If true, the resulting dict's keys are Symbols, otherwise Strings.
    - `lowercasekey::Bool`
        If true, the resulting dict's keys are lowercased.

# Return
    - `d::OrderedDict`
        keyword => text following that keyword (whitespace-stripped), in keyword order

# Throws
    - `ErrorException` when a keyword (with its marker) cannot be found in the text

# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Symbol, Any}(:thought => "what to do",
    :plan => "wake up and going out",
    :action => "1. wake up 2. eat 3. sleep")
```
"""
function textToDict(text::String, keywords::Vector{String};
    rightmarker::Union{String, Nothing}=nothing, symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

    # Resolve every keyword to the case variation (e.g. agent, Agent, AGENT) that is
    # actually present in the text.
    kw = String[]
    for keyword in keywords
        variation = detect_keyword(keyword, text)
        # Report a missing keyword with the intended message instead of letting `nothing`
        # reach the string operations below and fail with a MethodError.
        if variation === nothing
            error("""keyword "$keyword" not found in the provided text""")
        end
        push!(kw, variation)
    end

    od1, od2 =
        if symbolkey
            OrderedDict{Symbol, Any}(), OrderedDict{Symbol, Any}()
        else
            OrderedDict{String, Any}(), OrderedDict{String, Any}()
        end

    remainingtext = text

    # Walk the keywords right-to-left: everything after the last occurrence of a keyword
    # belongs to it, and the text is then cut just before that keyword.
    for keyword in reverse(kw)
        mkeyword = rightmarker !== nothing ? keyword * rightmarker : keyword

        # Find the position of the keyword in the text
        keywordidx = findlast(mkeyword, remainingtext)

        if keywordidx !== nothing
            # nextind/prevind step by character, so slicing stays valid when a multibyte
            # character sits next to the keyword (plain +1 / -1 would hit a byte offset
            # inside the character and throw StringIndexError).
            substr = remainingtext[nextind(remainingtext, last(keywordidx)):end]
            str = string(strip(substr))    # removes both leading and trailing whitespace
            _key = lowercasekey ? lowercase(keyword) : keyword
            key = symbolkey ? Symbol(_key) : _key
            od1[key] = str
            remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
        else
            error("""keyword "$keyword" not found in the provided text""")
        end
    end

    orderedkw = lowercasekey ? lowercase.(kw) : kw

    # Segments were collected right-to-left; re-insert them in the caller's keyword order.
    for keyword in orderedkw
        key = symbolkey ? Symbol(keyword) : keyword
        od2[key] = od1[key]
    end

    return od2
end
|
||
|
||
|
||
|
||
|
||
|
||
""" Generate a random string of lowercase letters

# Arguments
    - `n::Integer`
        The number of characters to generate

# Return
    - `s::String`
        `n` characters, each drawn from 'a':'z'

# Example
```jldoctest
julia> result = randstring(5)
"fysmp"
```
"""
function randstring(n::Integer)::String
    letters = rand('a':'z', n)
    return join(letters)
end
|
||
|
||
|
||
""" Generate several random strings joined by single spaces

# Arguments
    - `totalgroup::Integer`
        The number of random strings (groups) to generate
    - `stringlength::Integer`
        The number of characters in each group

# Return
    - `s::String`
        the groups separated by one space, without leading or trailing whitespace

# Example
```jldoctest
julia> result = randstrings(3, 5)
"fysmp cmhdk iuytr"
```
"""
function randstrings(totalgroup::Integer, stringlength::Integer)::String
    groups = [randstring(stringlength) for _ in 1:totalgroup]
    # `strip` keeps the result blank-free even when every group is empty.
    return strip(join(groups, " "))
end
|
||
|
||
|
||
|
||
""" Execute a function with a time limit.

`f` is started as a task and called as `f(fargs)`. A watchdog timer interrupts the task
with an `InterruptException` if it is still running after `timeoutwindow` seconds.

# Arguments
    - `f::Function`
        a function to run; it is called with the single argument `fargs`
    - `timeoutwindow::Integer`
        timeout in seconds

# Keyword Argument
    - `fargs`
        the argument passed to `f` (default `nothing`)
    - `timeoutmsg::String`
        the value returned when the task does not finish normally

# Return
    - the task result, otherwise `timeoutmsg`.
      NOTE: any exception raised by the task, not only the timeout interrupt, also
      produces `timeoutmsg`.

# Example
```jldoctest
julia> function testfunc(x)
           sleep(x)
           return "task done"
       end
julia> result = timeout(testfunc, 10; fargs=20)
"task timed out"
julia> result = timeout(testfunc, 20; fargs=10)
"task done"
```
"""
function timeout(f::Function, timeoutwindow::Integer; fargs=nothing, timeoutmsg="task timed out")
    tsk = @task f(fargs)
    schedule(tsk)

    watchdog = Timer(timeoutwindow) do _
        istaskdone(tsk) || Base.throwto(tsk, InterruptException())
    end

    try
        return fetch(tsk)
    catch
        return timeoutmsg
    finally
        # Cancel the watchdog once the outcome is known; otherwise it stays armed (and
        # keeps the task referenced) until the whole window elapses.
        close(watchdog)
    end
end
|
||
|
||
|
||
|
||
""" Convert a dataframe into CSV text.

# Arguments
    - `df::DataFrame`
        The DataFrame to serialize

# Return
    - `result::String`
        the CSV text produced by `CSV.write` for `df`

# Example
```jldoctest
julia> using DataFrames, GeneralUtils
julia> df = DataFrame(A=1:3, B=5:7, fixed=1)
julia> result = GeneralUtils.dataframeToCSV(df)
```
"""
function dataframeToCSV(df::DataFrame)
    # NOTE(review): `CSV` is not among the packages loaded by this module's visible `using`
    # line — confirm it is brought into scope elsewhere, otherwise this call cannot resolve.
    # `sprint` supplies the in-memory buffer and returns its content as a String.
    return sprint(io -> CSV.write(io, df))
end
|
||
|
||
""" Convert a DataFrame into a list of Dict rows.

# Arguments
    - `df::DataFrame`
        The input DataFrame to be converted.

# Return
    - `rows::Vector`
        One `Dict{String, Any}` per row of the dataframe, mapping each column name to the
        value in that row. The container itself is an untyped vector.

# Example
```jldoctest
julia> using DataFrames, GeneralUtils
julia> df = DataFrame(A = [1, 2, 3], B = ["apple", "banana", "cherry"])
julia> vectorDict = GeneralUtils.dfToVectorDict(df)
3-element Vector{Any}:
 Dict{String, Any}("B" => "apple", "A" => 1)
 Dict{String, Any}("B" => "banana", "A" => 2)
 Dict{String, Any}("B" => "cherry", "A" => 3)
```
"""
function dfToVectorDict(df::DataFrame)
    columns = names(df)
    rows = Any[]
    for row in eachrow(df)
        rowdict = Dict{String, Any}()
        foreach(col -> rowdict[col] = row[col], columns)
        push!(rows, rowdict)
    end
    return rows
end
|
||
|
||
|
||
|
||
""" Split a large vector of dictionaries into numbered parts of limited size

Elements keep their order: part 1 receives the first `partsize` elements, part 2 the next
`partsize`, and so on; the last part may be shorter. A trace line is printed to stdout on
every call.

# Arguments
    - `data`
        the vector of dictionaries to partition
    - `partsize`
        how many dicts go into each part

# Return
    - `NamedTuple`
        `(datatype, totalparts, partsize, dataparts)` where `dataparts` is a
        `Dict{Int, Vector{Dict}}` mapping the 1-based part number to its elements

# Example
```jldoctest
julia> using GeneralUtils
julia> vecDict = [Dict("a" => i) for i in 1:10]
julia> d = GeneralUtils.disintegrate_vectorDict(vecDict, 3)
julia> d.dataparts
Dict{Int64, Vector{Dict}} with 4 entries:
  1 => [Dict("a"=>1), Dict("a"=>2), Dict("a"=>3)]
  2 => [Dict("a"=>4), Dict("a"=>5), Dict("a"=>6)]
  3 => [Dict("a"=>7), Dict("a"=>8), Dict("a"=>9)]
  4 => [Dict("a"=>10)]
```
"""
function disintegrate_vectorDict(data::Vector, partsize::Integer
    )
    println("--> disintegrate_vectorDict()")
    parts = Dict{Int, Vector{Dict}}()
    for (position, item) in enumerate(data)
        # 1-based part number of the element at this position
        partkey = (position - 1) ÷ partsize + 1
        # Fetch the bucket for this part, creating it on first use.
        bucket = get!(Vector{Dict}, parts, partkey)
        push!(bucket, item)
    end
    return (datatype="vector{Dict}", totalparts=length(parts), partsize=partsize, dataparts=parts)
end
|
||
|
||
|
||
|
||
""" Get a value from a DataFrame row by a given key

# Arguments
    - `row::DataFrameRow`
        The DataFrame row to read from.
    - `key::Symbol`
        The column name (as a symbol) whose value is wanted.

# Return
    - `Any`
        The value stored under column `key` in the given row.

# Example
```jldoctest
julia> using DataFrames

julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])

julia> getDataFrameValue(df[1, :], :name)
"Alice"
```
"""
function getDataFrameValue(row::DataFrameRow, key::Symbol)
    # Property access with a column name held in a variable.
    return getproperty(row, key)
end
|
||
|
||
|
||
""" Convert a DataFrame row to a key:value string

# Arguments
    - `row::DataFrameRow`
        The DataFrame row to convert.

# Return
    - `String`
        The row written as "column: value" pairs separated by ", ".
        A row without columns yields an empty string.

# Example
```jldoctest
julia> using DataFrames

julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])

julia> dfRowtoString(df[1, :])
"name: Alice, age: 25"
```
"""
function dfRowtoString(row::DataFrameRow)::String
    # `join` places the separator only between items. Trimming a trailing ", " with
    # `str[1:end-2]` is byte arithmetic and throws StringIndexError when the last value
    # ends in a multibyte character (e.g. "Café").
    return join(("$key: $(getDataFrameValue(row, key))" for key in keys(row)), ", ")
end
|
||
|
||
|
||
""" Convert a DataFrame to a string representation

# Arguments
    - `df::DataFrame`
        The DataFrame to convert, where each row will be converted to a string.

# Return
    - `String`
        One line per row in the form "i) <row text>", where `i` is the 1-based row number
        and the row text comes from `dfRowtoString`. Every line, including the last one,
        ends with a newline. A DataFrame without rows yields an empty string.

# Example
```jldoctest
julia> using DataFrames

julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])

julia> print(dfToString(df))
1) name: Alice, age: 25
2) name: Bob, age: 30
```
"""
function dfToString(df::DataFrame)
    numbered = ("$i) $(dfRowtoString(row))\n" for (i, row) in enumerate(eachrow(df)))
    return join(numbered)
end
|
||
|
||
|
||
""" Convert a DataFrame to a list of JSON strings

# Arguments
    - `df::DataFrame`
        The DataFrame to convert, where each row will be converted to a JSON string.

# Return
    - `Vector{String}`
        One JSON object per row, with the columns in DataFrame column order.

# Example
```jldoctest
julia> using DataFrames

julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])

julia> foreach(println, dataframe_to_json_list(df))
{"name":"Alice","age":25}
{"name":"Bob","age":30}
```
"""
function dataframe_to_json_list(df::DataFrame)::Vector{String}
    columns = names(df)
    json_list = String[]
    for row in eachrow(df)
        # An OrderedDict keeps the columns in DataFrame order; a plain Dict would emit
        # them in arbitrary order.
        json_row = OrderedDict{String, Any}()
        for col in columns
            json_row[col] = row[col]
        end
        # This module loads JSON3, not JSON, so serialize with JSON3.write.
        push!(json_list, JSON3.write(json_row))
    end
    return json_list
end
|
||
|
||
|
||
""" Convert a dictionary to a numbered string representation.

# Arguments
    - `od<:AbstractDict`
        The dictionary to convert. Entries are numbered in the dictionary's own iteration
        order, so pass an `OrderedDict` when the order matters.

# Return
    - `String`
        The entries written as "index) key: value" and separated by ", ".
        An empty dictionary yields an empty string.

# Example
```jldoctest
julia> using DataStructures

julia> od = OrderedDict("name" => "Alice", "age" => 25)

julia> dict_to_string(od)
"1) name: Alice, 2) age: 25"
```
"""
function dict_to_string(od::T) where {T<:AbstractDict}
    numbered = ("$i) $key: $value" for (i, (key, value)) in enumerate(od))
    return join(numbered, ", ")
end
|
||
|
||
|
||
"""
    extract_triple_backtick_text(input::String) -> Vector{String}

Extract every span of text enclosed between a pair of triple backticks in `input`.

Matching is non-greedy and spans newlines, so each opening fence is paired with the
nearest closing fence. The backticks themselves are not part of the result; anything
directly after the opening fence (such as a language tag) is.

# Arguments:
- `input::String`: The string that may contain triple backtick blocks.

# Returns:
- `Vector{String}`: The enclosed texts in order of appearance (empty when there is none).

# Examples:

    julia> extract_triple_backtick_text("Here is some text ```with a code block``` and more text.")
    1-element Vector{String}:
     "with a code block"
"""
function extract_triple_backtick_text(input::String)::Vector{String}
    # Lazy capture of any characters (newlines included) between two fences
    fenced = r"```([\s\S]*?)```"
    return String[hit.captures[1] for hit in eachmatch(fenced, input)]
end
|
||
|
||
|
||
"""
    detect_keyword(keyword::String, text::String) -> Union{Nothing, String}

Detect which case variation of a keyword occurs in the text.

Three variations are tried in this order: the keyword exactly as given, the keyword with
its first letter uppercased, and the keyword fully uppercased. The first one that occurs
in `text` as a substring is returned.

# Arguments:
- `keyword::String`: The keyword to search for
- `text::String`: The text to search in

# Returns:
- `Union{Nothing, String}`: The matched keyword variation, or `nothing` when none occurs

# Examples:
```julia
julia> detect_keyword("test", "This is a Test case")
"Test"

julia> detect_keyword("error", "NO ERRORS FOUND")
"ERROR"

julia> detect_keyword("missing", "complete data")    # returns nothing
```
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
    candidates = [keyword, uppercasefirst(keyword), uppercase(keyword)]
    hit = findfirst(candidate -> occursin(candidate, text), candidates)
    return hit === nothing ? nothing : candidates[hit]
end
|
||
|
||
|
||
"""
    countGivenWords(text::String, words::Vector{String}) -> Vector{Int}

Count the occurrences of each given word within the provided text.

A word is counted as a plain substring: the text is split on the word and the number of
separators is reported, so matches inside longer words count too.

# Arguments
- `text::String`: The input text to search through.
- `words::Vector{String}`: The words whose occurrences are counted.

# Returns
- `Vector{Int64}`: The count for each entry of `words`, in the same order.

# Examples
```julia
julia> GeneralUtils.countGivenWords("hello world hello", ["hello", "world"])
2-element Vector{Int64}:
 2
 1

julia> GeneralUtils.countGivenWords("foo bar baz foo", ["foo", "qux"])
2-element Vector{Int64}:
 2
 0
```
"""
function countGivenWords(text::String, words::Vector{String})::Vector{Int}
    # Splitting on a word produces one more piece than there are occurrences.
    return Int[length(split(text, word)) - 1 for word in words]
end
|
||
|
||
|
||
|
||
"""
    remove_french_accents(text::AbstractString) -> String

Remove French accents from the given text.

Every accented letter in the replacement table is swapped for its unaccented base letter,
in lowercase and in uppercase. The typographic apostrophe (’) is replaced by the plain
ASCII apostrophe. All other characters are returned unchanged.

# Arguments
- `text::AbstractString`: The input string containing French accents.

# Returns
- `String`: The input string with the accents removed.

# Examples
```julia
julia> remove_french_accents("Café")
"Cafe"

julia> remove_french_accents("L'été est beau.")
"L'ete est beau."

julia> remove_french_accents("À l'École")
"A l'Ecole"
```
"""
function remove_french_accents(text::AbstractString)::AbstractString
    # Lowercase accented letters and their unaccented replacements
    lowercase_map = Dict{Char, Char}(
        'à' => 'a', 'â' => 'a', 'ä' => 'a', 'á' => 'a',
        'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
        'î' => 'i', 'ï' => 'i', 'í' => 'i',
        'ñ' => 'n',
        'ô' => 'o', 'ö' => 'o', 'ò' => 'o', 'ó' => 'o',
        'ù' => 'u', 'û' => 'u', 'ü' => 'u',
        'ÿ' => 'y',
        'ç' => 'c',
    )

    # Typographic apostrophe -> ASCII apostrophe
    replacements = Dict{Char, Char}('’' => '\'')

    # Register the uppercase counterpart of every entry so that capitals such as
    # 'À', 'È' or 'Ç' are handled consistently with 'É', 'Ä', 'Ö', 'Ü'.
    for (accented, plain) in lowercase_map
        replacements[accented] = plain
        replacements[uppercase(accented)] = uppercase(plain)
    end

    # Characters without an entry map to themselves.
    return join(get(replacements, char, char) for char in text)
end
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
end # module util |