Files
GeneralUtils/src/util.jl
2025-01-06 06:19:28 +07:00

906 lines
22 KiB
Julia
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
module util
export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
findMatchingDictKey, textToDict, randstring, randstrings, timeout,
dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
dfToString, dataframe_to_json_list, dict_to_string, extract_triple_backtick_text,
countGivenWords, remove_french_accents
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
# ---------------------------------------------- 100 --------------------------------------------- #
""" Compute time different between start time and stop time in a given unit.
Unit can be "milliseconds", "seconds", "minutes", "hours".
# Arguments
- `starttime::DateTime`
start time
- `stoptime::DateTime`
stop time
- `unit::String`
unit of time difference
# Return
- time difference in given unit
# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils, Dates
julia> a = Dates.now()
julia> b = a + Dates.Day(5) # add 5 days
julia> GeneralUtils.timedifference(a, b, "hours")
120
```
# Signature
"""
function timedifference(starttime::DateTime, stoptime::DateTime, unit::String)::Integer
    # Elapsed time as a whole number of milliseconds (negative when stoptime is earlier).
    elapsedms = Dates.value(stoptime - starttime)
    unitname = lowercase(unit)  # unit names are matched case-insensitively

    # Guard clauses: every supported unit returns its truncated quotient directly.
    unitname == "milliseconds" && return elapsedms
    unitname == "seconds" && return div(elapsedms, 1000)
    unitname == "minutes" && return div(elapsedms, 1000 * 60)
    unitname == "hours" && return div(elapsedms, 1000 * 60 * 60)
    error("Invalid unit specified. Please choose from: milliseconds, seconds, minutes, hours")
end
""" Capture then show error and stacktrace
# Arguments
- `f::Function`
a function that might throws an error
- `args` function f arguments
# Return
- `outcome::NamedTuple`
(success, result, errormsg, st); `errormsg` and `st` (the stack trace rendered as text) are `nothing` when `f` succeeds
# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils, PrettyPrinting
julia> testf(a, b) = a + b
julia> success, result, errormsg, st = GeneralUtils.showstracktrace(testf, 5, "6")
julia> pprint(st)
16-element Vector{Base.StackTraces.StackFrame}:
testf(a::Int64, b::String) at REPL[12]:1
showstracktrace(::Function, ::Int64, ::Vararg{Any}) at util.jl:95
...
```
# Signature
"""
function showstracktrace(f::Function, args...)::NamedTuple
    # Run `f(args...)`, capturing any exception instead of letting it propagate.
    # The outcome is held in locals (not module-level globals) so that nested or
    # concurrent calls cannot overwrite each other's results.
    st = nothing        # stack trace rendered as text, set only on failure
    errorMsg = nothing  # `showerror` text of the caught exception, set only on failure
    success = false
    fResult = nothing
    try
        fResult = f(args...)
        success = true
    catch e
        errorMsg = sprint(showerror, e)
        st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
        @warn "Error occurred: $errorMsg\n$st"
    end
    return (success=success, result=fResult, errormsg=errorMsg, st=st)
end
""" Find all match key of a dictionary for a given key.
# Arguments
- `d<:AbstractDict`
The dictionary to search for keys.
- `text::Union{String, Symbol}`
The text to match against the keys.
# Returns
- `result::Vector{Symbol}`
A vector of matched key
# Examples
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_12 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findMatchingDictKey(d, "key_1")
2-element Vector{Symbol}:
:key_1
:key_12
```
# Signature
"""
function findMatchingDictKey(d::T, text::Union{String, Symbol}
    )::Vector{Symbol} where {T<:AbstractDict}
    # A key matches when the textual form of `text` occurs anywhere in the key's own text.
    needle = string(text)
    hits = filter(keys(d)) do candidate
        occursin(needle, string(candidate))
    end
    # Flatten the filtered key collection into an array; the declared return type then
    # converts it to Vector{Symbol}.
    return collect(hits)
end
"""
Find the key in a dictionary `d` with the highest index value that matches a given `text`.
# Arguments
- `d<:AbstractDict`
The dictionary to search for keys.
- `text<:Union{String, Symbol}`
The text to match against the keys.
# Returns
- `NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}}`
The key in `d` with the highest index value that matches `text`, or `nothing` if no matches are found.
# Examples
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_2 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findHighestIndexKey(d, "key")
(result = :key_3, maxindice = 3)
```
# Signature
"""
function findHighestIndexKey(d::T, text::Union{String, Symbol}
    )::NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}} where {T<:AbstractDict}
    matching_keys = findMatchingDictKey(d, text)
    isempty(matching_keys) && return (result=nothing, maxindice=nothing)

    exactkey = Symbol(text)
    # A lone key identical to `text` carries no index at all.
    if length(matching_keys) == 1 && matching_keys[1] == exactkey
        return (result=exactkey, maxindice=nothing)
    end

    # The index of a key is the number formed by all of its digit characters.
    # tryparse yields `nothing` for keys without digits (e.g. `:key` next to `:key_1`)
    # instead of throwing, so such keys are simply left out of the ranking.
    digitstrings = replace.(string.(matching_keys), r"[^\d]" => "")
    parsed = tryparse.(Int, digitstrings)
    numbered = findall(!isnothing, parsed)
    if isempty(numbered)
        # No matching key is numbered: fall back to the exact key when it exists.
        fallback = exactkey in matching_keys ? exactkey : nothing
        return (result=fallback, maxindice=nothing)
    end

    indices = Int[parsed[i] for i in numbered]
    best = numbered[argmax(indices)]  # first key holding the maximum index
    return (result=matching_keys[best], maxindice=parsed[best])
end
""" Get uuid4 with snake case
# Return
- `uuid4::String`
uuid4 with snake case
# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> GeneralUtils.uuid4snakecase()
"0f6e4f_568c_4df4_8c79_1d7a58072f4a"
```
# Signature
"""
function uuid4snakecase()::String
    # Render a fresh random UUID as text and swap every '-' separator for '_'.
    return replace(string(uuid4()), "-" => "_")
end
""" Replace a dictionary key with the new key
# Arguments
- `d::Dict`
The input dictionary that you want to modify
- `replacementMap::Dict`
A dictionary that maps old keys to new keys
# Return
- `newDict::Dict`
new dictionary with the replaced keys
# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:a => 1, :b => 2, :c => 3)
julia> replacement_map = Dict(:a => :x, :b => :y)
julia> new_dict = GeneralUtils.replaceDictKeys(d, replacement_map)
Dict{Any, Any} with 3 entries:
:y => 2
:c => 3
:x => 1
```
# Signature
"""
function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
    # Keys listed in replacementMap are renamed; every other key passes through unchanged.
    # The input dictionary is not modified; the result is an untyped Dict{Any, Any}.
    renamed = Dict{Any, Any}()
    for entry in d
        target = get(replacementMap, entry.first, entry.first)
        renamed[target] = entry.second
    end
    return renamed
end
""" Convert text into a dictionary with a given keywords. This function use keywords to slice
a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The left most string which has no keyword will be discarded. WARNING, ordering is important
# Arguments
- `text::String`
A text to be converted.
- `keywords::Vector{String}`
A list of keywords to be used to slice the text.
These keywords also be the resulting dict keys.
# Keyword Arguments
- `rightmarker::String`
A marker appended to a keyword to make it unique in the text. E.g., for keyword "plan" with rightmarker ":",
the function searches for "plan:"; without a rightmarker it searches for "plan".
The marker will not be in the resulting dict keys.
- `symbolkey::Bool`
If true, resulting dict's key will be Symbols, otherwise string.
- `lowercasekey::Bool`
set resulting dict's key to be lowercase
# Return
- `d::OrderedDict`
# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Any, Any}(:thought => "what to do",
:plan => "wake up and going out",
:action => "1. wake up 2. eat 3. sleep")
```
# Signature
"""
function textToDict(text::String, keywords::Vector{String};
    rightmarker::Union{String, Nothing}=nothing, symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict
    # Resolve every keyword to the case variation that actually occurs in the text
    # (e.g. agent, Agent, AGENT). A keyword with no variation in the text is reported
    # right here rather than leaking `nothing` into the slicing loop below.
    kw = String[]
    for keyword in keywords
        variation = detect_keyword(keyword, text)
        variation === nothing &&
            error("""keyword "$keyword" not found in the provided text""")
        push!(kw, variation)
    end

    KeyType = symbolkey ? Symbol : String
    # Dict key for a detected keyword: optionally lowercased, optionally a Symbol.
    tokey(word) = begin
        label = lowercasekey ? lowercase(word) : word
        symbolkey ? Symbol(label) : label
    end

    # Walk the keywords right-to-left: everything after the last occurrence of a keyword
    # is its value, and the text before it is what remains for the preceding keywords.
    sliced = OrderedDict{KeyType, Any}()
    remainingtext = text
    for keyword in reverse(kw)
        mkeyword = rightmarker === nothing ? keyword : keyword * rightmarker
        keywordidx = findlast(mkeyword, remainingtext)
        keywordidx === nothing &&
            error("""keyword "$keyword" not found in the provided text""")
        # nextind/prevind keep both slice boundaries on valid character indices even
        # when the neighbouring characters are multi-byte.
        valuestart = nextind(remainingtext, last(keywordidx))
        sliced[tokey(keyword)] = string(strip(remainingtext[valuestart:end]))
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # The loop above filled `sliced` in reverse; rebuild it in the caller's keyword order.
    ordered = OrderedDict{KeyType, Any}()
    for keyword in kw
        key = tokey(keyword)
        ordered[key] = sliced[key]
    end
    return ordered
end
""" Generate a random string
# Arguments
- `n::Integer`
A number of string to be generated
# Return
- `s::String`
# Example
```jldoctest
julia> result = randstring(5)
"fysmp"
```
# Signature
"""
function randstring(n::Integer)::String
    # Draw n characters from 'a':'z' and pack them into a single String.
    letters = rand('a':'z', n)
    return String(letters)
end
""" Generate a random string in group
# Arguments
- `totalgroup::Integer`
A number of group of random string to be generated
- `stringlength::Integer`
A number of string to be generated
# Return
- `s::String`
# Example
```jldoctest
julia> result = randstrings(3, 5)
"fysmp cmhdk iuytr"
```
# Signature
"""
function randstrings(totalgroup::Integer, stringlength::Integer)::String
    # One random word per group, separated by single spaces.
    groups = [randstring(stringlength) for _ in 1:totalgroup]
    # strip is kept so that the all-empty-words case (stringlength == 0) still
    # collapses to an empty string.
    return strip(join(groups, " "))
end
""" Execute a function with timer.
# Arguments
- `f::Function`
a function to run
- `timeoutwindow::Integer`
timeout in seconds
# Keyword Argument
- `fargs`
arguments for the function
- `timeoutmsg::String`
time out message
# Return
- task result otherwise timeout message
# Example
```jldoctest
julia> function testfunc(x)
sleep(x)
return "task done"
end
julia> result = timeout(testfunc, 10; fargs=20)
"task timed out"
julia> result = timeout(testfunc, 20; fargs=10)
"task done"
```
# Signature
"""
function timeout(f::Function, timeoutwindow::Integer; fargs=nothing, timeoutmsg="task timed out")
    # `f` is always invoked with exactly one positional argument: `fargs`
    # (which is `nothing` unless the caller supplies it).
    tsk = @task f(fargs)
    schedule(tsk)
    # Watchdog: once `timeoutwindow` seconds elapse, interrupt the task if it is still running.
    watchdog = Timer(timeoutwindow) do _
        istaskdone(tsk) || Base.throwto(tsk, InterruptException())
    end
    try
        return fetch(tsk)
    catch
        # Any failure of the task lands here, so an exception raised by `f` itself is
        # reported with `timeoutmsg` as well, not only the watchdog's interrupt.
        return timeoutmsg
    finally
        # Disarm the watchdog so it does not stay pending for the rest of the window
        # after the task has already finished.
        close(watchdog)
    end
end
""" Convert a dataframe into CSV.
# Arguments
- `df::DataFrame`
The DataFrame to serialize as CSV text
# Return
- `result::String`
# Example
```jldoctest
julia> using DataFrames, GeneralUtils
julia> df = DataFrame(A=1:3, B=5:7, fixed=1)
julia> result = GeneralUtils.dataframeToCSV(df)
```
# Signature
"""
function dataframeToCSV(df::DataFrame)
    # Serialize `df` through CSV.write into an in-memory buffer and return the text.
    # NOTE(review): `CSV` is not in this module's visible `using` list — confirm it is
    # brought into scope elsewhere, otherwise this call cannot resolve.
    buffer = IOBuffer()
    CSV.write(buffer, df)
    return String(take!(buffer))
end
""" Convert a DataFrame into a list of Dict rows.
# Arguments
- `df::DataFrame`
The input DataFrame to be converted.
# Return
- `rows::Vector{Dict{String, Any}}`
A vector of dictionaries, where each dictionary represents a row in a dataframe.
# Example
```jldoctest
julia> using DataFrames, JSON3, GeneralUtils
julia> df = DataFrame(A = [1, 2, 3], B = ["apple", "banana", "cherry"])
julia> vectorDict = GeneralUtils.dfToVectorDict(df)
[Dict{String, Any}("B" => "apple", "A" => 1),
Dict{String, Any}("B" => "banana", "A" => 2)
Dict{String, Any}("B" => "cherry", "A" => 3)]
```
# Signature
"""
function dfToVectorDict(df::DataFrame)
    colnames = names(df)  # column names, reused for every row
    # The result is an untyped vector (Vector{Any}) whose elements are Dict{String, Any}.
    rows = Any[]
    foreach(eachrow(df)) do row
        record = Dict{String, Any}()
        for col in colnames
            record[col] = row[col]
        end
        push!(rows, record)
    end
    return rows
end
""" Turn a large vector of dictionaries into smaller one
# Arguments
- `data`
data to be partitioned
- `partsize`
how many dicts per part
# Return
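- `parts`
a NamedTuple `(datatype, totalparts, partsize, dataparts)`; `dataparts` is a dictionary mapping each part number to its vector of dicts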
# Example
```jldoctest
julia> using GeneralUtils, Dates, JSON3, UUIDs
julia> vecDict = [Dict("a" => i) for i in 1:10]
julia> d = GeneralUtils.disintegrate_vectorDict(vecDict, 3)
julia> println(d.dataparts)
Dict{Int64, Vector{Dict}} with 4 entries:
1 => [Dict("a"=>1), Dict("a"=>2), Dict("a"=>3)]
2 => [Dict("a"=>4), Dict("a"=>5), Dict("a"=>6)]
3 => [Dict("a"=>7), Dict("a"=>8), Dict("a"=>9)]
4 => [Dict("a"=>10)]
```
# Signature
"""
function disintegrate_vectorDict(data::Vector, partsize::Integer)
    # Split `data` into consecutive parts of at most `partsize` elements, keyed 1, 2, 3, ...
    # A non-positive partsize cannot form a partition (0 would divide by zero below).
    partsize > 0 || throw(ArgumentError("partsize must be positive, got $partsize"))
    @debug "--> disintegrate_vectorDict()"
    parts = Dict{Int, Vector{Dict}}()
    for (i, dict) in enumerate(data)
        partkey = (i - 1) ÷ partsize + 1  # 1-based number of the part holding element i
        push!(get!(() -> Vector{Dict}(), parts, partkey), dict)
    end
    return (datatype="vector{Dict}", totalparts=length(parts), partsize=partsize, dataparts=parts)
end
""" Get a value from a DataFrame row by a given key
# Arguments
- `row::DataFrameRow`
The DataFrame row to retrieve the value from.
- `key::Symbol`
The column name (as a symbol) whose value is to be retrieved.
# Return
- `Any`
The value of the specified column in the given row.
# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
2×2 DataFrame
Row │ name age
│ String Int64
┌─────┼─────────┼───────
│ 1 │ Alice 25
│ 2 │ Bob 30
julia> getDataFrameValue(df[1, :], :name)
"Alice"
```
# Signature
"""
function getDataFrameValue(row::DataFrameRow, key::Symbol)
    # Property access on the row using a column name that is only known at run time.
    return getproperty(row, key)
end
""" Convert a DataFrame row to a key:value string
# Arguments
- `row::DataFrameRow`
The DataFrame row to convert.
# Return
- `String`
A string containing the formatted representation of the row, with each column prefixed by its name and separated by commas.
# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
2×2 DataFrame
Row │ name age
│ String Int64
┌─────┼─────────┼───────
│ 1 │ Alice 25
│ 2 │ Bob 30
julia> dfRowtoString(df[1, :])
"name: Alice, age: 25"
```
# Signature
"""
function dfRowtoString(row::DataFrameRow)::String
    # Render each column as "name: value" and join the pieces with ", ".
    # Joining (instead of appending ", " after every field and slicing the last two bytes
    # off) avoids a StringIndexError when the final value ends in a multi-byte character.
    fields = ("$key: $(getDataFrameValue(row, key))" for key in keys(row))
    return join(fields, ", ")
end
""" Convert a DataFrame to a string representation
# Arguments
- `df::DataFrame`
The DataFrame to convert, where each row will be converted to a string.
# Return
- `String`
A string containing the formatted representation of the DataFrame, with each row prefixed by its index and separated by newlines.
# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
2×2 DataFrame
Row │ name age
│ String Int64
┌─────┼─────────┼───────
│ 1 │ Alice 25
│ 2 │ Bob 30
julia> dfToString(df)
"1) name: Alice, age: 25\n2) name: Bob, age: 30\n"
```
# Signature
"""
function dfToString(df::DataFrame)
    # One line per row, formatted as "<row number>) <row text>". Every line, including
    # the last one, is terminated by a newline.
    buffer = IOBuffer()
    for (i, row) in enumerate(eachrow(df))
        print(buffer, i, ") ", dfRowtoString(row), "\n")
    end
    return String(take!(buffer))
end
""" Convert a DataFrame to a list of JSON strings
# Arguments
- `df::DataFrame`
The DataFrame to convert, where each row will be converted to a JSON string.
# Return
- `Vector{String}`
A vector containing the JSON representation of each row in the DataFrame.
# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
2×2 DataFrame
Row │ name age
│ String Int64
┌─────┼─────────┼───────
│ 1 │ Alice 25
│ 2 │ Bob 30
julia> dataframe_to_json_list(df)
2-element Vector{String}:
"{\"name\":\"Alice\",\"age\":25}"
"{\"name\":\"Bob\",\"age\":30}"
```
# Signature
"""
function dataframe_to_json_list(df::DataFrame)::Vector{String}
    # Serialize every row to one JSON object string.
    # JSON3 is the JSON package this module imports, so it is used for serialization;
    # an OrderedDict keeps the fields in the DataFrame's column order.
    colnames = names(df)
    json_list = String[]
    for row in eachrow(df)
        record = OrderedDict{String, Any}()
        for (col, value) in zip(colnames, row)
            record[col] = value
        end
        push!(json_list, JSON3.write(record))
    end
    return json_list
end
""" Convert a dictionary to a string representation.
# Arguments
- `od::OrderedDict`
The OrderedDict to convert, where each key-value pair will be represented as "index) key: value".
# Return
- `String`
A string containing the representation of each key-value pair in the OrderedDict.
# Example
```jldoctest
julia> using DataStructures
julia> od = OrderedDict("name" => "Alice", "age" => 25)
OrderedDict{String,Any} with 2 entries:
"name" => "Alice"
"age" => 25
julia> dict_to_string(od)
"1) name: Alice, 2) age: 25"
```
# Signature
"""
function dict_to_string(od::T) where {T<:AbstractDict}
    # Number the entries in the dictionary's own iteration order, starting at 1,
    # and join them with ", ".
    numbered = ("$position) $key: $value" for (position, (key, value)) in enumerate(od))
    return join(numbered, ", ")
end
"""
extract_triple_backtick_text(input::String) -> Vector{String}
Extracts text enclosed within triple backticks (```) from the given string.
# Arguments:
- `input::String`: The input string containing potential triple backtick blocks.
# Returns:
- `Vector{String}`: A vector of strings, each representing a block of text enclosed within triple backticks found in the input string.
# Examples:
```julia
julia> extract_triple_backtick_text("Here is some text ```with a code block``` and more text.")
1-element Vector{String}:
"with a code block"
```
"""
function extract_triple_backtick_text(input::String)::Vector{String}
    # Lazy match that also spans newlines, so each fenced span is captured on its own.
    fenced = r"```([\s\S]*?)```"
    # Keep only the captured inner text (the backticks themselves are excluded).
    return String[found.captures[1] for found in eachmatch(fenced, input)]
end
"""
detect_keyword(keyword::String, text::String) -> Union{Nothing, String}
Detects if a keyword exists in the text in different case variations (lowercase, uppercase first letter, or all uppercase).
# Arguments:
- `keyword::String`: The keyword to search for
- `text::String`: The text to search in
# Returns:
- `Union{Nothing, String}`: Returns the matched keyword variation if found, otherwise returns nothing
# Examples:
```julia
julia> detect_keyword("test", "This is a Test case")
"Test"
julia> detect_keyword("error", "NO ERRORS FOUND")
"ERROR"
julia> detect_keyword("missing", "complete data")
nothing
```
# Signature
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
    # Candidates in priority order: as given, first letter uppercased, fully uppercased.
    candidates = [keyword, uppercasefirst(keyword), uppercase(keyword)]
    hit = findfirst(candidate -> occursin(candidate, text), candidates)
    # The returned value is the candidate spelling itself, not the surrounding word.
    return hit === nothing ? nothing : candidates[hit]
end
"""
countGivenWords(text::String, words::Vector{String}) -> Vector{Int}
Count the occurrences of each word in the given list within the provided text.
# Arguments
- `text::String`: The input text to search through.
- `words::Vector{String}`: A vector of words whose occurrences need to be counted.
# Returns
- `Vector{Int64}`: Their respective counts in the `text`.
# Examples
```julia
julia> GeneralUtils.countGivenWords("hello world hello", ["hello", "world"])
2-element Vector{Int64}:
2
1
julia> GeneralUtils.countGivenWords("foo bar baz foo", ["foo", "qux"])
2-element Vector{Int64}:
2
0
```
# Signature
"""
function countGivenWords(text::String, words::Vector{String})::Vector{Int}
    # Splitting the text on a word yields one more piece than there are
    # (non-overlapping) occurrences of that word.
    return Int[length(split(text, word)) - 1 for word in words]
end
"""
remove_french_accents(text::String) -> String
Remove French accents from the given text.
# Arguments
- `text::String`: The input string containing French accents.
# Returns
- `String`: The input string with all French accents removed.
# Examples
```julia
julia> remove_french_accents("Café")
"Cafe"
julia> remove_french_accents("L'été est beau.")
"L'ete est beau."
```
# Signature
"""
function remove_french_accents(text::AbstractString)::AbstractString
    # Map of accented characters to their unaccented replacements. Only the precomposed
    # characters listed here are replaced; everything else is passed through unchanged.
    accented_to_regular = Dict(
        'à' => 'a', 'â' => 'a', 'ä' => 'a', 'á' => 'a',
        'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
        'î' => 'i', 'ï' => 'i', 'í' => 'i',
        'ñ' => 'n',
        'ô' => 'o', 'ö' => 'o', 'ò' => 'o', 'ó' => 'o',
        'ù' => 'u', 'û' => 'u', 'ü' => 'u',
        'ÿ' => 'y',
        'ç' => 'c',
        'Ä' => 'A',
        'É' => 'E',
        'Ö' => 'O',
        'Ü' => 'U',
        # NOTE(review): the key of this entry was unreadable in the reviewed source;
        # presumably the typographic apostrophe U+2019 mapped to ASCII "'" — TODO confirm.
        '\u2019' => '\'',
    )
    # Look every character up in the table, keeping it as-is when it has no entry.
    return join(get(accented_to_regular, char, char) for char in text)
end
end # module util