module util

export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
    findMatchingDictKey, textToDict, randstring, randstrings, timeout, dataframeToCSV,
    dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString, dfToString,
    dataframe_to_json_list, dict_to_string, extract_triple_backtick_text, countGivenWords,
    remove_french_accents

using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames

# ---------------------------------------------- 100 --------------------------------------------- #

"""
Compute the time difference between a start time and a stop time in a given unit.
The unit can be "milliseconds", "seconds", "minutes" or "hours" (case-insensitive).
Partial units are truncated toward zero (integer division).

# Arguments
- `starttime::DateTime` start time
- `stoptime::DateTime` stop time
- `unit::String` unit of the time difference

# Return
- time difference in the given unit (negative when `stoptime` precedes `starttime`)

# Throws
- `ErrorException` when `unit` is not one of the supported units

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils, Dates
julia> a = Dates.now()
julia> b = a + Dates.Day(5)         # add 5 days
julia> GeneralUtils.timedifference(a, b, "hours")
120
```

# Signature
"""
function timedifference(starttime::DateTime, stoptime::DateTime, unit::String)::Integer
    elapsed_ms = Dates.value(stoptime - starttime)  # DateTime difference is a Millisecond period
    unit = lowercase(unit)

    if unit == "milliseconds"
        return elapsed_ms
    elseif unit == "seconds"
        return elapsed_ms ÷ 1000
    elseif unit == "minutes"
        return elapsed_ms ÷ (1000 * 60)
    elseif unit == "hours"
        return elapsed_ms ÷ (1000 * 60 * 60)
    else
        error("Invalid unit specified. Please choose from: milliseconds, seconds, minutes, hours")
    end
end


"""
Run a function, capturing (and logging) any error together with its stacktrace
instead of letting it propagate.

# Arguments
- `f::Function` a function that might throw an error
- `args` arguments passed to `f`

# Return
- `outcome::NamedTuple` `(success, result, errormsg, st)` where
    - `success::Bool` is `true` when `f` returned normally
    - `result` is the value returned by `f`, or `nothing` on failure
    - `errormsg` is the rendered error message (`String`), or `nothing` on success
    - `st` is the rendered stacktrace (`String`), or `nothing` on success

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> testf(a, b) = a + b
julia> success, result, errormsg, st = GeneralUtils.showstracktrace(testf, 5, "6")
julia> println(st)
16-element Vector{Base.StackTraces.StackFrame}:
 testf(a::Int64, b::String) at REPL[12]:1
 showstracktrace(::Function, ::Int64, ::Vararg{Any}) at util.jl:95
 ...
```

# Signature
"""
function showstracktrace(f::Function, args...)::NamedTuple
    # Plain locals: the outcome is returned to the caller, nothing is stored at module level.
    success = false
    fResult = nothing
    errorMsg = nothing
    st = nothing

    try
        fResult = f(args...)
        success = true
    catch e
        errorMsg = sprint(showerror, e)
        st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
        @warn "Error occurred: $errorMsg\n$st"
    end

    return (success=success, result=fResult, errormsg=errorMsg, st=st)
end


"""
Find every key of a dictionary whose name contains the given text.

# Arguments
- `d<:AbstractDict` The dictionary to search (keys must be `Symbol`s).
- `text::Union{String, Symbol}` The text to look for inside each key.

# Returns
- `result::Vector{Symbol}` A vector of the matching keys (substring match, in the
    dictionary's iteration order)

# Examples
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_12 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findMatchingDictKey(d, "key_1")
2-element Vector{Symbol}:
 :key_1
 :key_12
```

# Signature
"""
function findMatchingDictKey(d::AbstractDict, text::Union{String, Symbol})::Vector{Symbol}
    needle = string(text)
    return Symbol[k for k in keys(d) if occursin(needle, string(k))]
end


"""
Find the key in a dictionary `d` with the highest index value that matches a given `text`.
The index of a key is the integer formed by all digits found in the key name.

# Arguments
- `d<:AbstractDict` The dictionary to search for keys.
- `text::Union{String, Symbol}` The text to match against the keys.

# Returns
- `NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}}`
    - `(result=nothing, maxindice=nothing)` when no key matches
    - `(result=Symbol(text), maxindice=nothing)` when the only match is exactly `text`, or
      when no matching key carries an index but the exact key exists
    - otherwise the matching key with the highest index and that index. Matching keys
      without any digit are ignored; when none of the matches carries an index and the
      exact key is absent, `result` is `nothing`.

# Examples
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_2 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findHighestIndexKey(d, "key")
(result = :key_3, maxindice = 3)
```

# Signature
"""
function findHighestIndexKey(d::AbstractDict, text::Union{String, Symbol}
    )::NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}}

    matching_keys = findMatchingDictKey(d, text)
    exactkey = Symbol(text)

    isempty(matching_keys) && return (result=nothing, maxindice=nothing)
    if length(matching_keys) == 1 && matching_keys[1] == exactkey
        return (result=exactkey, maxindice=nothing)
    end

    bestkey = nothing
    bestindex = nothing
    for key in matching_keys
        # tryparse: a key without digits (or with an overflowing number) has no usable index
        index = tryparse(Int, replace(string(key), r"[^\d]" => ""))
        index === nothing && continue
        if bestindex === nothing || index > bestindex  # strict `>` keeps the first key on ties
            bestkey, bestindex = key, index
        end
    end

    if bestkey === nothing
        fallback = exactkey in matching_keys ? exactkey : nothing
        return (result=fallback, maxindice=nothing)
    end
    return (result=bestkey, maxindice=bestindex)
end


"""
Get a uuid4 in snake case (dashes replaced by underscores).

# Return
- `uuid4::String` uuid4 with snake case

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> GeneralUtils.uuid4snakecase()
"0f6e4f_568c_4df4_8c79_1d7a58072f4a"
```

# Signature
"""
function uuid4snakecase()::String
    return replace(string(uuid4()), "-" => "_")
end


"""
Build a copy of a dictionary in which selected keys are renamed.

# Arguments
- `d::Dict` The input dictionary (left unmodified)
- `replacementMap::Dict` A dictionary that maps old keys to new keys

# Return
- `newDict::Dict` new `Dict{Any, Any}` with the replaced keys; keys that are not in
    `replacementMap` are kept as they are

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:a => 1, :b => 2, :c => 3)
julia> replacement_map = Dict(:a => :x, :b => :y)
julia> new_dict = GeneralUtils.replaceDictKeys(d, replacement_map)
Dict{Any, Any} with 3 entries:
  :y => 2
  :c => 3
  :x => 1
```

# Signature
"""
function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
    newDict = Dict()
    for (key, value) in d
        # fall back to the original key when no replacement is registered
        newDict[get(replacementMap, key, key)] = value
    end
    return newDict
end


# Return the first case variation of `keyword` (as given, first letter uppercased, all
# uppercase) whose marked form `variation * marker` occurs in `text`, or `nothing`.
function _marked_keyword_variation(keyword::AbstractString, marker::AbstractString,
    text::AbstractString)
    for variation in (keyword, uppercasefirst(keyword), uppercase(keyword))
        occursin(variation * marker, text) && return variation
    end
    return nothing
end


"""
Convert text into a dictionary using the given keywords.
The keywords slice the text into the following format:
KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
The leftmost text that precedes the first keyword is discarded.
WARNING, ordering is important: keywords must be listed in the order they appear in the text.

# Arguments
- `text::String` A text to be converted.
- `keywords::Vector{String}` A list of keywords used to slice the text. These keywords
    also become the keys of the resulting dict. Each keyword is matched as given, with an
    uppercase first letter, or fully uppercased (e.g. agent, Agent, AGENT).

# Keyword Arguments
- `rightmarker::String` A marker that makes a keyword unique. Ex, for the keyword "plan"
    with rightmarker ":", the function searches for "plan:", otherwise it searches
    for "plan". The marker is not part of the resulting dict keys.
- `symbolkey::Bool` If true, the resulting dict's keys are Symbols, otherwise Strings.
- `lowercasekey::Bool` If true, the resulting dict's keys are lowercased.

# Return
- `d::OrderedDict` keyword => stripped text that follows it, in the order of `keywords`

# Throws
- `ErrorException` when a keyword cannot be found in the text (or is out of order)

# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Symbol, Any}(:thought => "what to do", :plan => "wake up and going out", :action => "1. wake up 2. eat 3. sleep")
```

# Signature
"""
function textToDict(text::String, keywords::Vector{String};
    rightmarker::Union{String, Nothing}=nothing, symbolkey::Bool=false,
    lowercasekey::Bool=false
    )::OrderedDict

    marker = something(rightmarker, "")
    K = symbolkey ? Symbol : String
    makekey(word) = K(lowercasekey ? lowercase(word) : word)

    # Resolve which case variation of each keyword is actually present in the text.
    found = String[]
    for keyword in keywords
        variation = _marked_keyword_variation(keyword, marker, text)
        variation === nothing &&
            error("""keyword "$keyword" not found in the provided text""")
        push!(found, variation)
    end

    # Slice from the right: everything after the last keyword belongs to it, then the text
    # is cut just before that keyword and the process repeats for the previous keyword.
    sliced = Dict{K, Any}()
    remainingtext = text
    for keyword in Iterators.reverse(found)
        keywordidx = findlast(keyword * marker, remainingtext)
        keywordidx === nothing &&
            error("""keyword "$keyword" not found in the provided text""")

        # nextind/prevind keep the slice boundaries on valid character indices even when
        # the neighbouring characters are multi-byte.
        tail = remainingtext[nextind(remainingtext, last(keywordidx)):end]
        sliced[makekey(keyword)] = string(strip(tail))
        remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
    end

    # Restore the caller's keyword order.
    result = OrderedDict{K, Any}()
    for keyword in found
        key = makekey(keyword)
        result[key] = sliced[key]
    end
    return result
end


"""
Generate a random string of lowercase letters.

# Arguments
- `n::Integer` The number of characters to generate

# Return
- `s::String`

# Example
```jldoctest
julia> result = randstring(5)
"fysmp"
```

# Signature
"""
randstring(n::Integer)::String = String(rand('a':'z', n))


"""
Generate several random lowercase strings joined by single spaces.

# Arguments
- `totalgroup::Integer` The number of random strings (groups) to generate
- `stringlength::Integer` The number of characters in each group

# Return
- `s::String`

# Example
```jldoctest
julia> result = randstrings(3, 5)
"fysmp cmhdk iuytr"
```

# Signature
"""
function randstrings(totalgroup::Integer, stringlength::Integer)::String
    groups = [randstring(stringlength) for _ in 1:totalgroup]
    return join(groups, " ")
end


"""
Execute a function with a time limit.

The function runs as a task; when it is still running after `timeoutwindow` seconds an
`InterruptException` is thrown into it. Note that ANY failure of the task (not only the
timeout) results in `timeoutmsg` being returned.

# Arguments
- `f::Function` a function to run; it is called as `f(fargs)`
- `timeoutwindow::Integer` timeout in seconds

# Keyword Argument
- `fargs` the single argument passed to the function (`nothing` by default)
- `timeoutmsg::String` time out message

# Return
- task result, otherwise the timeout message

# Example
```jldoctest
julia> function testfunc(x)
           sleep(x)
           return "task done"
       end
julia> result = timeout(testfunc, 10; fargs=20)
"task timed out"
julia> result = timeout(testfunc, 20; fargs=10)
"task done"
```

# Signature
"""
function timeout(f::Function, timeoutwindow::Integer; fargs=nothing, timeoutmsg="task timed out")
    tsk = @task f(fargs)
    schedule(tsk)
    watchdog = Timer(timeoutwindow) do _
        istaskdone(tsk) || Base.throwto(tsk, InterruptException())
    end

    try
        return fetch(tsk)
    catch
        return timeoutmsg
    finally
        close(watchdog)  # do not leave the timer pending once the task has finished
    end
end


"""
Convert a dataframe into a CSV string.

The CSV is produced by DataFrames' own `text/csv` rendering: a header row followed by one
line per row, with column names and string values wrapped in double quotes.

# Arguments
- `df::DataFrame` The dataframe to convert

# Return
- `result::String`

# Example
```jldoctest
julia> using DataFrames, GeneralUtils
julia> df = DataFrame(A=1:3, B=5:7, fixed=1)
julia> result = GeneralUtils.dataframeToCSV(df)
```

# Signature
"""
function dataframeToCSV(df::DataFrame)
    # The CSV package is not a dependency of this module, so rely on the CSV writer that
    # ships with DataFrames (`show` with the text/csv MIME type).
    return sprint(show, MIME("text/csv"), df)
end


"""
Convert a DataFrame into a list of Dict rows.

# Arguments
- `df::DataFrame` The input DataFrame to be converted.

# Return
- `rows::Vector{Dict{String, Any}}` A vector of dictionaries, where each dictionary
    represents a row of the dataframe (column name => value).

# Example
```jldoctest
julia> using DataFrames, JSON3, GeneralUtils
julia> df = DataFrame(A = [1, 2, 3], B = ["apple", "banana", "cherry"])
julia> vectorDict = GeneralUtils.dfToVectorDict(df)
[Dict{String, Any}("B" => "apple", "A" => 1),
 Dict{String, Any}("B" => "banana", "A" => 2)
 Dict{String, Any}("B" => "cherry", "A" => 3)]
```

# Signature
"""
function dfToVectorDict(df::DataFrame)
    columns = names(df)
    return Dict{String, Any}[
        Dict{String, Any}(col => row[col] for col in columns) for row in eachrow(df)
    ]
end


"""
Split a large vector of dictionaries into smaller parts.

# Arguments
- `data` the vector of dictionaries to be partitioned
- `partsize` how many dicts per part (must be positive)

# Return
- `NamedTuple` `(datatype, totalparts, partsize, dataparts)` where `dataparts` is a
    `Dict{Int, Vector{Dict}}` mapping the 1-based part number to its dictionaries

# Throws
- `ArgumentError` when `partsize` is not positive

# Example
```jldoctest
julia> using GeneralUtils, Dates, JSON3, UUIDs
julia> vecDict = [Dict("a" => i) for i in 1:10]
julia> d = GeneralUtils.disintegrate_vectorDict(vecDict, 3)
julia> println(d[:dataparts])
Dict{Int64, Vector{Dict}} with 4 entries:
1 => [Dict("a"=>1), Dict("a"=>2), Dict("a"=>3)]
2 => [Dict("a"=>4), Dict("a"=>5), Dict("a"=>6)]
3 => [Dict("a"=>7), Dict("a"=>8), Dict("a"=>9)]
4 => [Dict("a"=>10)]
```

# Signature
"""
function disintegrate_vectorDict(data::Vector, partsize::Integer)
    partsize > 0 || throw(ArgumentError("partsize must be positive, got $partsize"))
    @debug "--> disintegrate_vectorDict()"

    parts = Dict{Int, Vector{Dict}}()
    for (partkey, chunk) in enumerate(Iterators.partition(data, partsize))
        parts[partkey] = collect(Dict, chunk)
    end

    return (datatype="vector{Dict}", totalparts=length(parts), partsize=partsize,
        dataparts=parts)
end


"""
Get a value from a DataFrame row by a given key.

# Arguments
- `row::DataFrameRow` The DataFrame row to retrieve the value from.
- `key::Symbol` The column name (as a symbol) whose value is to be retrieved.

# Return
- `Any` The value of the specified column in the given row.

# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
julia> getDataFrameValue(df[1, :], :name)
"Alice"
```

# Signature
"""
getDataFrameValue(row::DataFrameRow, key::Symbol) = row[key]


"""
Convert a DataFrame row to a "key: value" string.

# Arguments
- `row::DataFrameRow` The DataFrame row to convert.

# Return
- `String` The formatted representation of the row, with each value prefixed by its column
    name and the columns separated by ", ". An empty row yields an empty string.

# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
julia> dfRowtoString(df[1, :])
"name: Alice, age: 25"
```

# Signature
"""
function dfRowtoString(row::DataFrameRow)::String
    # join avoids trimming a trailing separator by byte offset, which is unsafe when the
    # last value ends with a multi-byte character
    return join(("$key: $(getDataFrameValue(row, key))" for key in keys(row)), ", ")
end


"""
Convert a DataFrame to a string representation.

# Arguments
- `df::DataFrame` The DataFrame to convert, where each row will be converted to a string.

# Return
- `String` The formatted representation of the DataFrame: one line per row, each prefixed
    by its 1-based index and terminated by a newline.

# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
julia> dfToString(df)
"1) name: Alice, age: 25\\n2) name: Bob, age: 30\\n"
```

# Signature
"""
function dfToString(df::DataFrame)
    return sprint() do io
        for (i, row) in enumerate(eachrow(df))
            print(io, i, ") ", dfRowtoString(row), "\n")
        end
    end
end


"""
Convert a DataFrame to a list of JSON strings.

# Arguments
- `df::DataFrame` The DataFrame to convert, where each row will be converted to a JSON string.

# Return
- `Vector{String}` The JSON object of each row in the DataFrame, with the fields in
    column order.

# Example
```jldoctest
julia> using DataFrames
julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
julia> dataframe_to_json_list(df)
2-element Vector{String}:
 "{\\"name\\":\\"Alice\\",\\"age\\":25}"
 "{\\"name\\":\\"Bob\\",\\"age\\":30}"
```

# Signature
"""
function dataframe_to_json_list(df::DataFrame)::Vector{String}
    columns = names(df)
    return String[
        # OrderedDict keeps the JSON fields in the dataframe's column order
        JSON3.write(OrderedDict{String, Any}(col => row[col] for col in columns))
        for row in eachrow(df)
    ]
end


"""
Convert a dictionary to a string representation.

# Arguments
- `od<:AbstractDict` The dictionary to convert, where each key-value pair will be
    represented as "index) key: value". Use an `OrderedDict` for a predictable order.

# Return
- `String` The representation of each key-value pair, separated by ", ".

# Example
```jldoctest
julia> using DataStructures
julia> od = OrderedDict("name" => "Alice", "age" => 25)
julia> dict_to_string(od)
"1) name: Alice, 2) age: 25"
```

# Signature
"""
function dict_to_string(od::T) where {T<:AbstractDict}
    return join(("$i) $key: $value" for (i, (key, value)) in enumerate(od)), ", ")
end


"""
    extract_triple_backtick_text(input::String) -> Vector{String}

Extract every piece of text enclosed within triple backticks from the given string.

# Arguments:
- `input::String`: The input string containing potential triple backtick blocks.

# Returns:
- `Vector{String}`: One entry per block found, holding the text between the opening and
    the closing triple backticks (the backticks themselves are excluded). Empty when the
    input has no complete block.

# Examples:
Given the input `Here is some text` followed by a triple-backtick block containing
`with a code block` and then `and more text.`, the result is:

    1-element Vector{String}:
     "with a code block"
"""
function extract_triple_backtick_text(input::String)::Vector{String}
    # Lazy quantifier so that each block ends at the nearest closing fence
    fenced = r"```([\s\S]*?)```"
    return String[m.captures[1] for m in eachmatch(fenced, input)]
end


"""
    detect_keyword(keyword::String, text::String) -> Union{Nothing, String}

Detect whether a keyword exists in the text in one of three case variations: as given,
with an uppercase first letter, or all uppercase. Variations are tried in that order and
matched as substrings.

# Arguments:
- `keyword::String`: The keyword to search for
- `text::String`: The text to search in

# Returns:
- `Union{Nothing, String}`: The first keyword variation found in the text, otherwise `nothing`

# Examples:
```julia
julia> detect_keyword("test", "This is a Test case")
"Test"

julia> detect_keyword("error", "NO ERRORS FOUND")
"ERROR"

julia> detect_keyword("missing", "complete data")
nothing
```

# Signature
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
    for variation in (keyword, uppercasefirst(keyword), uppercase(keyword))
        occursin(variation, text) && return variation
    end
    return nothing
end


"""
    countGivenWords(text::String, words::Vector{String}) -> Vector{Int}

Count the occurrences of each word of the given list within the provided text.
Occurrences are counted as non-overlapping substring matches (case-sensitive), so a word
is also counted when it appears inside a longer word.

# Arguments
- `text::String`: The input text to search through.
- `words::Vector{String}`: A vector of words whose occurrences need to be counted.

# Returns
- `Vector{Int64}`: The count of each word, in the same order as `words`.

# Examples
```julia
julia> GeneralUtils.countGivenWords("hello world hello", ["hello", "world"])
2-element Vector{Int64}:
 2
 1

julia> GeneralUtils.countGivenWords("foo bar baz foo", ["foo", "qux"])
2-element Vector{Int64}:
 2
 0
```

# Signature
"""
function countGivenWords(text::String, words::Vector{String})::Vector{Int}
    # splitting on a word yields one more piece than the number of occurrences
    return Int[length(split(text, word)) - 1 for word in words]
end


# Accented character => plain replacement used by `remove_french_accents`.
const _ACCENT_REPLACEMENTS = Dict{Char, Char}(
    'à' => 'a', 'â' => 'a', 'ä' => 'a', 'á' => 'a',
    'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
    'î' => 'i', 'ï' => 'i', 'í' => 'i',
    'ñ' => 'n',
    'ô' => 'o', 'ö' => 'o', 'ò' => 'o', 'ó' => 'o',
    'ù' => 'u', 'û' => 'u', 'ü' => 'u',
    'ÿ' => 'y',
    'ç' => 'c',
    'Ä' => 'A', 'É' => 'E', 'Ö' => 'O', 'Ü' => 'U',
    '’' => '\'',   # typographic apostrophe => ASCII apostrophe
)


"""
    remove_french_accents(text::AbstractString) -> AbstractString

Remove French accents from the given text.

Each accented character known to the internal replacement table is swapped for its plain
counterpart, and the typographic apostrophe is replaced by the ASCII apostrophe. Characters
that are not in the table are left untouched.

# Arguments
- `text::AbstractString`: The input string containing French accents.

# Returns
- `String`: The input string with the known accented characters replaced.

# Examples
```julia
julia> remove_french_accents("Café")
"Cafe"

julia> remove_french_accents("L'été est beau.")
"L'ete est beau."
```

# Signature
"""
function remove_french_accents(text::AbstractString)::AbstractString
    return map(c -> get(_ACCENT_REPLACEMENTS, c, c), text)
end


end # module util