# GeneralUtils/src/util.jl

module util

export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys,
        findMatchingDictKey, textToDict, randstring, randstrings, timeout,
        dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString,
        dfToString, dataframe_to_json_list, dict_to_string, extract_triple_backtick_text,
        countGivenWords, remove_french_accents

using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames

# ---------------------------------------------- 100 --------------------------------------------- #

""" Compute time different between start time and stop time in a given unit.
 Unit can be "milliseconds", "seconds", "minutes", "hours".

# Arguments
  - `starttime::DateTime`
    start time
  - `stoptime::DateTime`
    stop time
  - `unit::String`
    unit of time difference

# Return
  - time difference in given unit

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils, Dates
julia> a = Dates.now()
julia> b = a + Dates.Day(5) # add 5 days
julia> GeneralUtils.timedifference(a, b, "hours")
120
```

# Signature
"""
function timedifference(starttime::DateTime, stoptime::DateTime, unit::String)::Integer
  # Elapsed time as a whole number of milliseconds (negative when stoptime < starttime).
  elapsedms = Dates.value(stoptime - starttime)
  unitname = lowercase(unit)  # unit names are matched case-insensitively

  # Integer division (÷) drops any fractional part of the requested unit.
  unitname == "milliseconds" && return elapsedms
  unitname == "seconds" && return elapsedms ÷ 1000
  unitname == "minutes" && return elapsedms ÷ (1000 * 60)
  unitname == "hours" && return elapsedms ÷ (1000 * 60 * 60)

  error("Invalid unit specified. Please choose from: milliseconds, seconds, minutes, hours")
end


""" Capture then show error and stacktrace

# Arguments
  - `f::Function`
    a function that might throws an error
  - `args` function f arguments

# Return
  - `outcome::NamedTuple`
    (success, result, errormsg, st)

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils, PrettyPrinting
julia> testf(a, b) = a + b
julia> success, result, errormsg, st = GeneralUtils.showstracktrace(testf, 5, "6")
julia> pprint(st)
16-element Vector{Base.StackTraces.StackFrame}:
testf(a::Int64, b::String) at REPL[12]:1
showstracktrace(::Function, ::Int64, ::Vararg{Any}) at util.jl:95
...
```

# Signature
"""
function showstracktrace(f::Function, args...)::NamedTuple
  # Outcome slots are plain locals: nothing is written to module-level globals, so
  # nested or concurrent calls cannot overwrite each other's results.
  st = nothing         # rendered stacktrace text, set only when f throws
  errorMsg = nothing   # rendered error message, set only when f throws
  success = false
  fResult = nothing

  try
    fResult = f(args...)
    success = true
  catch e
    errorMsg = sprint(showerror, e)
    # Render the frames of the caught exception as multi-line plain text.
    st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
    @warn "Error occurred: $errorMsg\n$st"
  end

  return (success=success, result=fResult, errormsg=errorMsg, st=st)
end

""" Find all match key of a dictionary for a given key.

# Arguments
  - `d<:AbstractDict`
    The dictionary to search for keys.
  - `text::Union{String, Symbol}`
    The text to match against the keys.

# Returns
  - `result::Vector{Symbol}`
    A vector of matched key


# Examples
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_12 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findMatchingDictKey(d, "key_1")
2-element Vector{Symbol}:
 :key_1
 :key_12
```

# Signature
"""
function findMatchingDictKey(d::T, text::Union{String, Symbol}
  )::Vector{Symbol} where {T<:AbstractDict}

  needle = string(text)

  # Keep every key whose textual form contains `needle` as a substring.
  hits = filter(keys(d)) do candidate
    occursin(needle, string(candidate))
  end

  # `hits` is a set; the declared return type turns the collected keys into Vector{Symbol}.
  return collect(hits)
end


"""
Find the key in a dictionary `d` with the highest index value that matches a given `text`.

# Arguments
  - `d<:AbstractDict`
    The dictionary to search for keys.
  - `text<:Union{String, Symbol}`
    The text to match against the keys.

# Returns
  - `NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}}`
    The key in `d` with the highest index value that matches `text`, or `nothing` if no matches are found.

# Examples
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:key_1 => "apple", :key_2 => "banana", :key_3 => "cherry")
julia> GeneralUtils.findHighestIndexKey(d, "key")
(result = :key_3, maxindice = 3)
```

# Signature
"""
function findHighestIndexKey(d::T, text::Union{String, Symbol}
  )::NamedTuple{(:result, :maxindice), Tuple{Union{Symbol, Nothing}, Union{Integer, Nothing}}} where {T<:AbstractDict}

  matching_keys = findMatchingDictKey(d, text)
  isempty(matching_keys) && return (result=nothing, maxindice=nothing)

  # A lone exact match is returned as-is, without an index.
  exactkey = Symbol(text)
  if length(matching_keys) == 1 && matching_keys[1] == exactkey
    return (result=exactkey, maxindice=nothing)
  end

  # The index of a key is the number formed by all of its digits. Keys that contain no
  # digits (or whose digits do not fit an Int) are skipped instead of aborting the search.
  bestkey = nothing
  bestindex = nothing
  for key in matching_keys
    index = tryparse(Int, replace(string(key), r"[^\d]" => ""))
    index === nothing && continue
    # Strict `>` keeps the first key seen among equal indices.
    if bestindex === nothing || index > bestindex
      bestkey, bestindex = key, index
    end
  end

  if bestindex === nothing
    # No matching key is numbered: fall back to the exact key when it is present.
    return (result=(exactkey in matching_keys ? exactkey : nothing), maxindice=nothing)
  end

  return (result=bestkey, maxindice=bestindex)
end


""" Get uuid4 with snake case

# Return
  - `uuid4::String`
    uuid4 with snake case

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> GeneralUtils.uuid4snakecase()
"0f6e4f_568c_4df4_8c79_1d7a58072f4a"
```

# Signature
"""
function uuid4snakecase()::String
  # Take the textual form of a fresh random UUID and swap every hyphen for an underscore.
  return replace(string(uuid4()), "-" => "_")
end


""" Replace a dictionary key with the new key

# Arguments
  - `d::Dict`
    The input dictionary that you want to modify
  - `replacementMap::Dict`
    A dictionary that maps old keys to new keys

# Return
  - `newDict::Dict`
    new dictionary with the replaced keys

# Example
```jldoctest
julia> using Revise
julia> using GeneralUtils
julia> d = Dict(:a => 1, :b => 2, :c => 3)
julia> replacement_map = Dict(:a => :x, :b => :y)
julia> new_dict = GeneralUtils.replaceDictKeys(d, replacement_map)
Dict{Any, Any} with 3 entries:
  :y => 2
  :c => 3
  :x => 1
```

# Signature
"""
function replaceDictKeys(d::Dict, replacementMap::Dict)::Dict
  # Build a fresh Dict{Any, Any}; `d` itself is left untouched.
  # A key without an entry in `replacementMap` is carried over unchanged.
  return Dict{Any, Any}(get(replacementMap, oldkey, oldkey) => value for (oldkey, value) in d)
end


""" Convert text into a dictionary with a given keywords. This function use keywords to slice
  a given text into the following format: KW1|kw1_text|KW2|kw2_text|KW3|kw3_text.
  The left most string which has no keyword will be discarded. WARNING, ordering is important

# Arguments
  - `text::String`
    A text to be converted.
  - `keywords::Vector{String}`
    A list of keywords to be used to slice the text.
    These keywords also be the resulting dict keys.
# Keyword Arguments
  - `rightmarker::String`
    A marker appended to a keyword to make it unique. Ex, A keyword "plan" with rightmarker ":",
    the function will search for "plan:" otherwise the function will search for "plan".
    The marker will not be in the resulting dict keys.
  - `symbolkey::Bool`
    If true, resulting dict's key will be Symbols, otherwise string.
  - `lowercasekey::Bool`
    set resulting dict's key to be lowercase

# Return
  - `d::OrderedDict`

# Example
```jldoctest
julia> text = "TODAY thought: what to do plan: wake up and going out action: 1. wake up 2. eat 3. sleep"
julia> sample_keywords = ["thought", "plan", "action"]
julia> resultdict = GeneralUtils.textToDict(text, sample_keywords; rightmarker=":", symbolkey=true)
julia> println(resultdict)
OrderedCollections.OrderedDict{Symbol, Any}(:thought => "what to do",
                                        :plan => "wake up and going out",
                                        :action => "1. wake up 2. eat 3. sleep")
```

# Signature
"""
function textToDict(text::String, keywords::Vector{String};
    rightmarker::Union{String, Nothing}=nothing, symbolkey::Bool=false, lowercasekey::Bool=false
    )::OrderedDict

  # Resolve every keyword to the case variation that occurs in the text
  # (e.g. agent, Agent, AGENT). A keyword with no variation in the text is reported
  # with the explicit "not found" error rather than failing later on a `nothing` value.
  found = String[]
  for keyword in keywords
    variation = detect_keyword(keyword, text)
    variation === nothing && error("""keyword "$keyword" not found in the provided text""")
    push!(found, variation)
  end

  # Turn a detected keyword into the dictionary key requested by the caller.
  makekey(kw) = (k = lowercasekey ? lowercase(kw) : kw; symbolkey ? Symbol(k) : k)
  newdict() = symbolkey ? OrderedDict{Symbol, Any}() : OrderedDict{String, Any}()

  # Walk the keywords right-to-left: each value is whatever follows the last occurrence
  # of its (marked) keyword, and the text is then cut just before that keyword.
  sliced = newdict()
  remainingtext = text
  for keyword in reverse(found)
    mkeyword = rightmarker !== nothing ? keyword * rightmarker : keyword
    keywordidx = findlast(mkeyword, remainingtext)
    keywordidx === nothing && error("""keyword "$keyword" not found in the provided text""")

    # nextind/prevind step over whole characters, so multi-byte characters next to the
    # keyword cannot produce an invalid string index.
    valuestart = nextind(remainingtext, last(keywordidx))
    sliced[makekey(keyword)] = string(strip(remainingtext[valuestart:end]))
    remainingtext = remainingtext[1:prevind(remainingtext, first(keywordidx))]
  end

  # The slicing pass filled the dictionary back-to-front; re-emit in the caller's order.
  ordered = newdict()
  for keyword in found
    key = makekey(keyword)
    ordered[key] = sliced[key]
  end

  return ordered
end


""" Generate a random string

# Arguments
  - `n::Integer`
    Length of the string to generate (lowercase letters a-z)

# Return
  - `s::String`

# Example
```jldoctest
julia> result = randstring(5)
"fysmp"
```

# Signature
"""
function randstring(n::Integer)::String
  # Draw n characters uniformly from 'a':'z' and pack them into a String.
  letters = rand('a':'z', n)
  return String(letters)
end


""" Generate a random string in group

# Arguments
  - `totalgroup::Integer`
    A number of group of random string to be generated
  - `stringlength::Integer`
    A number of string to be generated

# Return
  - `s::String`

# Example
```jldoctest
julia> result = randstrings(3, 5)
"fysmp cmhdk iuytr"
```

# Signature
"""
function randstrings(totalgroup::Integer, stringlength::Integer)::String
  # Generate the groups in order and separate them with single spaces.
  groups = (randstring(stringlength) for _ in 1:totalgroup)
  # strip() keeps the result free of leading/trailing whitespace, e.g. when groups are empty.
  return strip(join(groups, " "))
end


""" Execute a function with timer.

# Arguments
  - `f::Function`
      a function to run
  - `timeoutwindow::Integer`
      timeout in seconds

# Keyword Argument
  - `fargs`
    arguments for the function
  - `timeoutmsg::String`
    time out message

# Return
  - task result otherwise timeout message

# Example
  ```jldoctest
julia> function testfunc(x)
          sleep(x)
          return "task done"
        end
julia> result = timeout(testfunc, 10; fargs=20)
"task timed out"
julia> result = timeout(testfunc, 20; fargs=10)
"task done"
```

#  Signature
"""
function timeout(f::Function, timeoutwindow::Integer; fargs=nothing, timeoutmsg="task timed out")
  # `fargs` is handed to `f` as one positional argument (including the default `nothing`).
  tsk = @task f(fargs)
  schedule(tsk)

  # Watchdog: once the window elapses, interrupt the task if it is still running.
  watchdog = Timer(timeoutwindow) do timer
    istaskdone(tsk) || Base.throwto(tsk, InterruptException())
  end

  try
    return fetch(tsk)
  catch
    # Any failure of the task - the watchdog interrupt or an error thrown by `f` itself -
    # is reported as `timeoutmsg`.
    return timeoutmsg
  finally
    # Disarm the watchdog so it does not stay pending after the task has finished.
    close(watchdog)
  end
end


""" Convert a dataframe into CSV.

# Arguments
  - `df::DataFrame`
    The DataFrame to convert to CSV text

# Return
  - `result::String`

# Example
```jldoctest
julia> using DataFrames, GeneralUtils
julia> df = DataFrame(A=1:3, B=5:7, fixed=1)
julia> result = GeneralUtils.dataframeToCSV(df)
```

# Signature
"""
function dataframeToCSV(df::DataFrame)
  # NOTE(review): this previously called `CSV.write`, but `CSV` is not among the packages
  # this file loads, so that call could not resolve. DataFrames' own `text/csv` renderer
  # is used instead; confirm its formatting (quoting, missing values) suits consumers,
  # or add CSV.jl as a dependency if its exact output is required.
  io = IOBuffer()
  show(io, MIME("text/csv"), df)
  return String(take!(io))
end

""" Convert a DataFrame into a list of Dict rows.

#  Arguments
  - `df::DataFrame`
    The input DataFrame to be converted.

#  Return
  - `rows::Vector{Any}`
    A vector of `Dict{String, Any}`, where each dictionary represents a row in a dataframe.

#  Example
  ```jldoctest
  julia> using DataFrames, JSON3, GeneralUtils
  julia> df = DataFrame(A = [1, 2, 3], B = ["apple", "banana", "cherry"])
  julia> vectorDict = GeneralUtils.dfToVectorDict(df)
  [Dict{String, Any}("B" => "apple", "A" => 1),
  Dict{String, Any}("B" => "banana", "A" => 2)
  Dict{String, Any}("B" => "cherry", "A" => 3)]
  ```

#  Signature
"""
function dfToVectorDict(df::DataFrame)
  colnames = names(df)  # column names as strings; they become the keys of every row dict
  # One Dict{String, Any} per row, gathered in an untyped (Any) vector.
  return Any[Dict{String, Any}(col => row[col] for col in colnames) for row in eachrow(df)]
end


""" Turn a large vector of dictionaries into smaller one

# Arguments
  - `data`
      data to be partioning
  - `partsize`
    how many dicts per part

# Return
  - `parts`
    a dictionay of parts

# Example
  ```jldoctest
  julia> using GeneralUtils, Dates, JSON3, UUIDs
  julia> vecDict = [Dict("a" => i) for i in 1:10]
  julia> d = GeneralUtils.disintegrate_vectorDict(vecDict, 3)
  julia> d.dataparts
  Dict{Int64, Vector{Dict}} with 4 entries:
  1 => [Dict("a"=>1), Dict("a"=>2), Dict("a"=>3)]
  2 => [Dict("a"=>4), Dict("a"=>5), Dict("a"=>6)]
  3 => [Dict("a"=>7), Dict("a"=>8), Dict("a"=>9)]
  4 => [Dict("a"=>10)]
  ```

# Signature
"""
function disintegrate_vectorDict(data::Vector, partsize::Integer
  )
  # A non-positive part size would divide by zero or yield meaningless part numbers.
  partsize > 0 || throw(ArgumentError("partsize must be positive, got $partsize"))
  @debug "--> disintegrate_vectorDict()"

  parts = Dict{Int, Vector{Dict}}()
  for (i, dict) in enumerate(data)
    # Elements 1..partsize go to part 1, the next `partsize` elements to part 2, and so on.
    partkey = (i - 1) ÷ partsize + 1
    push!(get!(() -> Vector{Dict}(), parts, partkey), dict)
  end

  return (datatype="vector{Dict}", totalparts=length(parts), partsize=partsize, dataparts=parts)
end


""" Get a value from a DataFrame row by a given key

# Arguments
  - `row::DataFrameRow`
    The DataFrame row to retrieve the value from.
  - `key::Symbol`
    The column name (as a symbol) whose value is to be retrieved.

# Return
  - `Any`
    The value of the specified column in the given row.

# Example
  ```jldoctest
  julia> using DataFrames

  julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
  2×2 DataFrame
   Row │ name    age
       │ String  Int64
   ┌─────┼─────────┼───────
   │ 1   │ Alice   25
   │ 2   │ Bob     30

  julia> getDataFrameValue(df[1, :], :name)
  "Alice"
  ```

# Signature
"""
# Property access with a runtime column name: the value stored in `row` under column `key`.
getDataFrameValue(row::DataFrameRow, key::Symbol) = getproperty(row, key)


""" Convert a DataFrame row to a key:value string

# Arguments
  - `row::DataFrameRow`
    The DataFrame row to convert.

# Return
  - `String`
    A string containing the formatted representation of the row, with each column prefixed by its name and separated by commas.

# Example
  ```jldoctest
  julia> using DataFrames

  julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
  2×2 DataFrame
   Row │ name    age
       │ String  Int64
   ┌─────┼─────────┼───────
   │ 1   │ Alice   25
   │ 2   │ Bob     30

  julia> dfRowtoString(df[1, :])
  "name: Alice, age: 25"
  ```

# Signature
"""
function dfRowtoString(row::DataFrameRow)::String
  # One "column: value" fragment per column. join() puts ", " only between fragments, so
  # no trailing separator has to be sliced off afterwards; the previous byte-offset slice
  # raised StringIndexError when the last value ended in a multi-byte character.
  fragments = ("$key: $(getDataFrameValue(row, key))" for key in keys(row))
  return join(fragments, ", ")
end


""" Convert a DataFrame to a string representation

# Arguments
  - `df::DataFrame`
    The DataFrame to convert, where each row will be converted to a string.

# Return
  - `String`
    A string containing the formatted representation of the DataFrame, with each row prefixed by its index and separated by newlines.

# Example
  ```jldoctest
  julia> using DataFrames

  julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
  2×2 DataFrame
   Row │ name    age
       │ String  Int64
   ┌─────┼─────────┼───────
   │ 1   │ Alice   25
   │ 2   │ Bob     30

  julia> dfToString(df)
  "1) name: Alice, age: 25\n2) name: Bob, age: 30\n"
  ```

# Signature
"""
function dfToString(df::DataFrame)
  buf = IOBuffer()
  # Emit "<row number>) <row text>" per row; every row, including the last, ends in a newline.
  for (rownumber, row) in enumerate(eachrow(df))
    print(buf, rownumber, ") ", dfRowtoString(row), "\n")
  end
  return String(take!(buf))
end


""" Convert a DataFrame to a list of JSON strings

# Arguments
  - `df::DataFrame`
    The DataFrame to convert, where each row will be converted to a JSON string.

# Return
  - `Vector{String}`
    A vector containing the JSON representation of each row in the DataFrame.

# Example
  ```jldoctest
  julia> using DataFrames

  julia> df = DataFrame(name=["Alice", "Bob"], age=[25, 30])
  2×2 DataFrame
   Row │ name    age
       │ String  Int64
   ┌─────┼─────────┼───────
   │ 1   │ Alice   25
   │ 2   │ Bob     30

  julia> dataframe_to_json_list(df)
  2-element Vector{String}:
  "{\"name\":\"Alice\",\"age\":25}"
  "{\"name\":\"Bob\",\"age\":30}"
  ```

# Signature
"""
function dataframe_to_json_list(df::DataFrame)::Vector{String}
  # Serialization goes through JSON3, the JSON package this file actually loads
  # (the earlier `JSON.json` call referred to a module that is not loaded here).
  colnames = names(df)
  jsonrows = String[]
  for row in eachrow(df)
    # An ordered dict keeps the JSON fields in the DataFrame's column order.
    record = OrderedDict{String, Any}(col => row[col] for col in colnames)
    push!(jsonrows, JSON3.write(record))
  end
  return jsonrows
end


""" Convert a dictionary to a string representation.

# Arguments
  - `od::OrderedDict`
    The OrderedDict to convert, where each key-value pair will be represented as "index) key: value".

# Return
  - `String`
    A string containing the representation of each key-value pair in the OrderedDict.

# Example
  ```jldoctest
  julia> using DataStructures

  julia> od = OrderedDict("name" => "Alice", "age" => 25)
  OrderedDict{String,Any} with 2 entries:
    "name" => "Alice"
    "age"  => 25

  julia> dict_to_string(od)
  "1) name: Alice, 2) age: 25"
  ```

# Signature
"""
function dict_to_string(od::T) where {T<:AbstractDict}
  # Number the entries from 1 in the dictionary's own iteration order.
  entries = ("$position) $key: $value" for (position, (key, value)) in enumerate(od))
  return join(entries, ", ")
end


"""
    extract_triple_backtick_text(input::String) -> Vector{String}

Extracts text enclosed within triple backticks (```) from the given string.

# Arguments:
- `input::String`: The input string containing potential triple backtick blocks.

# Returns:
- `Vector{String}`: A vector of strings, each representing a block of text enclosed within triple backticks found in the input string.

# Examples:
  ```julia
  julia> extract_triple_backtick_text("Here is some text ```with a code block``` and more text.")
  1-element Vector{String}:
  "with a code block"
  ```
"""
function extract_triple_backtick_text(input::String)::Vector{String}
  # Lazily matches the shortest run of any characters (newlines included) between two
  # triple-backtick fences; the fences themselves are outside the capture group.
  fenced = r"```([\s\S]*?)```"
  return String[found.captures[1] for found in eachmatch(fenced, input)]
end


"""
    detect_keyword(keyword::String, text::String) -> Union{Nothing, String}

Detects if a keyword exists in the text in different case variations (lowercase, uppercase first letter, or all uppercase).

# Arguments:
- `keyword::String`: The keyword to search for
- `text::String`: The text to search in

# Returns:
- `Union{Nothing, String}`: Returns the matched keyword variation if found, otherwise returns nothing

# Examples:
  ```julia
  julia> detect_keyword("test", "This is a Test case")
  "Test"

  julia> detect_keyword("error", "NO ERRORS FOUND")
  "ERROR"

  julia> detect_keyword("missing", "complete data")
  nothing
  ```

# Signature
"""
function detect_keyword(keyword::String, text::String)::Union{Nothing, String}
  # Candidates in priority order: as given, first letter uppercased, fully uppercased.
  candidates = [keyword, uppercasefirst(keyword), uppercase(keyword)]

  # Position of the first candidate that occurs as a substring of the text, if any.
  pos = findfirst(candidate -> occursin(candidate, text), candidates)

  # The candidate spelling itself is returned, not the surrounding word in the text.
  return pos === nothing ? nothing : candidates[pos]
end


"""
    countGivenWords(text::String, words::Vector{String}) -> Vector{Int}

Count the occurrences of each word in the given list within the provided text.

# Arguments
- `text::String`: The input text to search through.
- `words::Vector{String}`: A vector of words whose occurrences need to be counted.

# Returns
- `Vector{Int64}`: Their respective counts in the `text`.

# Examples
  ```julia
  julia> GeneralUtils.countGivenWords("hello world hello", ["hello", "world"])
  2-element Vector{Int64}:
    2
    1

  julia> GeneralUtils.countGivenWords("foo bar baz foo", ["foo", "qux"])
  2-element Vector{Int64}:
    2
    0
  ```

# Signature
"""
function countGivenWords(text::String, words::Vector{String})::Vector{Int}
  # Splitting on a word yields one more piece than there are occurrences of it, so the
  # count is the number of pieces minus one. Matching is by substring, not whole word.
  return Int[length(split(text, word)) - 1 for word in words]
end


"""
    remove_french_accents(text::String) -> String

Remove French accents from the given text.

# Arguments
- `text::String`: The input string containing French accents.

# Returns
- `String`: The input string with all French accents removed.

# Examples
  ```julia
  julia> remove_french_accents("Café")
  "Cafe"

  julia> remove_french_accents("L'été est beau.")
  "L'ete est beau."
  ```

# Signature
"""
function remove_french_accents(text::AbstractString)::AbstractString
  # Lookup table: accented (or typographic) character => plain replacement.
  plainchar = Dict{Char, Char}(
    'à' => 'a', 'â' => 'a', 'ä' => 'a', 'á' => 'a',
    'é' => 'e', 'è' => 'e', 'ê' => 'e', 'ë' => 'e',
    'î' => 'i', 'ï' => 'i', 'í' => 'i',
    'ñ' => 'n',
    'ô' => 'o', 'ö' => 'o', 'ò' => 'o', 'ó' => 'o',
    'ù' => 'u', 'û' => 'u', 'ü' => 'u',
    'ÿ' => 'y',
    'ç' => 'c',
    'Ä' => 'A',
    'É' => 'E',
    'Ö' => 'O',
    'Ü' => 'U',
    '’' => '\'',  # typographic apostrophe => plain apostrophe
  )

  # Characters without a table entry pass through unchanged.
  return join(get(plainchar, ch, ch) for ch in text)
end


end # module util