diff --git a/src/util.jl b/src/util.jl index 88fe060..369d48a 100644 --- a/src/util.jl +++ b/src/util.jl @@ -3,7 +3,7 @@ module util export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, replaceDictKeys, findMatchingDictKey, textToDict, randstring, randstrings, timeout, dataframeToCSV, dfToVectorDict, disintegrate_vectorDict, getDataFrameValue, dfRowtoString, - dfToString, dataframe_to_json_list, dict_to_string + dfToString, dataframe_to_json_list, dict_to_string, extract_triple_backtick_text using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames @@ -279,6 +279,13 @@ function textToDict(text::String, keywords::Vector{String}; rightmarker::Union{String, Nothing}=nothing, symbolkey::Bool=false, lowercasekey::Bool=false )::OrderedDict + #[WORKING] make sure this function detect variation of a work e.g. agent, Agent, AGENT + kw = [] + # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list + for keyword in keywords + push!(kw, detect_keyword(keyword, text)) + end + od1, od2 = if symbolkey OrderedDict{Symbol, Any}(), OrderedDict{Symbol, Any}() @@ -288,7 +295,7 @@ function textToDict(text::String, keywords::Vector{String}; remainingtext = text - for keyword in reverse(keywords) + for keyword in reverse(kw) mkeyword = rightmarker !== nothing ? keyword * rightmarker : keyword # Find the position of the keyword in the text @@ -306,10 +313,10 @@ function textToDict(text::String, keywords::Vector{String}; end end - keywords = lowercasekey == true ? lowercase.(keywords) : keywords + kw = lowercasekey == true ? lowercase.(kw) : kw # correct the order - for keyword in keywords + for keyword in kw key = symbolkey == true ? 
"""
    extract_triple_backtick_text(input::AbstractString) -> Vector{String}

Return every piece of text that is enclosed by a pair of triple backticks in `input`.

Blocks are matched non-greedily from left to right, and a block may span several
lines. The backtick delimiters themselves are not part of the returned strings;
everything between them (including a language tag, if any) is returned unchanged.

# Arguments
- `input::AbstractString`: the string that may contain triple-backtick blocks.

# Returns
- `Vector{String}`: one entry per block, in order of appearance. Empty when `input`
  contains no complete block.

# Examples

    julia> extract_triple_backtick_text("Here is some text ```with a code block``` and more text.")
    1-element Vector{String}:
     "with a code block"
"""
function extract_triple_backtick_text(input::AbstractString)::Vector{String}
    # The `s` flag lets `.` match newlines, so multi-line blocks are captured too.
    fenced = r"```(.*?)```"s

    # The single capture group always participates in a match, so it is never
    # `nothing`; the typed comprehension converts each SubString to a String.
    return String[m.captures[1] for m in eachmatch(fenced, input)]
end

"""
    detect_keyword(keyword::AbstractString, text::AbstractString) -> Union{Nothing, String}

Find which case variation of `keyword` occurs in `text`.

The variations are tried in this order: `keyword` as given, first letter
uppercased, all uppercase, all lowercase. The first variation that occurs in
`text` as a substring is returned. Note that the returned value is the
*variation*, not the surrounding word in `text`.

# Arguments
- `keyword::AbstractString`: the keyword to look for.
- `text::AbstractString`: the text to search in.

# Returns
- `Union{Nothing, String}`: the first matching variation, or `nothing` when none of
  the variations occurs in `text`.

# Examples
```julia
julia> detect_keyword("test", "This is a Test case")
"Test"

julia> detect_keyword("error", "NO ERRORS FOUND")
"ERROR"

julia> detect_keyword("Agent", "the agent replied")
"agent"

julia> detect_keyword("missing", "complete data")   # returns nothing
```
"""
function detect_keyword(keyword::AbstractString, text::AbstractString)::Union{Nothing, String}
    base = String(keyword)

    # `unique` keeps the first occurrence, so the search order is preserved while
    # identical variations (e.g. an already-uppercase keyword) are checked once.
    variations = unique([base, uppercasefirst(base), uppercase(base), lowercase(base)])

    for variation in variations
        if occursin(variation, text)
            return variation
        end
    end

    # None of the variations occurs in the text.
    return nothing
end