diff --git a/Manifest.toml b/Manifest.toml index a74bf74..c29e4fe 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.11.2" +julia_version = "1.11.4" manifest_format = "2.0" project_hash = "75c6a269a13b222c106479d2177b05facfa23f74" @@ -310,7 +310,7 @@ version = "0.3.27+1" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+2" +version = "0.8.1+4" [[deps.OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] diff --git a/Project.toml b/Project.toml index fd80752..6be28fb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GeneralUtils" uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe" authors = ["tonaerospace "] -version = "0.2.2" +version = "0.2.3" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" diff --git a/src/communication.jl b/src/communication.jl index 1160915..18acde8 100644 --- a/src/communication.jl +++ b/src/communication.jl @@ -599,15 +599,8 @@ function sendReceiveMqttMsg(mqttInstance::mqttClientInstance_v2, receivechannel: )::NamedTuple where {T<:Any} timepass = nothing - attempts = 0 + attempts = 1 while attempts <= maxattempt - attempts += 1 - if attempts > 1 - println("\n attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())") - pprintln(outgoingMsg) - println(" attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())\n") - end - sendMqttMsg(mqttInstance, outgoingMsg) starttime = Dates.now() @@ -632,6 +625,12 @@ function sendReceiveMqttMsg(mqttInstance::mqttClientInstance_v2, receivechannel: end sleep(1) end + if attempts > 1 + println("\n attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())") + pprintln(outgoingMsg) + println(" attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())\n") + end + attempts += 1 end return (success=false, diff --git a/src/llmUtil.jl b/src/llmUtil.jl index b0a90f7..a021dd9 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -1,6 +1,6 @@ module llmUtil -export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection +export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext, extractthink using UUIDs, JSON3, Dates using GeneralUtils @@ -43,7 +43,7 @@ julia> formattedtext = YiemAgent.formatLLMtext_llama3instruct(d[:name], d[:text] Signature """ function formatLLMtext_llama3instruct(name::T, text::T; - assistantStarter::Bool=true) where {T<:AbstractString} + assistantStarter::Bool=false) where {T<:AbstractString} formattedtext = if name == "system" """ @@ -68,28 +68,10 @@ function formatLLMtext_llama3instruct(name::T, text::T; return formattedtext end -# function formatLLMtext_llama3instruct(name::T, text::T) where {T<:AbstractString} -# formattedtext = -# if name == "system" -# """ -# <|begin_of_text|> -# <|start_header_id|>$name<|end_header_id|> -# $text -# <|eot_id|> -# """ -# else -# """ -# <|start_header_id|>$name<|end_header_id|> -# $text -# <|eot_id|> -# """ -# end -# return formattedtext -# end function formatLLMtext_qwen(name::T, text::T; - assistantStarter::Bool=true) where {T<:AbstractString} + assistantStarter::Bool=false) where {T<:AbstractString} formattedtext = if name == "system" """ @@ -116,14 +98,94 @@ function formatLLMtext_qwen(name::T, text::T; end -""" Convert a chat messages in vector of dictionary into LLM model instruct format. +function formatLLMtext_qwen3(name::T, text::T; + assistantStarter::Bool=false) where {T<:AbstractString} + formattedtext = + if name == "system" + """ + <|im_start|>$name + $text + <|im_end|> + """ + else + """ + <|im_start|>$name + $text + <|im_end|> + """ + end + + if assistantStarter + formattedtext *= + """ + <|im_start|>assistant + """ + end + + return formattedtext +end + + +function formatLLMtext_phi4(name::T, text::T; + assistantStarter::Bool=false) where {T<:AbstractString} + formattedtext = + if name == "system" + """ + <|system|> + $text + <|end|> + """ + else + """ + <|assistant|> + $text + <|end|> + """ + end + + if assistantStarter + formattedtext *= + """ + <|assistant|> + """ + end + + return formattedtext +end + + +function formatLLMtext_granite3(name::T, text::T; + assistantStarter::Bool=false) where {T<:AbstractString} + formattedtext = + if name == "system" + """ + <|start_of_role|>system<|end_of_role|>{$text}<|end_of_text|> + """ + else + """ + <|start_of_role|>$name<|end_of_role|>{$text}<|end_of_text|> + """ + end + + if assistantStarter + formattedtext *= + """ + <|start_of_role|>assistant<|end_of_role|>{ + """ + end + + return formattedtext +end + + + +""" Convert a vector of chat message dictionaries into LLM model instruct format. # Arguments - `messages::Vector{Dict{Symbol, T}}` - message owner name e.f. "system", "user" or "assistant" + A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message). - `formatname::T` - format name to be used - + The name of the format to be used for converting the chat messages. # Return - `formattedtext::String` text formatted to model format @@ -140,31 +202,137 @@ julia> chatmessage = [ julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct") "<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n" ``` - -# Signature """ -function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct" - )::String where {T<:Any} - f = if formatname == "llama3instruct" - formatLLMtext_llama3instruct - elseif formatname == "mistral" - # not define yet - elseif formatname == "phi3instruct" - # not define yet - elseif formatname == "qwen" - formatLLMtext_qwen - else - error("$formatname template not define yet") - end +function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String + )::String where {T<:AbstractString} + f = + if formatname == "llama3instruct" + formatLLMtext_llama3instruct + elseif formatname == "mistral" + # not define yet + elseif formatname == "phi3instruct" + # not define yet + elseif formatname == "qwen" + formatLLMtext_qwen + elseif formatname == "qwen3" + formatLLMtext_qwen3 + elseif formatname == "phi4" + formatLLMtext_phi4 + elseif formatname == "granite3" + formatLLMtext_granite3 + else + error("$formatname template not define yet") + end str = "" - for t in messages - str *= f(t[:name], t[:text]) + for (i, t) in enumerate(messages) + if i < length(messages) + str *= f(t[:name], t[:text]) + else + str *= f(t[:name], t[:text]; assistantStarter=true) + end end return str end +""" Revert LLM-format response back into regular text. + +# Arguments + - `text::String` + The LLM formatted string to be converted. + +# Return + - `normalText::String` + The original plain text extracted from the given LLM-formatted string. + +# Example +```jldoctest +julia> using Revise +julia> using YiemAgent +julia> response = "<|begin_of_text|>This is a sample system instruction.<|eot_id|>" +julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3") +"This is a sample system instruction." +``` +""" +function deFormatLLMtext(text::String, formatname::String; includethink::Bool=false + )::String + f = + if formatname == "granite3" + deFormatLLMtext_granite3 + elseif formatname == "qwen3" + deFormatLLMtext_qwen3 + else + error("$formatname template not define yet") + end + + r = f(text) + result = r === nothing ? text : r + return result +end + + +""" Revert LLM-format response back into regular text for Granite 3 format. + +# Arguments + - `text::String` + The LLM formatted string to be converted. + +# Return + - `normalText::Union{Nothing, String}` + The original plain text extracted from the given LLM-formatted string. + Returns nothing if the text is not in Granite 3 format. + +# Example +```jldoctest +julia> using Revise +julia> using YiemAgent +julia> response = "{This is a sample LLM response.}" +julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3") +"This is a sample LLM response." +""" +function deFormatLLMtext_granite3(text::String)::Union{Nothing, String} + # check if '{' and '}' are in the text because it's a special format for the LLM response + if contains(text, "<|im_start|>assistant") + # get the text between '{' and '}' + text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1] + return text_between_braces + elseif text[end] == '}' + text = "{$text" + text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1] + else + return nothing + end +end + + +function deFormatLLMtext_qwen3(text::String)::Union{Nothing, String} + return text +end + +# function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String} +# think = nothing +# str = nothing + +# if occursin("", text) +# r = GeneralUtils.extractTextBetweenString(text, "", "") +# if r[:success] +# think = r[:text] +# end +# str = string(split(text, "")[2]) +# end + +# if includethink == true && occursin("", text) +# result = "ModelThought: $think $str" +# return result +# elseif includethink == false && occursin("", text) +# result = str +# return result +# else +# return text +# end +# end + """ Attemp to correct LLM response's incorrect JSON response. @@ -255,7 +423,20 @@ function jsoncorrection(config::T1, input::T2, correctJsonExample::T3; end - +function extractthink(text::String) + think = nothing + str = nothing + if occursin("", text) + r = GeneralUtils.extractTextBetweenString(text, "", "") + if r[:success] + think = r[:text] + end + str = string(split(text, "")[2]) + else + str = text + end + return think, str +end diff --git a/src/util.jl b/src/util.jl index 8021f85..3dabd31 100644 --- a/src/util.jl +++ b/src/util.jl @@ -6,7 +6,8 @@ export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, rep dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, dictToString_numbering, extract_triple_backtick_text, countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter, - convertCamelSnakeKebabCase + extractTextBetweenString, + convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames @@ -42,6 +43,7 @@ function timedifference(starttime::DateTime, stoptime::DateTime, unit::String):: diff = stoptime - starttime unit = lowercase(unit) + # Check the unit and calculate the time difference accordingly if unit == "milliseconds" return diff.value elseif unit == "seconds" @@ -306,7 +308,8 @@ function textToDict(text::String, detectKeywords::Vector{String}; dictKey_ = reverse(dictKey) # process text from back to front - for (i,keyword) in enumerate(reverse(kw)) + rkw = reverse(kw) + for (i,keyword) in enumerate(rkw) # Find the position of the keyword in the text keywordidx = findlast(keyword, remainingtext) dKey = dictKey_[i] @@ -770,6 +773,123 @@ function extract_triple_backtick_text(input::String)::Vector{String} end +wordwindow(word::String, startindex::Integer)::UnitRange = startindex:startindex + length(word) -1 + +function cuttext(range, text) + # check whether range is outside text boundary + if range.start > length(text) || range.stop > length(text) + return nothing + else + return text[range] + end +end + +""" + detect_keyword(keywords::AbstractVector{String}, text::String; mode::Union{String, Nothing}=nothing, delimiter::AbstractVector=[' ', '\n', '.']) -> Dict{String, Integer} + +Detects and counts occurrences of multiple keywords in the text in different case variations (lowercase, uppercase first letter, or all uppercase). + +# Arguments +- `keywords::AbstractVector{String}` Vector of keywords to search for +- `text::String` The text to search in + +# Keyword Arguments +- `mode::Union{String, Nothing}` When set to "individual", only counts matches that are individual words (default: nothing) +- `delimiter::AbstractVector` Characters used to determine word boundaries when mode="individual" (default: [' ', '\n', '.']) + +# Returns +- `Dict{String, Integer}` Returns a dictionary mapping each keyword to its count in the text (0 if not found) + +# Examples + ```jldoctest + julia> detect_keyword(["test", "example"], "This is a Test EXAMPLE") + Dict{String, Integer}("test" => 1, "example" => 1) + + julia> detect_keyword(["cat"], "cats and category", mode="individual") + Dict{String, Integer}("cat" => 0) + + julia> detect_keyword(["error"], "No ERRORS found!") + Dict{String, Integer}("error" => 1) + ``` + +# Signature +""" +# function detect_keyword(keywords::T1, text::String; +# mode::Union{String, Nothing}=nothing, delimiter::T2=[' ', '\n', '.'] +# )::Dict{String, Integer} where {T1<:AbstractVector, T2<:AbstractVector} +# # Initialize dictionary to store keyword counts +# kwdict = Dict{String, Integer}() +# for i in keywords +# kwdict[i] = 0 +# end + +# startindex = 1 +# # Iterate through each keyword and search for matches in text +# for kw in keywords +# # Check each possible starting position in the text +# for startindex in 1:1:length(text) +# # Get the window range for current keyword at current position +# wordwindows = wordwindow(kw, startindex) +# # Extract the text slice for comparison +# cuttexts = cuttext(wordwindows, text) +# if cuttexts !== nothing +# # Try to detect keyword in current text slice +# detected_kw = detect_keyword(kw, cuttexts) +# if detected_kw !== nothing && mode === nothing +# # Increment count if keyword found and no mode restrictions +# kwdict[kw] +=1 +# elseif detected_kw !== nothing && mode === "individual" +# # For individual word mode, check word boundaries +# # Check if character before keyword is a delimiter or start of text +# checkbefore = +# if wordwindows.start > 1 && +# text[wordwindows.start-1] ∈ delimiter +# true +# elseif wordwindows.start == 1 +# true +# else +# false +# end + +# # Check if character after keyword is a delimiter or end of text +# checkafter = +# if wordwindows.stop < length(text) && +# text[wordwindows.stop+1] ∈ delimiter +# true +# elseif wordwindows.stop == length(text) +# true +# else +# false +# end +# # Only count keyword if it's a complete word +# if checkbefore && checkafter +# kwdict[kw] +=1 +# end +# end +# end +# end +# end +# return kwdict +# end + + +function detect_keyword(keywords::T, text::String)::Dict{String, Integer} where {T<:AbstractVector} + kw = Dict{String, Integer}() + splittext = string.(split(text, " ")) + # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list + for keyword in keywords + ws = detect_keyword.(keyword, splittext) + total = sum(issomething.(ws)) + if total != 0 + kw[keyword] = total + else + kw[keyword] = 0 + end + end + return kw +end + + """ detect_keyword(keyword::String, text::String) -> Union{Nothing, String} @@ -924,9 +1044,11 @@ Extracts and returns the text that is enclosed between two specified characters # Examples ```jldoctest -julia> text = "Hello [World]!" +julia> text = "Hello [World]! [Yay]" julia> extracted_text = extractTextBetweenCharacter(text, '[', ']') -println(extracted_text) # Output: "World" +2-element Vector{Any}: + "World" + "Yay" ``` """ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char) @@ -950,6 +1072,29 @@ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Cha end +function extractTextBetweenString(text::String, startstr::String, endstr::String) + # check whether startstr is in the text or not + isStartStr = split(text, startstr) + if length(isStartStr) > 2 + return (success=false, error="There are more than one occurrences of the start string '$startstr' in the text. Text must has only one start string", errorcode=2, result=nothing) + elseif length(isStartStr) == 1 + return (success=false, error="There are no start string '$startstr' in the text. Text must has only one start string", errorcode=1, result=nothing) + end + + # check whether endstr is in the text or not + isEndStr = split(text, endstr) + if length(isEndStr) > 2 + return (success=false, error="There are more than one occurrences of the end string '$endstr' in the text. Text must has only one end string", errorcode=3, result=nothing) + elseif length(isStartStr) == 1 + return (success=false, error="There are no end string '$endstr' in the text. Text must has only one end string", errorcode=4, result=nothing) + end + + s = string(split(isStartStr[2], endstr)[1]) + + return (success=true, error=nothing, errorcode=0, text=s) +end + + """ Determines if the given string follows camel case naming convention. @@ -1075,6 +1220,140 @@ function convertCamelSnakeKebabCase(text::T, tocase::Symbol)::String where {T<:A end +""" Check if a value is not `nothing`. + +# Arguments +- `x`: The value to check + +# Returns +- `Bool`: `true` if `x` is not `nothing`, `false` otherwise + +# Examples +```jldoctest +julia> issomething(1) +true +julia> issomething(nothing) +false +julia> issomething("test") +true +```` +""" +function issomething(x) + return x === nothing ? false : true +end + + +""" Adjust a given range to fit within the bounds of a vector's length. + +# Arguments + - `v::T1` + the input vector to check against + - `range::UnitRange` + the original range to be adjusted + +# Return + - `adjusted_range::UnitRange` + a range that is constrained to the vector's length, preventing out-of-bounds indexing + +# Example + +julia> v = [1, 2, 3, 4, 5] +julia> fitrange(v, 3:10) +3:5 + +""" +function fitrange(v::T1, range::UnitRange) where {T1<:AbstractVector} + totalelements = length(v) + + startind = + # check if user put start range greater than total event + if range.start > totalelements + totalelements + else + range.start + end + + stopind = + if range.stop > totalelements + totalelements + else + range.stop + end + + return startind:stopind +end + + +""" Find a unit range for a vector given a number of the most recent elements of interest. + +# Arguments + - `vectorLength::Integer` + the length of the vector to generate range from + - `n::Integer` + the number of most recent elements to include in range + + +# Return + - `UnitRange` + a range representing the n most recent elements of a vector with length vectorLength + +# Example +```jldoctest +julia> a = [1, 2, 3, 4, 5] +julia> recentElementsIndex(length(a), 3) +3:5 +julia> recentElementsIndex(length(a), 0) +5:5 +``` +""" +function recentElementsIndex(vectorlength::Integer, n::Integer; includelatest::Bool=false) + if n == 0 + error("n must be greater than 0") + end + + if includelatest + start = max(1, vectorlength - n + 1) + return start:vectorlength + else + startind = max(1, vectorlength - n) + endind = vectorlength -1 + return startind:endind + end +end + + +""" Find a unit range for a vector excluding the most recent elements. + +# Arguments + - `vectorlength::Integer` + the length of the vector to generate range from + - `n::Integer` + the number of most recent elements to exclude from range + +# Return + - `UnitRange` + a range representing the elements of the vector excluding the last `n` elements + +# Example +```jldoctest +julia> a = [1, 2, 3, 4, 5] +julia> nonRecentElementsIndex(length(a), 3) +1:2 +julia> nonRecentElementsIndex(length(a), 1) +1:4 +julia> nonRecentElementsIndex(length(a), 0) +1:5 +``` +""" +function nonRecentElementsIndex(vectorlength::Integer, n::Integer) + if n < 0 + error("n must be non-negative") + end + if n > vectorlength + return 1:0 # empty range + end + return 1:(vectorlength-n) +end diff --git a/test/.vscode/settings.json b/test/.vscode/settings.json new file mode 100644 index 0000000..ca4f214 --- /dev/null +++ b/test/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.environmentPath": "/appfolder/app/dev/GeneralUtils/test" +} \ No newline at end of file diff --git a/test/Manifest.toml b/test/Manifest.toml new file mode 100644 index 0000000..83f035b --- /dev/null +++ b/test/Manifest.toml @@ -0,0 +1,41 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.4" +manifest_format = "2.0" +project_hash = "71d91126b5a1fb1020e1098d9d492de2a4438fd2" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..0c36332 --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,2 @@ +[deps] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/etc.jl b/test/etc.jl new file mode 100644 index 0000000..4227812 --- /dev/null +++ b/test/etc.jl @@ -0,0 +1,7 @@ +python -> pandas -> dataframe -> csv + + + +julia -> DataFrames -> dataframe -> csv + + dict -> dataframe -> csv \ No newline at end of file diff --git a/test/runtest.jl b/test/runtest.jl deleted file mode 100644 index 2a844ed..0000000 --- a/test/runtest.jl +++ /dev/null @@ -1,44 +0,0 @@ -using Revise -using GeneralUtils, MQTTClient, JSON3 - -mqttMsgReceiveTopic = ["/receivetopic_1", "/receivetopic_2"] -mqttMsgReceiveChannel = (ch1=Channel(8), ch2=Channel(32)) -keepaliveChannel = Channel(8) -function onMsgCallback(topic, payload) - jobj = JSON3.read(String(payload)) - incomingMqttMsg = copy(jobj) # convert json object into julia dictionary recursively - - if occursin("topic_1", topic) - put!(mqttMsgReceiveChannel[:ch1], incomingMqttMsg) - elseif occursin("topic_2", topic) - put!(mqttMsgReceiveChannel[:ch2], incomingMqttMsg) - elseif occursin("keepalive", topic) - put!(keepaliveChannel, incomingMqttMsg) - else - println("undefined condition ", @__FILE__, " ", @__LINE__) - end - end -mqttInstance = GeneralUtils.mqttClientInstance_v2( - "mqtt.yiem.cc", - mqttMsgReceiveTopic, - mqttMsgReceiveChannel, - keepaliveChannel, - onMsgCallback - ) - - -_ = GeneralUtils.checkMqttConnection!(mqttInstance) - - -println("GeneralUtils test done") - - - - - - - - - - - diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..f415525 --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,39 @@ +using Test +using GeneralUtils: detect_keyword + +@testset "detect_keyword tests" begin + @test detect_keyword(["test"], "this is a test") == Dict("test" => 1) + + @test detect_keyword(["hello", "world"], "hello world hello") == Dict("hello" => 2, "world" => 1) + + @test detect_keyword(["cat"], "category") == Dict("cat" => 1) + + @test detect_keyword(["cat"], "category"; mode="individual") == Dict("cat" => 0) + + @test detect_keyword(["dog"], "dogs and cats"; mode="individual", delimiter=[' ']) == Dict("dog" => 0) + + @test detect_keyword(["test"], "test.case"; mode="individual", delimiter=['.']) == Dict("test" => 1) + + @test detect_keyword(["word"], "") == Dict("word" => 0) + + @test detect_keyword(String[], "some text") == Dict{String, Integer}() + + @test detect_keyword(["a", "b"], "a.b\nc"; delimiter=['.', '\n']) == Dict("a" => 1, "b" => 1) + + multiline_text = """ + first line + second line + first word + """ + @test detect_keyword(["first"], multiline_text) == Dict("first" => 2) + + @test detect_keyword(["word"], "word"; mode="individual") == Dict("word" => 1) + + @test detect_keyword(["test"], "testing.test.tester"; mode="individual", delimiter=['.']) == Dict("test" => 1) +end + + + + + +