From e6344f1a9215934e7cd2c123da7ccc22472b4827 Mon Sep 17 00:00:00 2001 From: tonaerospace Date: Mon, 17 Mar 2025 09:54:32 +0700 Subject: [PATCH 01/12] mark new version --- Project.toml | 2 +- src/llmUtil.jl | 49 +++++++++++++++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/Project.toml b/Project.toml index fd80752..6be28fb 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "GeneralUtils" uuid = "c6c72f09-b708-4ac8-ac7c-2084d70108fe" authors = ["tonaerospace "] -version = "0.2.2" +version = "0.2.3" [deps] CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b" diff --git a/src/llmUtil.jl b/src/llmUtil.jl index b0a90f7..ef9511c 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -68,25 +68,7 @@ function formatLLMtext_llama3instruct(name::T, text::T; return formattedtext end -# function formatLLMtext_llama3instruct(name::T, text::T) where {T<:AbstractString} -# formattedtext = -# if name == "system" -# """ -# <|begin_of_text|> -# <|start_header_id|>$name<|end_header_id|> -# $text -# <|eot_id|> -# """ -# else -# """ -# <|start_header_id|>$name<|end_header_id|> -# $text -# <|eot_id|> -# """ -# end -# return formattedtext -# end function formatLLMtext_qwen(name::T, text::T; assistantStarter::Bool=true) where {T<:AbstractString} @@ -116,6 +98,35 @@ function formatLLMtext_qwen(name::T, text::T; end +function formatLLMtext_phi4(name::T, text::T; + assistantStarter::Bool=true) where {T<:AbstractString} + formattedtext = + if name == "system" + """ + <|im_start|>$name<|im_sep|> + $text + <|im_end|> + """ + else + """ + <|im_start|>$name<|im_sep|> + $text + <|im_end|> + """ + end + + if assistantStarter + formattedtext *= + """ + <|im_start|>assistant<|im_sep|> + """ + end + + return formattedtext +end + + + """ Convert a chat messages in vector of dictionary into LLM model instruct format. # Arguments @@ -153,6 +164,8 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll # not define yet elseif formatname == "qwen" formatLLMtext_qwen + elseif formatname == "phi4" + formatLLMtext_phi4 else error("$formatname template not define yet") end From cb4d01c6128fa5e35f344e46b514dda6f020d259 Mon Sep 17 00:00:00 2001 From: tonaerospace Date: Thu, 20 Mar 2025 16:05:39 +0700 Subject: [PATCH 02/12] update --- Manifest.toml | 4 +-- src/util.jl | 62 ++++++++++++++++++++++++++++++++++++++ test/.vscode/settings.json | 3 ++ test/Manifest.toml | 41 +++++++++++++++++++++++++ test/Project.toml | 2 ++ test/etc.jl | 7 +++++ test/runtest.jl | 44 --------------------------- test/runtests.jl | 23 ++++++++++++++ 8 files changed, 140 insertions(+), 46 deletions(-) create mode 100644 test/.vscode/settings.json create mode 100644 test/Manifest.toml create mode 100644 test/Project.toml create mode 100644 test/etc.jl delete mode 100644 test/runtest.jl create mode 100644 test/runtests.jl diff --git a/Manifest.toml b/Manifest.toml index a74bf74..c29e4fe 100644 --- a/Manifest.toml +++ b/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.11.2" +julia_version = "1.11.4" manifest_format = "2.0" project_hash = "75c6a269a13b222c106479d2177b05facfa23f74" @@ -310,7 +310,7 @@ version = "0.3.27+1" [[deps.OpenLibm_jll]] deps = ["Artifacts", "Libdl"] uuid = "05823500-19ac-5b8b-9628-191a04bc5112" -version = "0.8.1+2" +version = "0.8.1+4" [[deps.OpenSpecFun_jll]] deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] diff --git a/src/util.jl b/src/util.jl index 8021f85..5e67e11 100644 --- a/src/util.jl +++ b/src/util.jl @@ -770,6 +770,49 @@ function extract_triple_backtick_text(input::String)::Vector{String} end +""" + detect_keyword(keywords::AbstractVector{String}, text::String) -> Vector{Union{Nothing, String}} + +Detects if multiple keywords exist in the text in different case variations (lowercase, uppercase first letter, or all uppercase). + +# Arguments: +- `keywords::AbstractVector{String}` Vector of keywords to search for +- `text::String` The text to search in + +# Returns: +- `Vector{Union{Nothing, String}}` Returns a vector containing the matched keyword variations if found, otherwise nothing for each keyword + +# Examples: + ```jldoctest + julia> detect_keyword(["test", "error", "case"], "This is a Test case with ERRORS case") + 2-element Vector{Union{Nothing, String}}: + "Test" + "ERRORS" + nothing + + julia> detect_keyword(["warning", "missing"], "Warning: data is complete") + 2-element Vector{Union{Nothing, String}}: + "Warning" + nothing + ``` + +# Signature +""" +function detect_keyword(keywords::T, text::String)::Union{Nothing, Dict} where {T<:AbstractVector} + kw = Dict{String, Any}() + splittext = string.(split(text, " ")) + # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list + for keyword in keywords + ws = detect_keyword.(keyword, splittext) + total = sum(issomething.(ws)) + if total != 0 + kw[keyword] = total + else + kw[keyword] = nothing + end + end + return kw +end """ detect_keyword(keyword::String, text::String) -> Union{Nothing, String} @@ -1075,8 +1118,27 @@ function convertCamelSnakeKebabCase(text::T, tocase::Symbol)::String where {T<:A end +""" Check if a value is not `nothing`. +# Arguments +- `x`: The value to check +# Returns +- `Bool`: `true` if `x` is not `nothing`, `false` otherwise + +# Examples +```jldoctest +julia> issomething(1) +true +julia> issomething(nothing) +false +julia> issomething("test") +true +```` +""" +function issomething(x) +return x === nothing ? false : true +end diff --git a/test/.vscode/settings.json b/test/.vscode/settings.json new file mode 100644 index 0000000..ca4f214 --- /dev/null +++ b/test/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "julia.environmentPath": "/appfolder/app/dev/GeneralUtils/test" +} \ No newline at end of file diff --git a/test/Manifest.toml b/test/Manifest.toml new file mode 100644 index 0000000..83f035b --- /dev/null +++ b/test/Manifest.toml @@ -0,0 +1,41 @@ +# This file is machine-generated - editing it directly is not advised + +julia_version = "1.11.4" +manifest_format = "2.0" +project_hash = "71d91126b5a1fb1020e1098d9d492de2a4438fd2" + +[[deps.Base64]] +uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" + +[[deps.InteractiveUtils]] +deps = ["Markdown"] +uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" + +[[deps.Logging]] +uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.Markdown]] +deps = ["Base64"] +uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" + +[[deps.Random]] +deps = ["SHA"] +uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" + +[[deps.SHA]] +uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" +version = "0.7.0" + +[[deps.Serialization]] +uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" + +[[deps.Test]] +deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] +uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" diff --git a/test/Project.toml b/test/Project.toml new file mode 100644 index 0000000..0c36332 --- /dev/null +++ b/test/Project.toml @@ -0,0 +1,2 @@ +[deps] +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" diff --git a/test/etc.jl b/test/etc.jl new file mode 100644 index 0000000..4227812 --- /dev/null +++ b/test/etc.jl @@ -0,0 +1,7 @@ +python -> pandas -> dataframe -> csv + + + +julia -> DataFrames -> dataframe -> csv + + dict -> dataframe -> csv \ No newline at end of file diff --git a/test/runtest.jl b/test/runtest.jl deleted file mode 100644 index 2a844ed..0000000 --- a/test/runtest.jl +++ /dev/null @@ -1,44 +0,0 @@ -using Revise -using GeneralUtils, MQTTClient, JSON3 - -mqttMsgReceiveTopic = ["/receivetopic_1", "/receivetopic_2"] -mqttMsgReceiveChannel = (ch1=Channel(8), ch2=Channel(32)) -keepaliveChannel = Channel(8) -function onMsgCallback(topic, payload) - jobj = JSON3.read(String(payload)) - incomingMqttMsg = copy(jobj) # convert json object into julia dictionary recursively - - if occursin("topic_1", topic) - put!(mqttMsgReceiveChannel[:ch1], incomingMqttMsg) - elseif occursin("topic_2", topic) - put!(mqttMsgReceiveChannel[:ch2], incomingMqttMsg) - elseif occursin("keepalive", topic) - put!(keepaliveChannel, incomingMqttMsg) - else - println("undefined condition ", @__FILE__, " ", @__LINE__) - end - end -mqttInstance = GeneralUtils.mqttClientInstance_v2( - "mqtt.yiem.cc", - mqttMsgReceiveTopic, - mqttMsgReceiveChannel, - keepaliveChannel, - onMsgCallback - ) - - -_ = GeneralUtils.checkMqttConnection!(mqttInstance) - - -println("GeneralUtils test done") - - - - - - - - - - - diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..c00177a --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,23 @@ +using Test +using GeneralUtils + +@testset "detect_keyword tests" begin + @test GeneralUtils.detect_keyword(["test"], "this is a test string") == Dict("test" => 1) + @test GeneralUtils.detect_keyword(["hello", "world"], "hello world") == Dict("hello" => 1, "world" => 1) + @test GeneralUtils.detect_keyword(["missing"], "no keyword here") == Dict("missing" => nothing) + @test GeneralUtils.detect_keyword(["a", "b"], "a a b b b") == Dict("a" => 2, "b" => 3) + @test GeneralUtils.detect_keyword(String[], "empty keywords") == Dict{String, Any}() + @test GeneralUtils.detect_keyword(["keyword"], "") == Dict("keyword" => nothing) + @test GeneralUtils.detect_keyword(["case"], "CASE case Case cAsE") == Dict("case" => 4) + mixed_results = GeneralUtils.detect_keyword(["found", "notfound"], "found found found") + @test mixed_results["found"] == 3 + @test mixed_results["notfound"] === nothing + + special_chars = GeneralUtils.detect_keyword(["test!"], "test! test? test.") + @test special_chars["test!"] == 1 +end + + + + + From 840b0e6205fc739ad0e67915f5189718d9c182b0 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Sat, 22 Mar 2025 09:41:39 +0700 Subject: [PATCH 03/12] update --- src/communication.jl | 15 +++++++-------- src/util.jl | 25 +++++++++++-------------- 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/src/communication.jl b/src/communication.jl index 1160915..18acde8 100644 --- a/src/communication.jl +++ b/src/communication.jl @@ -599,15 +599,8 @@ function sendReceiveMqttMsg(mqttInstance::mqttClientInstance_v2, receivechannel: )::NamedTuple where {T<:Any} timepass = nothing - attempts = 0 + attempts = 1 while attempts <= maxattempt - attempts += 1 - if attempts > 1 - println("\n attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())") - pprintln(outgoingMsg) - println(" attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())\n") - end - sendMqttMsg(mqttInstance, outgoingMsg) starttime = Dates.now() @@ -632,6 +625,12 @@ function sendReceiveMqttMsg(mqttInstance::mqttClientInstance_v2, receivechannel: end sleep(1) end + if attempts > 1 + println("\n attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())") + pprintln(outgoingMsg) + println(" attempts $attempts/$maxattempt ", @__FILE__, ":", @__LINE__, " $(Dates.now())\n") + end + attempts += 1 end return (success=false, diff --git a/src/util.jl b/src/util.jl index 5e67e11..9f2024c 100644 --- a/src/util.jl +++ b/src/util.jl @@ -771,35 +771,30 @@ end """ - detect_keyword(keywords::AbstractVector{String}, text::String) -> Vector{Union{Nothing, String}} + detect_keyword(keywords::AbstractVector{String}, text::String) -> Dict{String, Integer} -Detects if multiple keywords exist in the text in different case variations (lowercase, uppercase first letter, or all uppercase). +Detects and counts occurrences of multiple keywords in the text in different case variations (lowercase, uppercase first letter, or all uppercase). # Arguments: - `keywords::AbstractVector{String}` Vector of keywords to search for - `text::String` The text to search in # Returns: -- `Vector{Union{Nothing, String}}` Returns a vector containing the matched keyword variations if found, otherwise nothing for each keyword +- `Dict{String, Integer}` Returns a dictionary mapping each keyword to its count in the text (0 if not found) # Examples: ```jldoctest julia> detect_keyword(["test", "error", "case"], "This is a Test case with ERRORS case") - 2-element Vector{Union{Nothing, String}}: - "Test" - "ERRORS" - nothing + Dict{String, Integer}("test" => 1, "error" => 1, "case" => 2) julia> detect_keyword(["warning", "missing"], "Warning: data is complete") - 2-element Vector{Union{Nothing, String}}: - "Warning" - nothing - ``` + Dict{String, Integer}("warning" => 1, "missing" => 0) + # Signature """ -function detect_keyword(keywords::T, text::String)::Union{Nothing, Dict} where {T<:AbstractVector} - kw = Dict{String, Any}() +function detect_keyword(keywords::T, text::String)::Dict{String, Integer} where {T<:AbstractVector} + kw = Dict{String, Integer}() splittext = string.(split(text, " ")) # use for loop and detect_keyword function to get the exact variation of each keyword in the text then push to kw list for keyword in keywords @@ -808,11 +803,13 @@ function detect_keyword(keywords::T, text::String)::Union{Nothing, Dict} where { if total != 0 kw[keyword] = total else - kw[keyword] = nothing + kw[keyword] = 0 end end return kw end + + """ detect_keyword(keyword::String, text::String) -> Union{Nothing, String} From 562f528c0195df06ab766227d056fb01720ce13a Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Thu, 27 Mar 2025 13:09:20 +0700 Subject: [PATCH 04/12] update --- src/llmUtil.jl | 10 +++--- src/util.jl | 80 +++++++++++++++++++++++++++++++++++++++++++----- test/runtests.jl | 23 ++++++-------- 3 files changed, 87 insertions(+), 26 deletions(-) diff --git a/src/llmUtil.jl b/src/llmUtil.jl index ef9511c..53fbd29 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -103,22 +103,22 @@ function formatLLMtext_phi4(name::T, text::T; formattedtext = if name == "system" """ - <|im_start|>$name<|im_sep|> + <|system|> $text - <|im_end|> + <|end|> """ else """ - <|im_start|>$name<|im_sep|> + <|assistant|> $text - <|im_end|> + <|end|> """ end if assistantStarter formattedtext *= """ - <|im_start|>assistant<|im_sep|> + <|assistant|> """ end diff --git a/src/util.jl b/src/util.jl index 9f2024c..681a73b 100644 --- a/src/util.jl +++ b/src/util.jl @@ -6,7 +6,7 @@ export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, rep dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, dictToString_numbering, extract_triple_backtick_text, countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter, - convertCamelSnakeKebabCase + convertCamelSnakeKebabCase, fitrange, lastElementsIndex using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames @@ -1134,14 +1134,80 @@ true ```` """ function issomething(x) -return x === nothing ? false : true + return x === nothing ? false : true +end + + +""" Adjust a given range to fit within the bounds of a vector's length. + +# Arguments + - `v::T1` + the input vector to check against + - `range::UnitRange` + the original range to be adjusted + +# Return + - `adjusted_range::UnitRange` + a range that is constrained to the vector's length, preventing out-of-bounds indexing + +# Example + +julia> v = [1, 2, 3, 4, 5] +julia> fitrange(v, 3:10) +3:5 + +""" +function fitrange(v::T1, range::UnitRange) where {T1<:AbstractVector} + totalelements = length(v) + + startind = + # check if user put start range greater than total event + if range.start > totalelements + totalelements + else + range.start + end + + stopind = + if range.stop > totalelements + totalelements + else + range.stop + end + + return startind:stopind +end + + +""" Find a unit range for a vector given a number of the most recent elements of interest. + +# Arguments + - `v::AbstractVector` + the input vector to extract recent elements from + - `n::Integer` + the number of most recent elements to include in the range + +# Return + - `UnitRange` + a range representing the last `n` elements of the vector + +# Example +```jldoctest +julia> a = [1, 2, 3, 4, 5] +julia> lastElementsIndex(a, 3) +3:5 +``` +""" +function lastElementsIndex(v::AbstractVector, n::Integer) + len = length(v) + if n == 0 + return 1:len + end + + start = max(1, len - n + 1) + return start:len end - - - - - end # module util \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index c00177a..5bc7609 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,23 +1,18 @@ using Test using GeneralUtils -@testset "detect_keyword tests" begin - @test GeneralUtils.detect_keyword(["test"], "this is a test string") == Dict("test" => 1) - @test GeneralUtils.detect_keyword(["hello", "world"], "hello world") == Dict("hello" => 1, "world" => 1) - @test GeneralUtils.detect_keyword(["missing"], "no keyword here") == Dict("missing" => nothing) - @test GeneralUtils.detect_keyword(["a", "b"], "a a b b b") == Dict("a" => 2, "b" => 3) - @test GeneralUtils.detect_keyword(String[], "empty keywords") == Dict{String, Any}() - @test GeneralUtils.detect_keyword(["keyword"], "") == Dict("keyword" => nothing) - @test GeneralUtils.detect_keyword(["case"], "CASE case Case cAsE") == Dict("case" => 4) - mixed_results = GeneralUtils.detect_keyword(["found", "notfound"], "found found found") - @test mixed_results["found"] == 3 - @test mixed_results["notfound"] === nothing - - special_chars = GeneralUtils.detect_keyword(["test!"], "test! test? test.") - @test special_chars["test!"] == 1 +@testset "lastElementsIndex" begin + @test GeneralUtils.lastElementsIndex([1,2,3,4,5], 3) == 3:5 + @test GeneralUtils.lastElementsIndex([1,2,3], 5) == 1:3 + @test GeneralUtils.lastElementsIndex([1], 1) == 1:1 + @test GeneralUtils.lastElementsIndex(collect(1:10), 4) == 7:10 + @test GeneralUtils.lastElementsIndex(Float64[], 2) == 1:0 + @test GeneralUtils.lastElementsIndex([1,2,3], 0) == length([1,2,3])+1:length([1,2,3]) + @test GeneralUtils.lastElementsIndex(["a","b","c"], 2) == 2:3 end + From 1da05f5cae41938f6af3c29509f4379fd737e71d Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Mon, 31 Mar 2025 21:30:29 +0700 Subject: [PATCH 05/12] update --- src/util.jl | 65 ++++++++++++++++++++++++++++++++++++++++-------- test/runtests.jl | 17 +++++++------ 2 files changed, 63 insertions(+), 19 deletions(-) diff --git a/src/util.jl b/src/util.jl index 681a73b..f34e2f3 100644 --- a/src/util.jl +++ b/src/util.jl @@ -6,7 +6,7 @@ export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, rep dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, dictToString_numbering, extract_triple_backtick_text, countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter, - convertCamelSnakeKebabCase, fitrange, lastElementsIndex + convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames @@ -1182,32 +1182,75 @@ end """ Find a unit range for a vector given a number of the most recent elements of interest. # Arguments - - `v::AbstractVector` - the input vector to extract recent elements from + - `vectorLength::Integer` + the length of the vector to generate range from - `n::Integer` - the number of most recent elements to include in the range + the number of most recent elements to include in range + # Return - `UnitRange` - a range representing the last `n` elements of the vector + a range representing the n most recent elements of a vector with length vectorLength # Example ```jldoctest julia> a = [1, 2, 3, 4, 5] -julia> lastElementsIndex(a, 3) +julia> recentElementsIndex(length(a), 3) 3:5 +julia> recentElementsIndex(length(a), 0) +5:5 ``` """ -function lastElementsIndex(v::AbstractVector, n::Integer) - len = length(v) +function recentElementsIndex(vectorlength::Integer, n::Integer) if n == 0 - return 1:len + error("n must be greater than 0") end - start = max(1, len - n + 1) - return start:len + start = max(1, vectorlength - n + 1) + return start:vectorlength +end + + +""" Find a unit range for a vector excluding the most recent elements. + +# Arguments + - `vectorlength::Integer` + the length of the vector to generate range from + - `n::Integer` + the number of most recent elements to exclude from range + +# Return + - `UnitRange` + a range representing the elements of the vector excluding the last `n` elements + +# Example +```jldoctest +julia> a = [1, 2, 3, 4, 5] +julia> nonRecentElementsIndex(length(a), 3) +1:2 +julia> nonRecentElementsIndex(length(a), 1) +1:4 +julia> nonRecentElementsIndex(length(a), 0) +1:5 +``` +""" +function nonRecentElementsIndex(vectorlength::Integer, n::Integer) + if n < 0 + error("n must be non-negative") + end + if n > vectorlength + return 1:0 # empty range + end + return 1:(vectorlength-n) end + + + + + + + end # module util \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl index 5bc7609..444903e 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,14 +1,15 @@ using Test using GeneralUtils -@testset "lastElementsIndex" begin - @test GeneralUtils.lastElementsIndex([1,2,3,4,5], 3) == 3:5 - @test GeneralUtils.lastElementsIndex([1,2,3], 5) == 1:3 - @test GeneralUtils.lastElementsIndex([1], 1) == 1:1 - @test GeneralUtils.lastElementsIndex(collect(1:10), 4) == 7:10 - @test GeneralUtils.lastElementsIndex(Float64[], 2) == 1:0 - @test GeneralUtils.lastElementsIndex([1,2,3], 0) == length([1,2,3])+1:length([1,2,3]) - @test GeneralUtils.lastElementsIndex(["a","b","c"], 2) == 2:3 +@testset "ealierElementsIndex" begin + @test GeneralUtils.ealierElementsIndex([1,2,3,4,5], 2) == 1:3 + @test GeneralUtils.ealierElementsIndex([1,2,3], 0) == 1:3 + @test GeneralUtils.ealierElementsIndex([1], 1) == 1:0 + @test GeneralUtils.ealierElementsIndex([], 0) == 1:0 + @test GeneralUtils.ealierElementsIndex([1,2,3,4], 4) == 1:0 + @test GeneralUtils.ealierElementsIndex([1,2,3,4], 5) == 1:0 + @test GeneralUtils.ealierElementsIndex(collect(1:10), 3) == 1:7 + @test_throws ErrorException GeneralUtils.ealierElementsIndex([1,2,3], -1) end From a894ad85ba794e123a2625432cc4a407d88782f3 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Fri, 4 Apr 2025 15:04:19 +0700 Subject: [PATCH 06/12] update --- src/util.jl | 95 +++++++++++++++++++++++++++++++++++++++++++----- test/runtests.jl | 40 +++++++++++++++----- 2 files changed, 116 insertions(+), 19 deletions(-) diff --git a/src/util.jl b/src/util.jl index f34e2f3..682f6c3 100644 --- a/src/util.jl +++ b/src/util.jl @@ -770,29 +770,106 @@ function extract_triple_backtick_text(input::String)::Vector{String} end +wordwindow(word::String, startindex::Integer)::UnitRange = startindex:startindex + length(word) -1 + +function cuttext(range, text) + # check whether range is outside text boundary + if range.start > length(text) || range.stop > length(text) + return nothing + else + return text[range] + end +end + """ - detect_keyword(keywords::AbstractVector{String}, text::String) -> Dict{String, Integer} + detect_keyword(keywords::AbstractVector{String}, text::String; mode::Union{String, Nothing}=nothing, delimiter::AbstractVector=[' ', '\n', '.']) -> Dict{String, Integer} Detects and counts occurrences of multiple keywords in the text in different case variations (lowercase, uppercase first letter, or all uppercase). -# Arguments: +# Arguments - `keywords::AbstractVector{String}` Vector of keywords to search for - `text::String` The text to search in -# Returns: +# Keyword Arguments +- `mode::Union{String, Nothing}` When set to "individual", only counts matches that are individual words (default: nothing) +- `delimiter::AbstractVector` Characters used to determine word boundaries when mode="individual" (default: [' ', '\n', '.']) + +# Returns - `Dict{String, Integer}` Returns a dictionary mapping each keyword to its count in the text (0 if not found) -# Examples: +# Examples ```jldoctest - julia> detect_keyword(["test", "error", "case"], "This is a Test case with ERRORS case") - Dict{String, Integer}("test" => 1, "error" => 1, "case" => 2) + julia> detect_keyword(["test", "example"], "This is a Test EXAMPLE") + Dict{String, Integer}("test" => 1, "example" => 1) - julia> detect_keyword(["warning", "missing"], "Warning: data is complete") - Dict{String, Integer}("warning" => 1, "missing" => 0) + julia> detect_keyword(["cat"], "cats and category", mode="individual") + Dict{String, Integer}("cat" => 0) + + julia> detect_keyword(["error"], "No ERRORS found!") + Dict{String, Integer}("error" => 1) + ``` - # Signature """ +# function detect_keyword(keywords::T1, text::String; +# mode::Union{String, Nothing}=nothing, delimiter::T2=[' ', '\n', '.'] +# )::Dict{String, Integer} where {T1<:AbstractVector, T2<:AbstractVector} +# # Initialize dictionary to store keyword counts +# kwdict = Dict{String, Integer}() +# for i in keywords +# kwdict[i] = 0 +# end + +# startindex = 1 +# # Iterate through each keyword and search for matches in text +# for kw in keywords +# # Check each possible starting position in the text +# for startindex in 1:1:length(text) +# # Get the window range for current keyword at current position +# wordwindows = wordwindow(kw, startindex) +# # Extract the text slice for comparison +# cuttexts = cuttext(wordwindows, text) +# if cuttexts !== nothing +# # Try to detect keyword in current text slice +# detected_kw = detect_keyword(kw, cuttexts) +# if detected_kw !== nothing && mode === nothing +# # Increment count if keyword found and no mode restrictions +# kwdict[kw] +=1 +# elseif detected_kw !== nothing && mode === "individual" +# # For individual word mode, check word boundaries +# # Check if character before keyword is a delimiter or start of text +# checkbefore = +# if wordwindows.start > 1 && +# text[wordwindows.start-1] ∈ delimiter +# true +# elseif wordwindows.start == 1 +# true +# else +# false +# end + +# # Check if character after keyword is a delimiter or end of text +# checkafter = +# if wordwindows.stop < length(text) && +# text[wordwindows.stop+1] ∈ delimiter +# true +# elseif wordwindows.stop == length(text) +# true +# else +# false +# end +# # Only count keyword if it's a complete word +# if checkbefore && checkafter +# kwdict[kw] +=1 +# end +# end +# end +# end +# end +# return kwdict +# end + + function detect_keyword(keywords::T, text::String)::Dict{String, Integer} where {T<:AbstractVector} kw = Dict{String, Integer}() splittext = string.(split(text, " ")) diff --git a/test/runtests.jl b/test/runtests.jl index 444903e..f415525 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,15 +1,35 @@ using Test -using GeneralUtils +using GeneralUtils: detect_keyword -@testset "ealierElementsIndex" begin - @test GeneralUtils.ealierElementsIndex([1,2,3,4,5], 2) == 1:3 - @test GeneralUtils.ealierElementsIndex([1,2,3], 0) == 1:3 - @test GeneralUtils.ealierElementsIndex([1], 1) == 1:0 - @test GeneralUtils.ealierElementsIndex([], 0) == 1:0 - @test GeneralUtils.ealierElementsIndex([1,2,3,4], 4) == 1:0 - @test GeneralUtils.ealierElementsIndex([1,2,3,4], 5) == 1:0 - @test GeneralUtils.ealierElementsIndex(collect(1:10), 3) == 1:7 - @test_throws ErrorException GeneralUtils.ealierElementsIndex([1,2,3], -1) +@testset "detect_keyword tests" begin + @test detect_keyword(["test"], "this is a test") == Dict("test" => 1) + + @test detect_keyword(["hello", "world"], "hello world hello") == Dict("hello" => 2, "world" => 1) + + @test detect_keyword(["cat"], "category") == Dict("cat" => 1) + + @test detect_keyword(["cat"], "category"; mode="individual") == Dict("cat" => 0) + + @test detect_keyword(["dog"], "dogs and cats"; mode="individual", delimiter=[' ']) == Dict("dog" => 0) + + @test detect_keyword(["test"], "test.case"; mode="individual", delimiter=['.']) == Dict("test" => 1) + + @test detect_keyword(["word"], "") == Dict("word" => 0) + + @test detect_keyword(String[], "some text") == Dict{String, Integer}() + + @test detect_keyword(["a", "b"], "a.b\nc"; delimiter=['.', '\n']) == Dict("a" => 1, "b" => 1) + + multiline_text = """ + first line + second line + first word + """ + @test detect_keyword(["first"], multiline_text) == Dict("first" => 2) + + @test detect_keyword(["word"], "word"; mode="individual") == Dict("word" => 1) + + @test detect_keyword(["test"], "testing.test.tester"; mode="individual", delimiter=['.']) == Dict("test" => 1) end From ccd91a7b6f9d8855277ca6a2775eeaf6ef169b23 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Mon, 7 Apr 2025 05:20:05 +0700 Subject: [PATCH 07/12] update --- src/util.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/util.jl b/src/util.jl index 682f6c3..a70e985 100644 --- a/src/util.jl +++ b/src/util.jl @@ -42,6 +42,7 @@ function timedifference(starttime::DateTime, stoptime::DateTime, unit::String):: diff = stoptime - starttime unit = lowercase(unit) + # Check the unit and calculate the time difference accordingly if unit == "milliseconds" return diff.value elseif unit == "seconds" From 14766ae171b427a5e59c2479a50e8071ce2063f1 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Sun, 13 Apr 2025 21:45:47 +0700 Subject: [PATCH 08/12] update --- src/llmUtil.jl | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/llmUtil.jl b/src/llmUtil.jl index 53fbd29..2d2ac45 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -43,7 +43,7 @@ julia> formattedtext = YiemAgent.formatLLMtext_llama3instruct(d[:name], d[:text] Signature """ function formatLLMtext_llama3instruct(name::T, text::T; - assistantStarter::Bool=true) where {T<:AbstractString} + assistantStarter::Bool=false) where {T<:AbstractString} formattedtext = if name == "system" """ @@ -71,7 +71,7 @@ end function formatLLMtext_qwen(name::T, text::T; - assistantStarter::Bool=true) where {T<:AbstractString} + assistantStarter::Bool=false) where {T<:AbstractString} formattedtext = if name == "system" """ @@ -99,7 +99,7 @@ end function formatLLMtext_phi4(name::T, text::T; - assistantStarter::Bool=true) where {T<:AbstractString} + assistantStarter::Bool=false) where {T<:AbstractString} formattedtext = if name == "system" """ @@ -155,7 +155,7 @@ julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct") # Signature """ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct" - )::String where {T<:Any} + )::String where {T<:AbstractString} f = if formatname == "llama3instruct" formatLLMtext_llama3instruct elseif formatname == "mistral" @@ -171,8 +171,12 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll end str = "" - for t in messages - str *= f(t[:name], t[:text]) + for (i, t) in enumerate(messages) + if i < length(messages) + str *= f(t[:name], t[:text]) + else + str *= f(t[:name], t[:text]; assistantStarter=true) + end end return str From 5108ad1f6b043de2793700a14003541b962570d0 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Fri, 25 Apr 2025 21:12:14 +0700 Subject: [PATCH 09/12] update --- src/llmUtil.jl | 133 +++++++++++++++++++++++++++++++++++++++++-------- src/util.jl | 6 ++- 2 files changed, 116 insertions(+), 23 deletions(-) diff --git a/src/llmUtil.jl b/src/llmUtil.jl index 2d2ac45..82a3708 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -1,6 +1,6 @@ module llmUtil -export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection +export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext using UUIDs, JSON3, Dates using GeneralUtils @@ -126,15 +126,38 @@ function formatLLMtext_phi4(name::T, text::T; end +function formatLLMtext_granite3(name::T, text::T; + assistantStarter::Bool=false) where {T<:AbstractString} + formattedtext = + if name == "system" + """ + <|start_of_role|>system<|end_of_role|>{$text}<|end_of_text|> + """ + else + """ + <|start_of_role|>$name<|end_of_role|>{$text}<|end_of_text|> + """ + end -""" Convert a chat messages in vector of dictionary into LLM model instruct format. + if assistantStarter + formattedtext *= + """ + <|start_of_role|>assistant<|end_of_role|>{ + """ + end + + return formattedtext +end + + + +""" Convert a vector of chat message dictionaries into LLM model instruct format. # Arguments - `messages::Vector{Dict{Symbol, T}}` - message owner name e.f. "system", "user" or "assistant" + A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message). - `formatname::T` - format name to be used - + The name of the format to be used for converting the chat messages. # Return - `formattedtext::String` text formatted to model format @@ -151,24 +174,25 @@ julia> chatmessage = [ julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct") "<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n" ``` - -# Signature """ -function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct" +function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String )::String where {T<:AbstractString} - f = if formatname == "llama3instruct" - formatLLMtext_llama3instruct - elseif formatname == "mistral" - # not define yet - elseif formatname == "phi3instruct" - # not define yet - elseif formatname == "qwen" - formatLLMtext_qwen - elseif formatname == "phi4" - formatLLMtext_phi4 - else - error("$formatname template not define yet") - end + f = + if formatname == "llama3instruct" + formatLLMtext_llama3instruct + elseif formatname == "mistral" + # not define yet + elseif formatname == "phi3instruct" + # not define yet + elseif formatname == "qwen" + formatLLMtext_qwen + elseif formatname == "phi4" + formatLLMtext_phi4 + elseif formatname == "granite3" + formatLLMtext_granite3 + else + error("$formatname template not define yet") + end str = "" for (i, t) in enumerate(messages) @@ -182,6 +206,73 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll return str end +""" Revert LLM-format response back into regular text. + +# Arguments + - `text::String` + The LLM formatted string to be converted. + +# Return + - `normalText::String` + The original plain text extracted from the given LLM-formatted string. + +# Example +```jldoctest +julia> using Revise +julia> using YiemAgent +julia> response = "<|begin_of_text|>This is a sample system instruction.<|eot_id|>" +julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3") +"This is a sample system instruction." +``` +""" +function deFormatLLMtext(text::String, formatname::String + )::String + f = + if formatname == "granite3" + deFormatLLMtext_granite3 + else + error("$formatname template not define yet") + end + + r = f(text) + result = r === nothing ? text : r + return result +end + + +""" Revert LLM-format response back into regular text for Granite 3 format. + +# Arguments + - `text::String` + The LLM formatted string to be converted. + +# Return + - `normalText::Union{Nothing, String}` + The original plain text extracted from the given LLM-formatted string. + Returns nothing if the text is not in Granite 3 format. + +# Example +```jldoctest +julia> using Revise +julia> using YiemAgent +julia> response = "{This is a sample LLM response.}" +julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3") +"This is a sample LLM response." +""" +function deFormatLLMtext_granite3(text::String)::Union{Nothing, String} + # check if '{' and '}' are in the text because it's a special format for the LLM response + if contains(text, '{') && contains(text, '}') + # get the text between '{' and '}' + text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1] + return text_between_braces + elseif text[end] == '}' + text = "{$text" + text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1] + else + return nothing + end +end + """ Attemp to correct LLM response's incorrect JSON response. diff --git a/src/util.jl b/src/util.jl index a70e985..a8ddac5 100644 --- a/src/util.jl +++ b/src/util.jl @@ -1042,9 +1042,11 @@ Extracts and returns the text that is enclosed between two specified characters # Examples ```jldoctest -julia> text = "Hello [World]!" +julia> text = "Hello [World]! [Yay]" julia> extracted_text = extractTextBetweenCharacter(text, '[', ']') -println(extracted_text) # Output: "World" +2-element Vector{Any}: + "World" + "Yay" ``` """ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char) From 150ddac2c078a5908b775a90c9b4f8f02c786159 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Wed, 30 Apr 2025 12:59:14 +0700 Subject: [PATCH 10/12] add extractTextBetweenString --- src/llmUtil.jl | 58 +++++++++++++++++++++++++++++++++++++++++++++++++- src/util.jl | 24 +++++++++++++++++++++ 2 files changed, 81 insertions(+), 1 deletion(-) diff --git a/src/llmUtil.jl b/src/llmUtil.jl index 82a3708..4a8a58f 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -98,6 +98,34 @@ function formatLLMtext_qwen(name::T, text::T; end +function formatLLMtext_qwen3(name::T, text::T; + assistantStarter::Bool=false) where {T<:AbstractString} + formattedtext = + if name == "system" + """ + <|im_start|>$name + $text + <|im_end|> + """ + else + """ + <|im_start|>$name + $text + <|im_end|> + """ + end + + if assistantStarter + formattedtext *= + """ + <|im_start|>assistant + """ + end + + return formattedtext +end + + function formatLLMtext_phi4(name::T, text::T; assistantStarter::Bool=false) where {T<:AbstractString} formattedtext = @@ -186,6 +214,8 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String # not define yet elseif formatname == "qwen" formatLLMtext_qwen + elseif formatname == "qwen3" + formatLLMtext_qwen3 elseif formatname == "phi4" formatLLMtext_phi4 elseif formatname == "granite3" @@ -230,6 +260,8 @@ function deFormatLLMtext(text::String, formatname::String f = if formatname == "granite3" deFormatLLMtext_granite3 + elseif formatname == "qwen3" + deFormatLLMtext_qwen3 else error("$formatname template not define yet") end @@ -261,7 +293,7 @@ julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3") """ function deFormatLLMtext_granite3(text::String)::Union{Nothing, String} # check if '{' and '}' are in the text because it's a special format for the LLM response - if contains(text, '{') && contains(text, '}') + if contains(text, "<|im_start|>assistant") # get the text between '{' and '}' text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1] return text_between_braces @@ -274,6 +306,30 @@ function deFormatLLMtext_granite3(text::String)::Union{Nothing, String} end +function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String} + think = nothing + str = nothing + + if occursin("", text) + r = GeneralUtils.extractTextBetweenString(text, "", "") + if r[:success] + think = r[:text] + end + str = string(split(text, "")[2]) + end + + if includethink == true && occursin("", text) + result = "ModelThought: $think $str" + return result + elseif includethink == false && occursin("", text) + result = str + return result + else + return text + end +end + + """ Attemp to correct LLM response's incorrect JSON response. # Arguments diff --git a/src/util.jl b/src/util.jl index a8ddac5..2d4498b 100644 --- a/src/util.jl +++ b/src/util.jl @@ -6,6 +6,7 @@ export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, rep dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, dictToString_numbering, extract_triple_backtick_text, countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter, + extractTextBetweenString, convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames @@ -1070,6 +1071,29 @@ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Cha end +function extractTextBetweenString(text::String, startstr::String, endstr::String) + # check whether startstr is in the text or not + isStartStr = split(text, startstr) + if length(isStartStr) > 2 + return (success=false, error="There are more than one occurrences of the start string '$startstr' in the text. Text must has only one start string", errorcode=2, result=nothing) + elseif length(isStartStr) == 1 + return (success=false, error="There are no start string '$startstr' in the text. Text must has only one start string", errorcode=1, result=nothing) + end + + # check whether endstr is in the text or not + isEndStr = split(text, endstr) + if length(isEndStr) > 2 + return (success=false, error="There are more than one occurrences of the end string '$endstr' in the text. Text must has only one end string", errorcode=3, result=nothing) + elseif length(isStartStr) == 1 + return (success=false, error="There are no end string '$endstr' in the text. Text must has only one end string", errorcode=4, result=nothing) + end + + s = string(split(isStartStr[2], endstr)[1]) + + return (success=true, error=nothing, errorcode=0, text=s) +end + + """ Determines if the given string follows camel case naming convention. From d8ea4b70a91325a45ce2c7333d12f6d2f969b55d Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Sun, 4 May 2025 20:56:36 +0700 Subject: [PATCH 11/12] update --- src/llmUtil.jl | 65 +++++++++++++++++++++++++++++++------------------- src/util.jl | 12 +++++++--- 2 files changed, 50 insertions(+), 27 deletions(-) diff --git a/src/llmUtil.jl b/src/llmUtil.jl index 4a8a58f..a021dd9 100644 --- a/src/llmUtil.jl +++ b/src/llmUtil.jl @@ -1,6 +1,6 @@ module llmUtil -export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext +export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext, extractthink using UUIDs, JSON3, Dates using GeneralUtils @@ -255,7 +255,7 @@ julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3") "This is a sample system instruction." ``` """ -function deFormatLLMtext(text::String, formatname::String +function deFormatLLMtext(text::String, formatname::String; includethink::Bool=false )::String f = if formatname == "granite3" @@ -306,29 +306,33 @@ function deFormatLLMtext_granite3(text::String)::Union{Nothing, String} end -function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String} - think = nothing - str = nothing - - if occursin("", text) - r = GeneralUtils.extractTextBetweenString(text, "", "") - if r[:success] - think = r[:text] - end - str = string(split(text, "")[2]) - end - - if includethink == true && occursin("", text) - result = "ModelThought: $think $str" - return result - elseif includethink == false && occursin("", text) - result = str - return result - else - return text - end +function deFormatLLMtext_qwen3(text::String)::Union{Nothing, String} + return text end +# function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String} +# think = nothing +# str = nothing + +# if occursin("", text) +# r = GeneralUtils.extractTextBetweenString(text, "", "") +# if r[:success] +# think = r[:text] +# end +# str = string(split(text, "")[2]) +# end + +# if includethink == true && occursin("", text) +# result = "ModelThought: $think $str" +# return result +# elseif includethink == false && occursin("", text) +# result = str +# return result +# else +# return text +# end +# end + """ Attemp to correct LLM response's incorrect JSON response. @@ -419,7 +423,20 @@ function jsoncorrection(config::T1, input::T2, correctJsonExample::T3; end - +function extractthink(text::String) + think = nothing + str = nothing + if occursin("", text) + r = GeneralUtils.extractTextBetweenString(text, "", "") + if r[:success] + think = r[:text] + end + str = string(split(text, "")[2]) + else + str = text + end + return think, str +end diff --git a/src/util.jl b/src/util.jl index 2d4498b..def7c27 100644 --- a/src/util.jl +++ b/src/util.jl @@ -1305,13 +1305,19 @@ julia> recentElementsIndex(length(a), 0) 5:5 ``` """ -function recentElementsIndex(vectorlength::Integer, n::Integer) +function recentElementsIndex(vectorlength::Integer, n::Integer; includelatest::Bool=false) if n == 0 error("n must be greater than 0") end - start = max(1, vectorlength - n + 1) - return start:vectorlength + if includelatest + start = max(1, vectorlength - n + 1) + return start:vectorlength + else + startind = max(1, vectorlength - n) + endind = vectorlength -1 + return startind:endind + end end From 9e29f611dfb82c2b3a9edc2080ed5797a5c452b2 Mon Sep 17 00:00:00 2001 From: narawat lamaiin Date: Sat, 24 May 2025 08:42:50 +0700 Subject: [PATCH 12/12] update --- src/util.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/util.jl b/src/util.jl index def7c27..3dabd31 100644 --- a/src/util.jl +++ b/src/util.jl @@ -308,7 +308,8 @@ function textToDict(text::String, detectKeywords::Vector{String}; dictKey_ = reverse(dictKey) # process text from back to front - for (i,keyword) in enumerate(reverse(kw)) + rkw = reverse(kw) + for (i,keyword) in enumerate(rkw) # Find the position of the keyword in the text keywordidx = findlast(keyword, remainingtext) dKey = dictKey_[i]