add extractTextBetweenString

This commit is contained in:
narawat lamaiin
2025-04-30 12:59:14 +07:00
parent 5108ad1f6b
commit 150ddac2c0
2 changed files with 81 additions and 1 deletions

View File

@@ -98,6 +98,34 @@ function formatLLMtext_qwen(name::T, text::T;
end end
"""
    formatLLMtext_qwen3(name, text; assistantStarter=false)

Wrap one chat message in the `<|im_start|>` / `<|im_end|>` markers.

# Arguments
- `name`: role written right after `<|im_start|>` (e.g. "system").
- `text`: message body placed on its own line(s) between the markers.

# Keywords
- `assistantStarter`: when `true`, append an opening `<|im_start|>assistant`
  line after the formatted message.

# Returns
The formatted message as a `String`, always terminated by a newline.
"""
function formatLLMtext_qwen3(name::T, text::T;
        assistantStarter::Bool=false) where {T<:AbstractString}
    # The "system" role and every other role share exactly the same layout,
    # so a single template covers both cases.
    turn = "<|im_start|>$name\n$text\n<|im_end|>\n"

    assistantStarter || return turn
    return turn * "<|im_start|>assistant\n"
end
function formatLLMtext_phi4(name::T, text::T; function formatLLMtext_phi4(name::T, text::T;
assistantStarter::Bool=false) where {T<:AbstractString} assistantStarter::Bool=false) where {T<:AbstractString}
formattedtext = formattedtext =
@@ -186,6 +214,8 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
# not define yet # not define yet
elseif formatname == "qwen" elseif formatname == "qwen"
formatLLMtext_qwen formatLLMtext_qwen
elseif formatname == "qwen3"
formatLLMtext_qwen3
elseif formatname == "phi4" elseif formatname == "phi4"
formatLLMtext_phi4 formatLLMtext_phi4
elseif formatname == "granite3" elseif formatname == "granite3"
@@ -230,6 +260,8 @@ function deFormatLLMtext(text::String, formatname::String
f = f =
if formatname == "granite3" if formatname == "granite3"
deFormatLLMtext_granite3 deFormatLLMtext_granite3
elseif formatname == "qwen3"
deFormatLLMtext_qwen3
else else
error("$formatname template not define yet") error("$formatname template not define yet")
end end
@@ -261,7 +293,7 @@ julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
""" """
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String} function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
# check if '{' and '}' are in the text because it's a special format for the LLM response # check if '{' and '}' are in the text because it's a special format for the LLM response
if contains(text, '{') && contains(text, '}') if contains(text, "<|im_start|>assistant")
# get the text between '{' and '}' # get the text between '{' and '}'
text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1] text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
return text_between_braces return text_between_braces
@@ -274,6 +306,30 @@ function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
end end
"""
    deFormatLLMtext_qwen3(text::String; includethink::Bool=false)

Strip the `<think> ... </think>` block from a model response.

The block is taken from the first `<think>` to the first `</think>` that
follows it. Everything after that closing tag is treated as the answer; text in
front of `<think>` is discarded.

# Keywords
- `includethink`: when `true`, return `"ModelThought: <thought> <answer>"`
  instead of the answer alone.

# Returns
- `text` unchanged when it contains no `<think>` tag, or when the think block
  is never closed (e.g. a truncated generation), so the caller never gets a
  `BoundsError` from a malformed response.
- otherwise the answer, optionally prefixed with the thought as described above.
"""
function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String}
    opentag = "<think>"
    closetag = "</think>"

    openrange = findfirst(opentag, text)
    isnothing(openrange) && return text

    # Only a closing tag located after the opening tag can terminate the block.
    thoughtstart = nextind(text, last(openrange))
    closerange = findnext(closetag, text, thoughtstart)
    isnothing(closerange) && return text

    think = text[thoughtstart:prevind(text, first(closerange))]
    answer = text[nextind(text, last(closerange)):end]

    return includethink ? "ModelThought: $think $answer" : answer
end
""" Attemp to correct LLM response's incorrect JSON response. """ Attemp to correct LLM response's incorrect JSON response.
# Arguments # Arguments

View File

@@ -6,6 +6,7 @@ export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, rep
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey, dfToString, dataframe_to_json_list, dictToString, dictToString_noKey,
dictToString_numbering, extract_triple_backtick_text, dictToString_numbering, extract_triple_backtick_text,
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter, countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter,
extractTextBetweenString,
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
@@ -1070,6 +1071,29 @@ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Cha
end end
"""
    extractTextBetweenString(text::String, startstr::String, endstr::String)

Return the part of `text` enclosed by `startstr` and `endstr`.

`startstr` and `endstr` must each occur exactly once in `text`, and `endstr`
must come after `startstr`.

# Returns
A `NamedTuple`:
- on success: `(success=true, error=nothing, errorcode=0, text=<extracted String>)`
- on failure: `(success=false, error=<message>, errorcode=<code>, result=nothing)`

The extracted string is only available under `text` on success; failure tuples
carry `result=nothing` instead, so check `success` before reading `text`.

# Error codes
- `1`: `startstr` does not occur in `text`
- `2`: `startstr` occurs more than once
- `3`: `endstr` occurs more than once
- `4`: `endstr` does not occur in `text`
- `5`: `endstr` does not occur after `startstr`
"""
function extractTextBetweenString(text::String, startstr::String, endstr::String)
    # Splitting on a delimiter yields (occurrences + 1) pieces.
    startparts = split(text, startstr)
    if length(startparts) > 2
        return (success=false, error="There are more than one occurrences of the start string '$startstr' in the text. Text must has only one start string", errorcode=2, result=nothing)
    elseif length(startparts) == 1
        return (success=false, error="There are no start string '$startstr' in the text. Text must has only one start string", errorcode=1, result=nothing)
    end

    endparts = split(text, endstr)
    if length(endparts) > 2
        return (success=false, error="There are more than one occurrences of the end string '$endstr' in the text. Text must has only one end string", errorcode=3, result=nothing)
    elseif length(endparts) == 1
        # The length of the end-string split (not the start-string split) tells
        # whether the end string is missing.
        return (success=false, error="There are no end string '$endstr' in the text. Text must has only one end string", errorcode=4, result=nothing)
    end

    # The single end string must be located in the text that follows the start
    # string; otherwise there is nothing enclosed between the two.
    afterstart = split(startparts[2], endstr)
    if length(afterstart) == 1
        return (success=false, error="The end string '$endstr' does not occur after the start string '$startstr' in the text. The end string must come after the start string", errorcode=5, result=nothing)
    end

    s = string(afterstart[1])
    return (success=true, error=nothing, errorcode=0, text=s)
end
""" """
Determines if the given string follows camel case naming convention. Determines if the given string follows camel case naming convention.