add extractTextBetweenString
This commit is contained in:
@@ -98,6 +98,34 @@ function formatLLMtext_qwen(name::T, text::T;
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
    formatLLMtext_qwen3(name, text; assistantStarter=false)

Wrap a single chat message in `<|im_start|>` / `<|im_end|>` markers.

# Arguments
- `name`: role label written right after `<|im_start|>` (e.g. `"system"`).
- `text`: message body placed on its own line between the markers.

# Keywords
- `assistantStarter::Bool=false`: when `true`, append an opening
  `<|im_start|>assistant` line after the message.

# Returns
The formatted message as a `String`, always terminated by a newline.
"""
function formatLLMtext_qwen3(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}
    # The "system" role and every other role render to the same layout,
    # so one expression covers both cases.
    message = string("<|im_start|>", name, "\n", text, "\n<|im_end|>\n")

    assistantStarter || return message
    return message * "<|im_start|>assistant\n"
end
|
||||||
|
|
||||||
|
|
||||||
function formatLLMtext_phi4(name::T, text::T;
|
function formatLLMtext_phi4(name::T, text::T;
|
||||||
assistantStarter::Bool=false) where {T<:AbstractString}
|
assistantStarter::Bool=false) where {T<:AbstractString}
|
||||||
formattedtext =
|
formattedtext =
|
||||||
@@ -186,6 +214,8 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
|
|||||||
# not define yet
|
# not define yet
|
||||||
elseif formatname == "qwen"
|
elseif formatname == "qwen"
|
||||||
formatLLMtext_qwen
|
formatLLMtext_qwen
|
||||||
|
elseif formatname == "qwen3"
|
||||||
|
formatLLMtext_qwen3
|
||||||
elseif formatname == "phi4"
|
elseif formatname == "phi4"
|
||||||
formatLLMtext_phi4
|
formatLLMtext_phi4
|
||||||
elseif formatname == "granite3"
|
elseif formatname == "granite3"
|
||||||
@@ -230,6 +260,8 @@ function deFormatLLMtext(text::String, formatname::String
|
|||||||
f =
|
f =
|
||||||
if formatname == "granite3"
|
if formatname == "granite3"
|
||||||
deFormatLLMtext_granite3
|
deFormatLLMtext_granite3
|
||||||
|
elseif formatname == "qwen3"
|
||||||
|
deFormatLLMtext_qwen3
|
||||||
else
|
else
|
||||||
error("$formatname template not define yet")
|
error("$formatname template not define yet")
|
||||||
end
|
end
|
||||||
@@ -261,7 +293,7 @@ julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
|
|||||||
"""
|
"""
|
||||||
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
|
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
|
||||||
# check if '{' and '}' are in the text because it's a special format for the LLM response
|
# check if '{' and '}' are in the text because it's a special format for the LLM response
|
||||||
if contains(text, '{') && contains(text, '}')
|
if contains(text, "<|im_start|>assistant")
|
||||||
# get the text between '{' and '}'
|
# get the text between '{' and '}'
|
||||||
text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
|
text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
|
||||||
return text_between_braces
|
return text_between_braces
|
||||||
@@ -274,6 +306,30 @@ function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
    deFormatLLMtext_qwen3(text::String; includethink::Bool=false)

Strip the `<think>...</think>` block from an LLM response.

# Arguments
- `text`: raw response text.

# Keywords
- `includethink::Bool=false`: when `true`, prefix the answer with the extracted
  thought as `"ModelThought: <thought> <answer>"`.

# Returns
- `text` unchanged when it does not contain both `<think>` and `</think>`
  (this includes a truncated response whose think block was never closed).
- Otherwise everything after the first `</think>`, optionally prefixed with
  the thought. If the thought cannot be extracted (the helper reports
  failure), only the answer is returned.
"""
function deFormatLLMtext_qwen3(text::String; includethink::Bool=false)::Union{Nothing, String}
    # Without a complete think block there is nothing to strip. Checking for the
    # closing tag as well avoids indexing past the end of the split result.
    if !(occursin("<think>", text) && occursin("</think>", text))
        return text
    end

    # `limit=2` keeps the whole remainder after the first closing tag, even if
    # the answer itself happens to contain another "</think>".
    answer = string(split(text, "</think>"; limit=2)[2])
    includethink || return answer

    r = GeneralUtils.extractTextBetweenString(text, "<think>", "</think>")
    # Only prepend the thought when extraction succeeded; otherwise the literal
    # word "nothing" would be interpolated into the result.
    return r[:success] ? "ModelThought: $(r[:text]) $answer" : answer
end
|
||||||
|
|
||||||
|
|
||||||
""" Attemp to correct LLM response's incorrect JSON response.
|
""" Attemp to correct LLM response's incorrect JSON response.
|
||||||
|
|
||||||
# Arguments
|
# Arguments
|
||||||
|
|||||||
24
src/util.jl
24
src/util.jl
@@ -6,6 +6,7 @@ export timedifference, showstracktrace, findHighestIndexKey, uuid4snakecase, rep
|
|||||||
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey,
|
dfToString, dataframe_to_json_list, dictToString, dictToString_noKey,
|
||||||
dictToString_numbering, extract_triple_backtick_text,
|
dictToString_numbering, extract_triple_backtick_text,
|
||||||
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter,
|
countGivenWords, remove_french_accents, detect_keyword, extractTextBetweenCharacter,
|
||||||
|
extractTextBetweenString,
|
||||||
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
|
convertCamelSnakeKebabCase, fitrange, recentElementsIndex, nonRecentElementsIndex
|
||||||
|
|
||||||
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
|
using JSON3, DataStructures, Distributions, Random, Dates, UUIDs, MQTTClient, DataFrames
|
||||||
@@ -1070,6 +1071,29 @@ function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Cha
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
"""
    extractTextBetweenString(text::String, startstr::String, endstr::String)

Extract the text enclosed by exactly one `startstr` ... `endstr` pair in `text`.

# Arguments
- `text`: text to search.
- `startstr`: opening delimiter; must occur exactly once in `text`.
- `endstr`: closing delimiter; must occur exactly once in `text`, after `startstr`.

# Returns
A `NamedTuple`:
- on success: `(success=true, error=nothing, errorcode=0, text=<extracted>)`
- on failure: `(success=false, error=<message>, errorcode=<code>, result=nothing)`

Error codes:
- `1`: `startstr` not found
- `2`: `startstr` occurs more than once
- `3`: `endstr` occurs more than once
- `4`: `endstr` not found
- `5`: `endstr` does not occur after `startstr`

# Examples
```julia
julia> extractTextBetweenString("a<think>b</think>c", "<think>", "</think>").text
"b"
```
"""
function extractTextBetweenString(text::String, startstr::String, endstr::String)
    # `split` yields (number of occurrences + 1) pieces, so the piece count
    # tells how many times a delimiter occurs.
    startpieces = split(text, startstr)
    if length(startpieces) > 2
        return (success=false, error="There are more than one occurrences of the start string '$startstr' in the text. Text must has only one start string", errorcode=2, result=nothing)
    elseif length(startpieces) == 1
        return (success=false, error="There are no start string '$startstr' in the text. Text must has only one start string", errorcode=1, result=nothing)
    end

    endpieces = split(text, endstr)
    if length(endpieces) > 2
        return (success=false, error="There are more than one occurrences of the end string '$endstr' in the text. Text must has only one end string", errorcode=3, result=nothing)
    elseif length(endpieces) == 1
        # This branch must inspect the end-string pieces (not the start-string
        # pieces), otherwise a missing end delimiter is never reported.
        return (success=false, error="There are no end string '$endstr' in the text. Text must has only one end string", errorcode=4, result=nothing)
    end

    # Both delimiters occur exactly once; make sure the end one follows the
    # start one instead of returning the whole tail as the "between" text.
    tailpieces = split(startpieces[2], endstr)
    if length(tailpieces) == 1
        return (success=false, error="The end string '$endstr' does not appear after the start string '$startstr' in the text.", errorcode=5, result=nothing)
    end

    return (success=true, error=nothing, errorcode=0, text=string(tailpieces[1]))
end
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Determines if the given string follows camel case naming convention.
|
Determines if the given string follows camel case naming convention.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user