This commit is contained in:
narawat lamaiin
2025-04-25 21:12:14 +07:00
parent 14766ae171
commit 5108ad1f6b
2 changed files with 116 additions and 23 deletions

View File

@@ -1,6 +1,6 @@
module llmUtil
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext
using UUIDs, JSON3, Dates
using GeneralUtils
@@ -126,15 +126,38 @@ function formatLLMtext_phi4(name::T, text::T;
end
""" Format a single chat message into the Granite 3 instruct layout used by this module.

The message is rendered as
`<|start_of_role|>NAME<|end_of_role|>{TEXT}<|end_of_text|>` followed by a newline.
The message text is wrapped in curly braces.

# Arguments
- `name::T`
message owner name e.g. "system", "user" or "assistant"
- `text::T`
the message text
# Keyword Arguments
- `assistantStarter::Bool`
when `true`, append an opening assistant turn (`<|start_of_role|>assistant<|end_of_role|>{`
plus a newline) after the message. Default is `false`.
# Return
- `formattedtext::String`
text formatted to the Granite 3 layout
"""
function formatLLMtext_granite3(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}

    # Every role, including "system", uses the same template, so no branching on
    # `name` is needed.
    formattedtext = "<|start_of_role|>$name<|end_of_role|>{$text}<|end_of_text|>\n"

    if assistantStarter
        # The assistant turn is left open with a dangling '{' on purpose.
        formattedtext *= "<|start_of_role|>assistant<|end_of_role|>{\n"
    end

    return formattedtext
end
""" Convert a vector of chat message dictionaries into LLM model instruct format.
# Arguments
- `messages::Vector{Dict{Symbol, T}}`
message owner name e.f. "system", "user" or "assistant"
A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message).
- `formatname::T`
format name to be used
The name of the format to be used for converting the chat messages.
# Return
- `formattedtext::String`
text formatted to model format
@@ -151,12 +174,11 @@ julia> chatmessage = [
julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
"<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n"
```
# Signature
"""
function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct"
function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
)::String where {T<:AbstractString}
f = if formatname == "llama3instruct"
f =
if formatname == "llama3instruct"
formatLLMtext_llama3instruct
elseif formatname == "mistral"
# not define yet
@@ -166,6 +188,8 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll
formatLLMtext_qwen
elseif formatname == "phi4"
formatLLMtext_phi4
elseif formatname == "granite3"
formatLLMtext_granite3
else
error("$formatname template not define yet")
end
@@ -182,6 +206,73 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll
return str
end
""" Revert LLM-format response back into regular text.
# Arguments
- `text::String`
The LLM formatted string to be converted.
# Return
- `normalText::String`
The original plain text extracted from the given LLM-formatted string.
# Example
```jldoctest
julia> using Revise
julia> using YiemAgent
julia> response = "{This is a sample system instruction.}"
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
"This is a sample system instruction."
```
"""
function deFormatLLMtext(text::String, formatname::String
    )::String
    # Pick the decoder that matches `formatname`; unsupported names raise an error
    # before any decoding is attempted.
    decoder = formatname == "granite3" ? deFormatLLMtext_granite3 :
        error("$formatname template not define yet")

    # A decoder returns `nothing` when `text` is not in its format; in that case
    # the input is handed back unchanged.
    return something(decoder(text), text)
end
""" Revert LLM-format response back into regular text for Granite 3 format.
# Arguments
- `text::String`
The LLM formatted string to be converted.
# Return
- `normalText::Union{Nothing, String}`
The original plain text extracted from the given LLM-formatted string.
Returns nothing if the text is not in Granite 3 format.
# Example
```jldoctest
julia> using Revise
julia> using YiemAgent
julia> response = "{This is a sample LLM response.}"
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
"This is a sample LLM response."
```
"""
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
    # Strip the curly-brace wrapper from a response and return the first enclosed
    # span, or `nothing` when `text` does not look brace-wrapped.

    # An empty response carries no payload. This also protects the `text[end]`
    # lookup below, which would throw a BoundsError on "".
    isempty(text) && return nothing

    wrapped =
        if contains(text, '{') && contains(text, '}')
            # Both delimiters are present: use the text as-is.
            text
        elseif text[end] == '}'
            # Only the closing brace is present: restore the missing opening brace
            # so the extraction helper can match the pair.
            "{$text"
        else
            return nothing
        end

    # NOTE(review): assumes the helper returns a collection of matches (its docstring
    # example shows a Vector) -- confirm against GeneralUtils.
    matches = GeneralUtils.extractTextBetweenCharacter(wrapped, '{', '}')

    # No extracted span (e.g. a '}' that appears before the '{'): report "not in
    # this format" instead of failing on `matches[1]`.
    isempty(matches) && return nothing

    return matches[1]
end
""" Attempt to correct malformed JSON in an LLM response.

View File

@@ -1042,9 +1042,11 @@ Extracts and returns the text that is enclosed between two specified characters
# Examples
```jldoctest
julia> text = "Hello [World]!"
julia> text = "Hello [World]! [Yay]"
julia> extracted_text = extractTextBetweenCharacter(text, '[', ']')
println(extracted_text) # Output: "World"
2-element Vector{Any}:
"World"
"Yay"
```
"""
function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char)