update
This commit is contained in:
133
src/llmUtil.jl
133
src/llmUtil.jl
@@ -1,6 +1,6 @@
|
|||||||
module llmUtil
|
module llmUtil
|
||||||
|
|
||||||
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection
|
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext
|
||||||
|
|
||||||
using UUIDs, JSON3, Dates
|
using UUIDs, JSON3, Dates
|
||||||
using GeneralUtils
|
using GeneralUtils
|
||||||
@@ -126,15 +126,38 @@ function formatLLMtext_phi4(name::T, text::T;
|
|||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
""" Format a single chat message using the Granite 3 role-tag template.

# Arguments
- `name::T`
    message owner name, e.g. "system", "user" or "assistant"
- `text::T`
    message text; it is wrapped in curly braces inside the template

# Keyword Arguments
- `assistantStarter::Bool`
    when true, an opening assistant turn (assistant role tag followed by an
    unclosed curly brace) is appended after the formatted message

# Return
- `formattedtext::String`
    the message rendered as one role-tagged line terminated by a newline
"""
function formatLLMtext_granite3(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}
    # Every role shares the same template (the former "system" branch was a
    # literal duplicate of the generic one), so one interpolation covers them all.
    formattedtext = "<|start_of_role|>$(name)<|end_of_role|>{$(text)}<|end_of_text|>\n"

    if assistantStarter
        # Leave the brace open: the model's reply is expected to continue from here.
        formattedtext *= "<|start_of_role|>assistant<|end_of_role|>{\n"
    end

    return formattedtext
end
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
""" Convert a vector of chat message dictionaries into LLM model instruct format.
|
||||||
|
|
||||||
# Arguments
|
# Arguments
|
||||||
- `messages::Vector{Dict{Symbol, T}}`
|
- `messages::Vector{Dict{Symbol, T}}`
|
||||||
message owner name e.g. "system", "user" or "assistant"
|
A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message).
|
||||||
- `formatname::T`
|
- `formatname::T`
|
||||||
format name to be used
|
The name of the format to be used for converting the chat messages.
|
||||||
|
|
||||||
# Return
|
# Return
|
||||||
- `formattedtext::String`
|
- `formattedtext::String`
|
||||||
text formatted to model format
|
text formatted to model format
|
||||||
@@ -151,24 +174,25 @@ julia> chatmessage = [
|
|||||||
julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
|
julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
|
||||||
"<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n"
|
"<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n"
|
||||||
```
|
```
|
||||||
|
|
||||||
# Signature
|
|
||||||
"""
|
"""
|
||||||
function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct"
|
function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
|
||||||
)::String where {T<:AbstractString}
|
)::String where {T<:AbstractString}
|
||||||
f = if formatname == "llama3instruct"
|
f =
|
||||||
formatLLMtext_llama3instruct
|
if formatname == "llama3instruct"
|
||||||
elseif formatname == "mistral"
|
formatLLMtext_llama3instruct
|
||||||
# not defined yet
|
elseif formatname == "mistral"
|
||||||
elseif formatname == "phi3instruct"
|
# not defined yet
|
||||||
# not defined yet
|
elseif formatname == "phi3instruct"
|
||||||
elseif formatname == "qwen"
|
# not defined yet
|
||||||
formatLLMtext_qwen
|
elseif formatname == "qwen"
|
||||||
elseif formatname == "phi4"
|
formatLLMtext_qwen
|
||||||
formatLLMtext_phi4
|
elseif formatname == "phi4"
|
||||||
else
|
formatLLMtext_phi4
|
||||||
error("$formatname template not define yet")
|
elseif formatname == "granite3"
|
||||||
end
|
formatLLMtext_granite3
|
||||||
|
else
|
||||||
|
error("$formatname template not define yet")
|
||||||
|
end
|
||||||
|
|
||||||
str = ""
|
str = ""
|
||||||
for (i, t) in enumerate(messages)
|
for (i, t) in enumerate(messages)
|
||||||
@@ -182,6 +206,73 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll
|
|||||||
return str
|
return str
|
||||||
end
|
end
|
||||||
|
|
||||||
""" Revert an LLM-formatted response back into regular text.

# Arguments
- `text::String`
    The LLM formatted string to be converted.
- `formatname::String`
    The name of the format `text` is written in. Only "granite3" is handled;
    any other name raises an error.

# Return
- `normalText::String`
    The plain text extracted from the given LLM-formatted string, or `text`
    itself, unchanged, when the format-specific converter returns `nothing`.

# Example
```jldoctest
julia> using Revise
julia> using YiemAgent
julia> response = "{This is a sample system instruction.}"
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
"This is a sample system instruction."
```
"""
function deFormatLLMtext(text::String, formatname::String
    )::String
    # Pick the converter for the requested format; unknown names fail loudly.
    converter =
        if formatname == "granite3"
            deFormatLLMtext_granite3
        else
            error("$formatname template not define yet")
        end

    # A converter answers `nothing` when it does not recognise the text; in that
    # case the input is handed back untouched.
    return something(converter(text), text)
end
|
||||||
|
|
||||||
|
|
||||||
""" Revert LLM-format response back into regular text for Granite 3 format.

# Arguments
- `text::String`
    The LLM formatted string to be converted.

# Return
- `normalText::Union{Nothing, String}`
    The first piece of text that `GeneralUtils.extractTextBetweenCharacter`
    finds between curly braces. A response that has no opening brace but ends
    with a closing brace is handled by prepending the opening brace first.
    Returns nothing if the text is not in Granite 3 format (this includes an
    empty `text`).

# Example
```jldoctest
julia> using Revise
julia> using YiemAgent
julia> response = "{This is a sample LLM response.}"
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
"This is a sample LLM response."
```
"""
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
    # Curly braces are the markers the Granite 3 template puts around the message.
    wrapped =
        if contains(text, '{') && contains(text, '}')
            text
        elseif endswith(text, '}')
            # Only the closing brace is present: restore the opening one.
            # `endswith` is used instead of `text[end]` so an empty string does
            # not raise a BoundsError.
            "{$text"
        else
            return nothing
        end

    pieces = GeneralUtils.extractTextBetweenCharacter(wrapped, '{', '}')
    # NOTE(review): assumes the helper returns a (possibly empty) collection of
    # matches - confirm against GeneralUtils.
    return isempty(pieces) ? nothing : pieces[1]
end
|
||||||
|
|
||||||
|
|
||||||
""" Attempt to correct LLM response's incorrect JSON response.
|
""" Attempt to correct LLM response's incorrect JSON response.
|
||||||
|
|
||||||
|
|||||||
@@ -1042,9 +1042,11 @@ Extracts and returns the text that is enclosed between two specified characters
|
|||||||
|
|
||||||
# Examples
|
# Examples
|
||||||
```jldoctest
|
```jldoctest
|
||||||
julia> text = "Hello [World]!"
|
julia> text = "Hello [World]! [Yay]"
|
||||||
julia> extracted_text = extractTextBetweenCharacter(text, '[', ']')
|
julia> extracted_text = extractTextBetweenCharacter(text, '[', ']')
|
||||||
println(extracted_text) # Output: "World"
|
2-element Vector{Any}:
|
||||||
|
"World"
|
||||||
|
"Yay"
|
||||||
```
|
```
|
||||||
"""
|
"""
|
||||||
function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char)
|
function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char)
|
||||||
|
|||||||
Reference in New Issue
Block a user