update
This commit is contained in:
133
src/llmUtil.jl
133
src/llmUtil.jl
@@ -1,6 +1,6 @@
|
||||
module llmUtil
|
||||
|
||||
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection
|
||||
export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext
|
||||
|
||||
using UUIDs, JSON3, Dates
|
||||
using GeneralUtils
|
||||
@@ -126,15 +126,38 @@ function formatLLMtext_phi4(name::T, text::T;
|
||||
end
|
||||
|
||||
|
||||
function formatLLMtext_granite3(name::T, text::T;
    assistantStarter::Bool=false) where {T<:AbstractString}
    # Format one chat message as a Granite 3 turn.
    #
    # Arguments
    #   - `name`: role written into the role header (e.g. "system", "user").
    #   - `text`: message body; it is wrapped in literal braces `{...}`.
    #   - `assistantStarter`: when true, an opened assistant turn ending in `{`
    #     is appended after the message.
    #
    # Return
    #   - the formatted turn as a string, each emitted line terminated by "\n".
    #
    # The previous implementation branched on `name == "system"`, but both
    # branches produced the same text, so a single template is used.
    # NOTE(review): the literals assume the original triple-quoted strings had
    # no indentation beyond that of their closing quotes - confirm.
    formattedtext = "<|start_of_role|>$name<|end_of_role|>{$text}<|end_of_text|>\n"

    if assistantStarter
        # Leave the assistant turn open: only the header and the opening brace.
        formattedtext *= "<|start_of_role|>assistant<|end_of_role|>{\n"
    end

    return formattedtext
end
|
||||
|
||||
|
||||
|
||||
""" Convert a vector of chat message dictionaries into LLM model instruct format.
|
||||
|
||||
# Arguments
|
||||
- `messages::Vector{Dict{Symbol, T}}`
|
||||
message owner name, e.g. "system", "user" or "assistant"
|
||||
A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message).
|
||||
- `formatname::T`
|
||||
format name to be used
|
||||
|
||||
The name of the format to be used for converting the chat messages.
|
||||
# Return
|
||||
- `formattedtext::String`
|
||||
text formatted to model format
|
||||
@@ -151,24 +174,25 @@ julia> chatmessage = [
|
||||
julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
|
||||
"<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n"
|
||||
```
|
||||
|
||||
# Signature
|
||||
"""
|
||||
function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct"
|
||||
function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
|
||||
)::String where {T<:AbstractString}
|
||||
f = if formatname == "llama3instruct"
|
||||
formatLLMtext_llama3instruct
|
||||
elseif formatname == "mistral"
|
||||
# not define yet
|
||||
elseif formatname == "phi3instruct"
|
||||
# not define yet
|
||||
elseif formatname == "qwen"
|
||||
formatLLMtext_qwen
|
||||
elseif formatname == "phi4"
|
||||
formatLLMtext_phi4
|
||||
else
|
||||
error("$formatname template not define yet")
|
||||
end
|
||||
f =
|
||||
if formatname == "llama3instruct"
|
||||
formatLLMtext_llama3instruct
|
||||
elseif formatname == "mistral"
|
||||
# not define yet
|
||||
elseif formatname == "phi3instruct"
|
||||
# not define yet
|
||||
elseif formatname == "qwen"
|
||||
formatLLMtext_qwen
|
||||
elseif formatname == "phi4"
|
||||
formatLLMtext_phi4
|
||||
elseif formatname == "granite3"
|
||||
formatLLMtext_granite3
|
||||
else
|
||||
error("$formatname template not define yet")
|
||||
end
|
||||
|
||||
str = ""
|
||||
for (i, t) in enumerate(messages)
|
||||
@@ -182,6 +206,73 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll
|
||||
return str
|
||||
end
|
||||
|
||||
""" Revert LLM-format response back into regular text.
|
||||
|
||||
# Arguments
|
||||
- `text::String`
|
||||
The LLM formatted string to be converted.
|
||||
|
||||
# Return
|
||||
- `normalText::String`
|
||||
The original plain text extracted from the given LLM-formatted string.
|
||||
|
||||
# Example
|
||||
```jldoctest
|
||||
julia> using Revise
|
||||
julia> using YiemAgent
|
||||
julia> response = "{This is a sample system instruction.}"
|
||||
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
|
||||
"This is a sample system instruction."
|
||||
```
|
||||
"""
|
||||
function deFormatLLMtext(text::AbstractString, formatname::String)::String
    # Convert a model-formatted LLM response back into plain text.
    #
    # Arguments
    #   - `text`: the LLM-formatted response. Any `AbstractString` is accepted
    #     (e.g. a `SubString` produced by `strip`); it is converted to `String`
    #     once before being handed to the format-specific routine.
    #   - `formatname`: name of the format; only "granite3" is handled here.
    #
    # Return
    #   - the text produced by the format-specific routine, or the input text
    #     unchanged when that routine returns `nothing`.
    #
    # Throws
    #   - `ErrorException` (via `error`) when `formatname` has no routine.
    deformatter =
        if formatname == "granite3"
            deFormatLLMtext_granite3
        else
            error("$formatname template not define yet")
        end

    plaintext = String(text)
    stripped = deformatter(plaintext)

    # `nothing` means the text did not match the format; fall back to the input.
    return stripped === nothing ? plaintext : stripped
end
|
||||
|
||||
|
||||
""" Revert LLM-format response back into regular text for Granite 3 format.
|
||||
|
||||
# Arguments
|
||||
- `text::String`
|
||||
The LLM formatted string to be converted.
|
||||
|
||||
# Return
|
||||
- `normalText::Union{Nothing, String}`
|
||||
The original plain text extracted from the given LLM-formatted string.
|
||||
Returns nothing if the text is not in Granite 3 format.
|
||||
|
||||
# Example
|
||||
```jldoctest
|
||||
julia> using Revise
|
||||
julia> using YiemAgent
|
||||
julia> response = "{This is a sample LLM response.}"
|
||||
julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
|
||||
"This is a sample LLM response."
|
||||
```
|
||||
"""
|
||||
function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
    # Extract the brace-wrapped payload of a Granite 3 style response.
    #
    # Three input shapes are recognised:
    #   1. text containing both '{' and '}'  -> first span between them;
    #   2. text that only ends with '}'      -> a '{' is prepended first, then
    #      the span is extracted the same way;
    #   3. anything else (including "")      -> `nothing`.
    #
    # `endswith` is used instead of `text[end]` so that an empty string yields
    # `nothing` rather than raising a `BoundsError`.
    candidate =
        if occursin('{', text) && occursin('}', text)
            text
        elseif endswith(text, '}')
            "{$text"
        else
            return nothing
        end

    pieces = GeneralUtils.extractTextBetweenCharacter(candidate, '{', '}')

    # NOTE(review): assumes the helper returns a collection of matches that may
    # be empty (e.g. for "}{") or `nothing` - confirm against GeneralUtils.
    if pieces === nothing || isempty(pieces)
        return nothing
    end

    return pieces[1]
end
|
||||
|
||||
|
||||
""" Attemp to correct LLM response's incorrect JSON response.
|
||||
|
||||
|
||||
@@ -1042,9 +1042,11 @@ Extracts and returns the text that is enclosed between two specified characters
|
||||
|
||||
# Examples
|
||||
```jldoctest
|
||||
julia> text = "Hello [World]!"
|
||||
julia> text = "Hello [World]! [Yay]"
|
||||
julia> extracted_text = extractTextBetweenCharacter(text, '[', ']')
|
||||
println(extracted_text) # Output: "World"
|
||||
2-element Vector{Any}:
|
||||
"World"
|
||||
"Yay"
|
||||
```
|
||||
"""
|
||||
function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char)
|
||||
|
||||
Reference in New Issue
Block a user