diff --git a/src/llmUtil.jl b/src/llmUtil.jl
index 2d2ac45..82a3708 100644
--- a/src/llmUtil.jl
+++ b/src/llmUtil.jl
@@ -1,6 +1,6 @@
 module llmUtil
 
-export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection
+export formatLLMtext, formatLLMtext_llama3instruct, jsoncorrection, deFormatLLMtext
 
 using UUIDs, JSON3, Dates
 using GeneralUtils
@@ -126,15 +126,38 @@ function formatLLMtext_phi4(name::T, text::T;
 end
 
 
+function formatLLMtext_granite3(name::T, text::T;
+    assistantStarter::Bool=false) where {T<:AbstractString}
+    formattedtext =
+        if name == "system"
+            """
+            <|start_of_role|>system<|end_of_role|>{$text}<|end_of_text|>
+            """
+        else
+            """
+            <|start_of_role|>$name<|end_of_role|>{$text}<|end_of_text|>
+            """
+        end
 
-""" Convert a chat messages in vector of dictionary into LLM model instruct format.
+    if assistantStarter
+        formattedtext *=
+            """
+            <|start_of_role|>assistant<|end_of_role|>{
+            """
+    end
+
+    return formattedtext
+end
+
+
+
+""" Convert a vector of chat message dictionaries into LLM model instruct format.
 
 # Arguments
     - `messages::Vector{Dict{Symbol, T}}`
-        message owner name e.f. "system", "user" or "assistant"
+        A vector of dictionaries where each dictionary contains the keys `:name` (the name of the message owner) and `:text` (the text of the message).
     - `formatname::T`
-        format name to be used
-    
+        The name of the format to be used for converting the chat messages.
 # Return
     - `formattedtext::String`
         text formatted to model format
@@ -151,24 +174,25 @@ julia> chatmessage = [
 julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
 "<|begin_of_text|>\n <|start_header_id|>system<|end_header_id|>\n You are a helpful, respectful and honest assistant.\n <|eot_id|>\n <|start_header_id|>user<|end_header_id|>\n list me all planets in our solar system.\n <|eot_id|>\n <|start_header_id|>assistant<|end_header_id|>\n I'm sorry. I don't know. You tell me.\n <|eot_id|>\n"
 ```
-
-# Signature
 """
-function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="llama3instruct"
+function formatLLMtext(messages::Vector{Dict{Symbol, T}}, formatname::String
     )::String where {T<:AbstractString}
-    f = if formatname == "llama3instruct"
-        formatLLMtext_llama3instruct
-    elseif formatname == "mistral"
-        # not define yet
-    elseif formatname == "phi3instruct"
-        # not define yet
-    elseif formatname == "qwen"
-        formatLLMtext_qwen
-    elseif formatname == "phi4"
-        formatLLMtext_phi4
-    else
-        error("$formatname template not define yet")
-    end
+    f =
+        if formatname == "llama3instruct"
+            formatLLMtext_llama3instruct
+        elseif formatname == "mistral"
+            # not define yet
+        elseif formatname == "phi3instruct"
+            # not define yet
+        elseif formatname == "qwen"
+            formatLLMtext_qwen
+        elseif formatname == "phi4"
+            formatLLMtext_phi4
+        elseif formatname == "granite3"
+            formatLLMtext_granite3
+        else
+            error("$formatname template not define yet")
+        end
 
     str = ""
     for (i, t) in enumerate(messages)
@@ -182,6 +206,82 @@ function formatLLMtext(messages::Vector{Dict{Symbol, T}}; formatname::String="ll
 
     return str
 end
 
+""" Revert LLM-format response back into regular text.
+
+# Arguments
+    - `text::String`
+        The LLM formatted string to be converted.
+    - `formatname::String`
+        The name of the format the text is written in. Only "granite3" is supported; any other name raises an error.
+
+# Return
+    - `normalText::String`
+        The plain text extracted from the given LLM-formatted string, or `text`
+        itself when the format-specific extractor returns nothing.
+
+# Example
+```jldoctest
+julia> using Revise
+julia> using YiemAgent
+julia> response = "{This is a sample system instruction.}"
+julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
+"This is a sample system instruction."
+```
+"""
+function deFormatLLMtext(text::String, formatname::String
+    )::String
+    f =
+        if formatname == "granite3"
+            deFormatLLMtext_granite3
+        else
+            error("$formatname template not define yet")
+        end
+
+    r = f(text)
+    result = r === nothing ? text : r
+    return result
+end
+
+
+""" Revert LLM-format response back into regular text for Granite 3 format.
+
+# Arguments
+    - `text::String`
+        The LLM formatted string to be converted.
+
+# Return
+    - `normalText::Union{Nothing, String}`
+        The original plain text extracted from the given LLM-formatted string.
+        Returns nothing if the text is not in Granite 3 format.
+
+# Example
+```jldoctest
+julia> using Revise
+julia> using YiemAgent
+julia> response = "{This is a sample LLM response.}"
+julia> normalText = YiemAgent.deFormatLLMtext(response, "granite3")
+"This is a sample LLM response."
+```
+"""
+function deFormatLLMtext_granite3(text::String)::Union{Nothing, String}
+    # check if '{' and '}' are in the text because it's a special format for the LLM response
+    if contains(text, '{') && contains(text, '}')
+        # get the text between '{' and '}'
+        text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
+        return text_between_braces
+    elseif endswith(text, '}')
+        # Closing brace without an opening one: restore the '{' before extracting.
+        # NOTE(review): presumably the '{' was already emitted as part of the prompt
+        # (see assistantStarter in formatLLMtext_granite3) - confirm.
+        # endswith, unlike text[end], does not throw on an empty string.
+        text = "{$text"
+        text_between_braces = GeneralUtils.extractTextBetweenCharacter(text, '{', '}')[1]
+        return text_between_braces
+    else
+        return nothing
+    end
+end
+
 
 """ Attemp to correct LLM response's incorrect JSON response.
 
diff --git a/src/util.jl b/src/util.jl
index a70e985..a8ddac5 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -1042,9 +1042,11 @@ Extracts and returns the text that is enclosed between two specified characters
 
 # Examples
 ```jldoctest
-julia> text = "Hello [World]!"
+julia> text = "Hello [World]! [Yay]"
 julia> extracted_text = extractTextBetweenCharacter(text, '[', ']')
-println(extracted_text) # Output: "World"
+2-element Vector{Any}:
+ "World"
+ "Yay"
 ```
 """
 function extractTextBetweenCharacter(text::String, startchar::Char, endchar::Char)