update

2025-01-04 16:10:23 +07:00
parent 82167fe006
commit cff0d31ae6
4 changed files with 349 additions and 443 deletions
--- a/src/llmfunction.jl
+++ b/src/llmfunction.jl
@@ -1,7 +1,8 @@
 module llmfunction

 export  virtualWineUserChatbox, jsoncorrection, checkinventory, # recommendbox,
-        virtualWineUserRecommendbox, userChatbox, userRecommendbox, extractWineAttributes_1
+        virtualWineUserRecommendbox, userChatbox, userRecommendbox, extractWineAttributes_1,
+        extractWineAttributes_2

 using HTTP, JSON3, URIs, Random, PrettyPrinting, UUIDs, Dates
 using GeneralUtils, SQLLLM
@@ -550,9 +551,8 @@ function extractWineAttributes_1(a::T1, input::T2)::String where {T1<:agent, T2<

  attributes = ["reasoning", "winery", "wine_name", "vintage", "region", "country", "wine_type", "grape_variety", "tasting_notes", "wine_price", "occasion", "food_to_be_paired_with_wine"]
  errornote = ""
-  maxattempt = 5
-  for attempt in 1:maxattempt
-
+  
+  for attempt in 1:5
    usermsg =
    """
    User's query: $input
@@ -572,70 +572,63 @@ function extractWineAttributes_1(a::T1, input::T2)::String where {T1<:agent, T2<
      <|start_header_id|>assistant<|end_header_id|>
      """

-    try
-      response = a.func[:text2textInstructLLM](prompt)
-      response = GeneralUtils.remove_french_accents(response)
+    response = a.func[:text2textInstructLLM](prompt)
+    response = GeneralUtils.remove_french_accents(response)

-      # check wheter all attributes are in the response
-      for word in attributes
-        if !occursin(word, response)
-          error("$word attribute is missing")
-        end
+    # check whether all attributes are in the response
+    for word in attributes
+      if !occursin(word, response)
+        errornote = "$word attribute is missing in previous attempts"
+        println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
+        continue
      end
-
-      responsedict = copy(JSON3.read(response))
-
-      delete!(responsedict, :reasoning)
-      delete!(responsedict, :tasting_notes)
-      delete!(responsedict, :occasion)
-      delete!(responsedict, :food_to_be_paired_with_wine)
-
-      # check if winery, wine_name, region, country, wine_type, grape_variety are in the query because sometime AI halucinates
-      for i in [:grape_variety, :winery, :wine_name, :region]
-        content = responsedict[i]
-        if occursin(",", content)
-          content = split(content, ",") # sometime AI generates multiple values e.g. "Chenin Blanc, Riesling"
-          content = strip.(content)
-        else
-          content = [content]
-        end
-
-        for x in content
-          if !occursin("NA", responsedict[i]) && !occursin(x, input)
-            errornote = "$x is not mentioned in the user query, you must only use the info from the query."
-            error(errornote)
-          end
-        end
-      end
-
-      # remove (some text)
-      for (k, v) in responsedict
-        _v = replace(v, r"\(.*?\)" => "")
-        responsedict[k] = _v
-      end
-
-      result = ""
-      for (k, v) in responsedict
-        # some time LLM generate text with "(some comment)". this line removes it
-        if !occursin("NA", v) && v != "" && !occursin("none", v) && !occursin("None", v)
-          result *= "$k: $v, "
-        end
-      end
-
-      #[PENDING] remove halucination. "highend dry white wine" --> "wine_type: white, occasion: special occasion, food_to_be_paired_with_wine: seafood, fish, country: France, Italy, USA, grape_variety: Chardonnay, Sauvignon Blanc, Pinot Grigio\nwine_notes: citrus, green apple, floral"
-      
-      result = result[1:end-2]  # remove the ending ", "
-
-      return result
-    catch e
-      io = IOBuffer()
-      showerror(io, e)
-      errorMsg = String(take!(io))
-      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
-      println("")
-      println("Attempt $attempt. Error occurred: $errorMsg\n$st ", @__FILE__, " ", @__LINE__)
-      println("")
    end
+
+    responsedict = copy(JSON3.read(response))
+
+    delete!(responsedict, :reasoning)
+    delete!(responsedict, :tasting_notes)
+    delete!(responsedict, :occasion)
+    delete!(responsedict, :food_to_be_paired_with_wine)
+
+    # check if winery, wine_name, region, country, wine_type, grape_variety are in the query because the AI sometimes hallucinates
+    for i in [:grape_variety, :winery, :wine_name, :region]
+      content = responsedict[i]
+      if occursin(",", content)
+        content = split(content, ",") # sometime AI generates multiple values e.g. "Chenin Blanc, Riesling"
+        content = strip.(content)
+      else
+        content = [content]
+      end
+
+      for x in content
+        if !occursin("NA", responsedict[i]) && !occursin(x, input)
+          errornote = "$x is not mentioned in the user query, you must only use the info from the query."
+          println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
+          continue
+        end
+      end
+    end
+
+    # remove (some text)
+    for (k, v) in responsedict
+      _v = replace(v, r"\(.*?\)" => "")
+      responsedict[k] = _v
+    end
+
+    result = ""
+    for (k, v) in responsedict
+      # keep only attributes with a real value: skip "NA", empty, and "none"/"None" entries
+      if !occursin("NA", v) && v != "" && !occursin("none", v) && !occursin("None", v)
+        result *= "$k: $v, "
+      end
+    end
+
+    #[PENDING] remove hallucination. "highend dry white wine" --> "wine_type: white, occasion: special occasion, food_to_be_paired_with_wine: seafood, fish, country: France, Italy, USA, grape_variety: Chardonnay, Sauvignon Blanc, Pinot Grigio\nwine_notes: citrus, green apple, floral"
+    
+    result = result[1:end-2]  # remove the ending ", "
+
+    return result
  end
  error("wineattributes_wordToNumber() failed to get a response")
 end
@@ -643,6 +636,7 @@ end
 """
 # TODO
 - [PENDING] "French dry white wines with medium bod" the LLM does not recognize sweetness. use LLM self questioning to solve.
+ - [PENDING] "French Syrah, Viognier, under 100": the LLM extracts an intensity of 3-5. Why?
 """
 function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<:AbstractString}

@@ -675,8 +669,6 @@ function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<
    4 to 5: May correspond to "high acidity" or a similar description.
  """

-  # chathistory = vectorOfDictToText(a.chathistory)
-
  systemmsg =
  """
  As an helpful sommelier, your task is to fill out the user's preference form based on the corresponding words from the user's query.
@@ -695,254 +687,135 @@ function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<
    3) Do not generate other comments.

  You should then respond to the user with the following points:
-  - reasoning: State your understanding of the current situation
+  - sweetness_keyword: The exact keywords in the user's query describing the sweetness level of the wine.
  - sweetness: ( S ), where ( S ) represents integers indicating the range of sweetness levels. Example: 1-2
+  - acidity_keyword: The exact keywords in the user's query describing the acidity level of the wine.
  - acidity: ( A ), where ( A ) represents integers indicating the range of acidity level. Example: 3-5
+  - tannin_keyword: The exact keywords in the user's query describing the tannin level of the wine.
  - tannin: ( T ), where ( T ) represents integers indicating the range of tannin level. Example: 1-3
+  - intensity_keyword: The exact keywords in the user's query describing the intensity level of the wine.
  - intensity: ( I ), where ( I ) represents integers indicating the range of intensity level. Example: 2-4
-  - notes: Anything you want to add

-  You should only respond in the form as described below:
-  reasoning: ...
-  sweetness: ...
-  acidity: ...
-  tannin: ...
-  intensity: ...
-  notes: ...
+  You should only respond in the form (JSON) as described below:
+  {
+    "sweetness_keyword": ...,
+    "sweetness": ...,
+    "acidity_keyword": ...,
+    "acidity": ...,
+    "tannin_keyword": ...,
+    "tannin": ...,
+    "intensity_keyword": ...,
+    "intensity": ...
+  }
+
+  Here are some examples:
+  User's query: I want a wine with a medium-bodied, low acidity, medium tannin.
+  {
+    "sweetness_keyword": "NA",
+    "sweetness": "NA",
+    "acidity_keyword": "low acidity",
+    "acidity": "1-2",
+    "tannin_keyword": "medium tannin",
+    "tannin": "3-4",
+    "intensity_keyword": "medium-bodied",
+    "intensity": "3-4"
+  }
+
+
+  User's query: German red wine, under 100, pairs with spicy food
+  {
+    "sweetness_keyword": "NA",
+    "sweetness": "NA",
+    "acidity_keyword": "NA",
+    "acidity": "NA",
+    "tannin_keyword": "NA",
+    "tannin": "NA",
+    "intensity_keyword": "NA",
+    "intensity": "NA"
+  }
+

  Let's begin!
  """

-  # chathistory = vectorOfDictToText(a.chathistory)
-
-  usermsg =
-  """
-  $conversiontable
-  User's query: $input
-  """
-
-  _prompt = 
-  [
-    Dict(:name=> "system", :text=> systemmsg),
-    Dict(:name=> "user", :text=> usermsg)
-  ]
-
-  # put in model format
-  prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-  prompt *=
-    """
-    <|start_header_id|>assistant<|end_header_id|>
-    """
-
-  attributes = ["reasoning", "sweetness", "acidity", "tannin", "intensity", "notes"]
+  errornote = ""

  for attempt in 1:5
-    try
-      response = a.func[:text2textInstructLLM](prompt)
-      responsedict = GeneralUtils.textToDict(response, attributes, rightmarker=":", symbolkey=true)
+    usermsg =
+    """
+    $conversiontable
+    User's query: $input
+    $errornote
+    """

-      for i ∈ attributes
-        if length(JSON3.write(responsedict[Symbol(i)])) == 0
-          error("$i is empty ", @__LINE__)
-        end
+    _prompt = 
+    [
+      Dict(:name=> "system", :text=> systemmsg),
+      Dict(:name=> "user", :text=> usermsg)
+    ]
+
+    # put in model format
+    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
+    prompt *=
+      """
+      <|start_header_id|>assistant<|end_header_id|>
+      """
+
+    response = a.func[:text2textInstructLLM](prompt)
+    responsedict = copy(JSON3.read(response))
+
+    # check whether each describing keyword is in the input to prevent hallucination
+    for i in ["sweetness", "acidity", "tannin", "intensity"]
+      keyword = Symbol(i * "_keyword")  # e.g. sweetness_keyword
+      value = responsedict[keyword]
+      if value != "NA" && !occursin(value, input)
+        errornote = "WARNING. Keyword $keyword: $value does not appear in the input. You must use information from the input only"
+        println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
+        continue
      end

-      delete!(responsedict, :reasoning)
-      delete!(responsedict, :notes) # LLM traps. so it can add useless info here like comments.
-
-      # some time LLM think the user mentioning acidity and tannin but actually didn't
-      for (k, v) in responsedict
-        if k ∈ [:acidity, :tannin] && !occursin(string(k), input)
-          responsedict[k] = "NA"
-        end
+      # if value == "NA" then responsedict[i] = "NA" 
+      # e.g. if sweetness_keyword == "NA" then sweetness = "NA"
+      if value == "NA"
+        responsedict[Symbol(i)] = "NA"
      end
-
-      # remove (some text)
-      for (k, v) in responsedict
-        _v = replace(v, r"\(.*?\)" => "")
-        responsedict[k] = _v
-      end
-
-      # some time LLM not put integer range
-      for (k, v) in responsedict
-        responsedict[k] = v
-        if length(v) > 5
-          error("non-range is not allowed. $k $v")
-        end
-      end
-
-      # some time LLM says NA-2. Need to convert NA to 1
-      for (k, v) in responsedict
-        if occursin("NA", v) && occursin("-", v)
-          new_v = replace(v, "NA"=>"1")
-          responsedict[k] = new_v
-        end
-      end
-
-      result = ""
-      for (k, v) in responsedict
-        # some time LLM generate text with "(some comment)". this line removes it
-        if !occursin("NA", v)
-          result *= "$k: $v, "
-        end
-      end
-
-      result = result[1:end-2]  # remove the ending ", "
-
-      return result
-    catch e
-      io = IOBuffer()
-      showerror(io, e)
-      errorMsg = String(take!(io))
-      st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
-      println("")
-      println("Attempt $attempt. Error occurred: $errorMsg\n$st")
-      println("")
    end
+
+    # sometimes the LLM does not return an integer range
+    for (k, v) in responsedict
+      if !occursin("keyword", string(k)) 
+        if !occursin('-', v) || length(v) > 5
+          errornote = "WARNING: The non-range value for $k is not allowed. It should be specified in a range format, such as min-max."
+          println("Attempt $attempt $errornote ", @__FILE__, " ", @__LINE__)
+          continue
+        end
+      end
+    end
+
+    # sometimes the LLM says "NA-2"; convert NA to 1
+    for (k, v) in responsedict
+      if occursin("NA", v) && occursin("-", v)
+        new_v = replace(v, "NA"=>"1")
+        responsedict[k] = new_v
+      end
+    end
+
+    result = ""
+    for (k, v) in responsedict
+      # skip entries whose value contains "NA"
+      if !occursin("NA", v)
+        result *= "$k: $v, "
+      end
+    end
+
+    result = result[1:end-2]  # remove the ending ", "
+
+    return result
  end
  error("wineattributes_wordToNumber() failed to get a response")
 end


-# function recommendbox(a::T1, input::T2)::String where {T1<:agent, T2<:AbstractString}
-#   error("recommendbox")
-#   systemmsg =
-#   """
-#   As an helpful sommelier, your task is to fill out the user's preference form based on the corresponding words from the user's query.
-
-#   At each round of conversation, the user will give you the current situation:
-#   User's query: ...
-
-#   The preference form requires the following information:
-#   wine_type, price, occasion, food_to_be_paired_with_wine, country, grape_variety, flavors, aromas.
-
-#   You must follow the following guidelines:
-#     1) If specific information required in the preference form is not available in the query or there isn't any, mark with 'NA' to indicate this.
-#       Additionally, words like 'any' or 'unlimited' mean no information is available.
-#     2) Use the conversion table to convert the descriptive word level of sweetness, intensity, tannin, and acidity into a corresponding integer.
-#     3) Do not generate other comments.
-
-#   You should then respond to the user with the following points:
-#   - reasoning: State your understanding of the current situation
-#   - wine_type: Can be one of: "red", "white", "sparkling", "rose", "dessert" or "fortified"
-#   - price: Must be an integer representing the cost of the wine.
-#   - occasion: ...
-#   - food_to_be_paired_with_wine: food that the user will be served with wine
-#   - country: wine's country of origin
-#   - region: wine's region of origin such as Burgundy, Napa Valley
-#   - grape variety: a single name of grape used to make wine.
-#   - flavors: Names of items that the wine tastes like.
-#   - aromas: wine's aroma
-
-#   You should only respond in the form as described below:
-#   reasoning: ...
-#   wine_type: ...
-#   price: ...
-#   occasion: ...
-#   food_to_be_paired_with_wine: ...
-#   country: ...
-#   region: ...
-#   grape_variety: ...
-#   flavors: ...
-#   aromas: ...
-
-#   Let's begin!
-#   """
-
-#   attributes = ["reasoning", "wine_type", "price", "occasion", "food_to_be_paired_with_wine", "country", "region", "grape_variety", "flavors", "aromas"]
-#   errornote = ""
-#   for attempt in 1:5
-
-#     usermsg =
-#     """
-#     User's query: $input
-#     $errornote
-#     """
-
-#     _prompt = 
-#     [
-#       Dict(:name=> "system", :text=> systemmsg),
-#       Dict(:name=> "user", :text=> usermsg)
-#     ]
-
-#     # put in model format
-#     prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-#     prompt *=
-#       """
-#       <|start_header_id|>assistant<|end_header_id|>
-#       """
-
-#     try
-#       response = a.func[:text2textInstructLLM](prompt)
-#       responsedict = GeneralUtils.textToDict(response, attributes, rightmarker=":", symbolkey=true)
-
-#       for i ∈ attributes
-#         if length(JSON3.write(responsedict[Symbol(i)])) == 0
-#           error("$i is empty ", @__LINE__)
-#         end
-#       end
-
-#       #[PENDING] check if the following attributes has more than 1 name
-#       x = length(split(responsedict[:grape_variety], ",")) * length(split(responsedict[:grape_variety], "/"))
-#       if x > 1
-#         errornote = "only a single name in grape_variety is allowed"
-#         error("only a single grape_variety name is allowed")
-#       end
-#       x = length(split(responsedict[:country], ",")) * length(split(responsedict[:country], "/"))
-#       if x > 1
-#         errornote = "only a single name in country is allowed"
-#         error("only a single country name is allowed")
-#       end
-#       x = length(split(responsedict[:region], ",")) * length(split(responsedict[:region], "/"))
-#       if x > 1
-#         errornote = "only a single name in region is allowed"
-#         error("only a single region name is allowed")
-#       end
-
-#       # check if grape_variety is mentioned in the input
-#       if responsedict[:grape_variety] != "NA" && !occursin(responsedict[:grape_variety], input)
-#         error("$(responsedict[:grape_variety]) is not mentioned in the input")
-#       end
-
-#       responsedict[:flavors] = replace(responsedict[:flavors], "notes"=>"")
-#       delete!(responsedict, :reasoning)
-#       delete!(responsedict, :tasting_notes)
-#       delete!(responsedict, :flavors)
-#       delete!(responsedict, :aromas)
-
-#       # remove (some text)
-#       for (k, v) in responsedict
-#         _v = replace(v, r"\(.*?\)" => "")
-#         responsedict[k] = _v
-#       end
-
-#       result = ""
-#       for (k, v) in responsedict
-#         # some time LLM generate text with "(some comment)". this line removes it
-#         if !occursin("NA", v) && v != "" && !occursin("none", v) && !occursin("None", v)
-#           result *= "$k: $v, "
-#         end
-#       end
-
-#       #[PENDING] remove halucination. "highend dry white wine" --> "wine_type: white, occasion: special occasion, food_to_be_paired_with_wine: seafood, fish, country: France, Italy, USA, grape_variety: Chardonnay, Sauvignon Blanc, Pinot Grigio\nwine_notes: citrus, green apple, floral"
-      
-#       result = result[1:end-2]  # remove the ending ", "
-
-#       return result
-#     catch e
-#       io = IOBuffer()
-#       showerror(io, e)
-#       errorMsg = String(take!(io))
-#       st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
-#       println("")
-#       println("Attempt $attempt. Error occurred: $errorMsg\n$st")
-#       println("")
-#     end
-#   end
-#   error("wineattributes_wordToNumber() failed to get a response")
-# end
-
-
 """ Attemp to correct LLM response's incorrect JSON response.

 # Arguments