update

2025-03-21 10:03:08 +07:00
parent 8fc3afe348
commit bfadd53033
5 changed files with 614 additions and 650 deletions
--- a/src/llmfunction.jl
+++ b/src/llmfunction.jl
@@ -326,7 +326,7 @@ julia>

 # TODO
  - [] update docstring
-  - [WORKING] implement the function
+  - implement the function

 # Signature
 """
@@ -382,7 +382,6 @@ function extractWineAttributes_1(a::T1, input::T2)::String where {T1<:agent, T2<
  Let's begin!
  """

-  attributes = 
  header = ["Comprehension:", "Wine_name:", "Winery:", "Vintage:", "Region:", "Country:", "Wine_type:", "Grape_varietal:", "Tasting_notes:", "Wine_price:", "Occasion:", "Food_to_be_paired_with_wine:"]
  dictkey = ["comprehension", "wine_name", "winery", "vintage", "region", "country", "wine_type", "grape_varietal", "tasting_notes", "wine_price", "occasion", "food_to_be_paired_with_wine"]
  errornote = ""
@@ -407,7 +406,7 @@ function extractWineAttributes_1(a::T1, input::T2)::String where {T1<:agent, T2<

    # check whether all attributes are in the response
    checkFlag = false
-    for word in attributes
+    for word in header
      if !occursin(word, response)
        errornote = "$word attribute is missing in previous attempts"
        println("Attempt $attempt $errornote ", Dates.now(), " ", @__FILE__, " ", @__LINE__)
@@ -416,12 +415,20 @@ function extractWineAttributes_1(a::T1, input::T2)::String where {T1<:agent, T2<
      end
    end
    checkFlag == true ? continue : nothing
+    
+    # check whether response has all headers
+    detected_kw = GeneralUtils.detect_keyword(header, response)
+    if sum(values(detected_kw)) < length(header)
+      errornote = "\nYiemAgent extractWineAttributes_1() response does not have all header"
+      continue
+    elseif sum(values(detected_kw)) > length(header)
+      errornote = "\nYiemAgent extractWineAttributes_1() response has duplicated header"
+      continue
+    end
+    responsedict = GeneralUtils.textToDict(response, header; 
+                                            dictKey=dictkey, symbolkey=true)

-    responsedict = copy(JSON3.read(response))
-
-    # convert 
-
-    delete!(responsedict, :reasoning)
+    delete!(responsedict, :comprehension)
    delete!(responsedict, :tasting_notes)
    delete!(responsedict, :occasion)
    delete!(responsedict, :food_to_be_paired_with_wine)
@@ -431,9 +438,9 @@ function extractWineAttributes_1(a::T1, input::T2)::String where {T1<:agent, T2<

    # check if winery, wine_name, region, country, wine_type, grape_varietal's value are in the query because sometimes the AI hallucinates
    checkFlag = false
-    for i in attributes
+    for i in dictkey
      j = Symbol(i)
-      if j ∉ [:reasoning, :tasting_notes, :occasion, :food_to_be_paired_with_wine]
+      if j ∉ [:comprehension, :tasting_notes, :occasion, :food_to_be_paired_with_wine]
        # in case j is wine_price it needs to be checked differently because its value is ranged
        if j == :wine_price
          if responsedict[:wine_price] != "NA" 
@@ -516,7 +523,7 @@ function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<

  conversiontable =
  """
-  Conversion Table:
+  <Conversion Table>
  Intensity level:
    1 to 2: May correspond to "light-bodied" or a similar description.
    2 to 3: May correspond to "med light bodied", "medium light" or a similar description.
@@ -541,6 +548,7 @@ function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<
    3 to 4: May correspond to "medium acidity" or a similar description.
    4 to 5: May correspond to "semi high acidity" or a similar description.
    4 to 5: May correspond to "high acidity" or a similar description.
+  </Conversion Table>
  """

  systemmsg =
@@ -554,67 +562,64 @@ function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<
  The preference form requires the following information:
  sweetness, acidity, tannin, intensity

-  You must follow the following guidelines:
+  <You must follow the following guidelines>
    1) If specific information required in the preference form is not available in the query or there isn't any, mark with 'NA' to indicate this.
      Additionally, words like 'any' or 'unlimited' mean no information is available.
    2) Use the conversion table to convert the descriptive word level of sweetness, intensity, tannin, and acidity into a corresponding integer.
    3) Do not generate other comments.
+  </You must follow the following guidelines>

-  You should then respond to the user with the following points:
-  - sweetness_keyword: The exact keywords in the user's query describing the sweetness level of the wine.
-  - sweetness: ( S ), where ( S ) represents integers indicating the range of sweetness levels. Example: 1-2
-  - acidity_keyword: The exact keywords in the user's query describing the acidity level of the wine.
-  - acidity: ( A ), where ( A ) represents integers indicating the range of acidity level. Example: 3-5
-  - tannin_keyword: The exact keywords in the user's query describing the tannin level of the wine.
-  - tannin: ( T ), where ( T ) represents integers indicating the range of tannin level. Example: 1-3
-  - intensity_keyword: The exact keywords in the user's query describing the intensity level of the wine.
-  - intensity: ( I ), where ( I ) represents integers indicating the range of intensity level. Example: 2-4
+  <You should then respond to the user with>
+    Sweetness_keyword: The exact keywords in the user's query describing the sweetness level of the wine.
+    Sweetness: ( S ), where ( S ) represents integers indicating the range of sweetness levels. Example: 1-2
+    Acidity_keyword: The exact keywords in the user's query describing the acidity level of the wine.
+    Acidity: ( A ), where ( A ) represents integers indicating the range of acidity level. Example: 3-5
+    Tannin_keyword: The exact keywords in the user's query describing the tannin level of the wine.
+    Tannin: ( T ), where ( T ) represents integers indicating the range of tannin level. Example: 1-3
+    Intensity_keyword: The exact keywords in the user's query describing the intensity level of the wine.
+    Intensity: ( I ), where ( I ) represents integers indicating the range of intensity level. Example: 2-4
+  </You should then respond to the user with>

-  You should only respond in the form (JSON) as described below:
-  {
-    "sweetness_keyword": ...,
-    "sweetness": ...,
-    "acidity_keyword": ...,
-    "acidity": ...,
-    "tannin_keyword": ...,
-    "tannin": ...,
-    "intensity_keyword": ...,
-    "intensity": ...
-  }
+  <You should only respond in format as described below>
+    Sweetness_keyword: ...
+    Sweetness: ...
+    Acidity_keyword: ...
+    Acidity: ...
+    Tannin_keyword: ...
+    Tannin: ...
+    Intensity_keyword: ...
+    Intensity: ...
+  </You should only respond in format as described below>

-  Here are some examples:
-  User's query: I want a wine with a medium-bodied, low acidity, medium tannin.
-  {
-    "sweetness_keyword": "NA",
-    "sweetness": "NA",
-    "acidity_keyword": "low acidity",
-    "acidity": "1-2",
-    "tannin_keyword": "medium tannin",
-    "tannin": "3-4",
-    "intensity_keyword": "medium-bodied",
-    "intensity": "3-4"
-  }
-
-
-  User's query: German red wine, under 100, pairs with spicy food
-  {
-    "sweetness_keyword": "NA",
-    "sweetness": "NA",
-    "acidity_keyword": "NA",
-    "acidity": "NA",
-    "tannin_keyword": "NA",
-    "tannin": "NA",
-    "intensity_keyword": "NA",
-    "intensity": "NA"
-  }
+  <Here are some examples>
+    User's query: I want a wine with a medium-bodied, low acidity, medium tannin.
+    Sweetness_keyword: NA
+    Sweetness: NA
+    Acidity_keyword: low acidity
+    Acidity: 1-2
+    Tannin_keyword: medium tannin
+    Tannin: 3-4
+    Intensity_keyword: medium-bodied
+    Intensity: 3-4

+    User's query: German red wine, under 100, pairs with spicy food
+    Sweetness_keyword: NA
+    Sweetness: NA
+    Acidity_keyword: NA
+    Acidity: NA
+    Tannin_keyword: NA
+    Tannin: NA
+    Intensity_keyword: NA
+    Intensity: NA
+  </Here are some examples>

  Let's begin!
  """
-
+  header = ["Sweetness_keyword:", "Sweetness:", "Acidity_keyword:", "Acidity:", "Tannin_keyword:", "Tannin:", "Intensity_keyword:", "Intensity:"]
+  dictkey = ["sweetness_keyword", "sweetness", "acidity_keyword", "acidity", "tannin_keyword", "tannin", "intensity_keyword", "intensity"]
  errornote = ""

-  for attempt in 1:5
+  for attempt in 1:10
    usermsg =
    """
    $conversiontable
@@ -629,14 +634,22 @@ function extractWineAttributes_2(a::T1, input::T2)::String where {T1<:agent, T2<
    ]

    # put in model format
-    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="llama3instruct")
-    prompt *=
-      """
-      <|start_header_id|>assistant<|end_header_id|>
-      """
+    prompt = GeneralUtils.formatLLMtext(_prompt; formatname="qwen")

    response = a.func[:text2textInstructLLM](prompt)
-    responsedict = copy(JSON3.read(response))
+    
+    # check whether response has all headers
+    detected_kw = GeneralUtils.detect_keyword(header, response)
+    if sum(values(detected_kw)) < length(header)
+      errornote = "\nYiemAgent extractWineAttributes_2() response does not have all header"
+      continue
+    elseif sum(values(detected_kw)) > length(header)
+      errornote = "\nYiemAgent extractWineAttributes_2() response has duplicated header"
+      continue
+    end
+
+    responsedict = GeneralUtils.textToDict(response, header; 
+                                            dictKey=dictkey, symbolkey=true)

    # check whether each describing keyword is in the input to prevent hallucination
    for i in ["sweetness", "acidity", "tannin", "intensity"]