update

2023-12-19 16:30:32 +00:00
parent d90ff8d3fc
commit 59e3b3027f
3 changed files with 222 additions and 172 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -120,19 +120,21 @@ function chat_mistral_openorca(a::agentReflex)
        "
  """

+  conversation = messagesToString(a.messages)
+
  prompt = 
  """
-    <|im_start|>system
+    <|system|>
    $(a.roles[a.role])
    Your earlier talk with the user:
    $(a.earlierConversation)
-    <|im_end|>
-    $(messagesToString(a.messages))
+    <|/s|>
+    $conversation
    <|im_start|>assistant
-
  """
  
  response = sendReceivePrompt(a, prompt)
+  response = split(response, "\n\n")[1]
  response = split(response, "<|im_end|>")[1]

  return response
@@ -181,7 +183,7 @@ function planner_mistral_openorca(a::agentReflex)
  
  assistant_plan_prompt =
    """
-    <|im_start|>system
+    <|system|>
    $(a.roles[a.role])
    The required info you need for wine recommendation:
        - type of food: ask the user
@@ -190,41 +192,115 @@ function planner_mistral_openorca(a::agentReflex)
        - wine price range: ask the user
        - ambient temperature at the serving location: ask the user
        - wines we have in stock
-    You provide a personalized recommendation of wine based on the user's info above by describing the benefits of each wine in detail.
    
    You have access to the following tools:
    $toollines

-    Your work:
+    Your earlier work:
    $shorttermMemory

-    Use the following format:
-    Objective: the objective you intend to do
-    Aware: according to your work and your conversation with the user, ask yourself what info you have and what info you doesn't have?
-    Plan: first you should always think about the objective, the info you have, the info you doesn't have thoroughly then extract and devise a complete, step by step plan (pay attention to correct numeral calculation and commonsense).
+    Your task is to do the following:
+    Plan: first you should always think about your conversation with the user and your earlier work thoroughly then extract and devise a complete, step by step plan to achieve your objective (pay attention to correct numeral calculation and commonsense).
    P.S.1 each step of the plan should be a single action.
    P.S.2 ask the user if you don't have info.
-
-    <|im_end|>
+    <|/s|>
    $conversation
-    <|im_start|>assistant
-    Objective: 
+    <|assistant|>
+    Plan: 
    """
-  #WORKING remove () in steps as LLM sometimes use for (addtional info) but interfere with updatePlan
-  

-  result = sendReceivePrompt(a, assistant_plan_prompt, max_tokens=512, temperature=0.1)
+  plan = sendReceivePrompt(a, assistant_plan_prompt, max_tokens=512, temperature=0.1)
+  plan = split(plan, "<|user|>")[1]
+  plan = split(plan, "<|assistant|>")[1]
+  plan = split(plan, "\n\n")[1]

-  x = split(result, "<|im_end|>")[1]
-  @show x
-  x = split(x, "Step")[1]
-  x = split(x, "Plan:")
-  objective = x[1]
-  plan = x[2]
-
-  return objective, plan
+  return plan
 end

+# function planner_mistral_openorca(a::agentReflex)
+#   """
+#     general prompt format:
+
+#     "
+#     <|im_start|>system
+#     {role}
+#     {tools}
+#     {thinkingFormat}
+#     <|im_end|>
+#     {context}
+#     <|im_start|>user
+#     {usermsg}
+#     <|im_end|>
+#     <|im_start|>assistant
+
+#     "
+
+#     Note:
+#       {context} = 
+#         "
+#         {earlierConversation}
+#         {env state}
+#         {shortterm memory}
+#         {longterm memory}
+#         "
+#   """
+
+#   conversation = messagesToString(a.messages)
+#   toollines = ""
+#   for (toolname, v) in a.tools
+#     if toolname ∉ [""]
+#       toolline = "$toolname: $(v[:description]) $(v[:input]) $(v[:output])\n"
+#       toollines *= toolline
+#     end
+#   end
+
+#   # skip objective and plan because LLM is going to generate new plan
+#   shorttermMemory = dictToString(a.memory[:shortterm], skiplist=["Objective:", "Plan 0:"])
+  
+#   assistant_plan_prompt =
+#     """
+#     <|system|>
+#     $(a.roles[a.role])
+#     The required info you need for wine recommendation:
+#         - type of food: ask the user
+#         - occasion: ask the user
+#         - user's personal taste of wine: ask the user
+#         - wine price range: ask the user
+#         - ambient temperature at the serving location: ask the user
+#         - wines we have in stock
+#     You provide a personalized recommendation of wine based on the user's info above by describing the benefits of each wine in detail.
+    
+#     You have access to the following tools:
+#     $toollines
+
+#     Your earlier work:
+#     $shorttermMemory
+
+#     Use the following format:
+#     Objective: what do you think the user needs?
+#     Plan: first you should always think about your conversation with the user and your earlier work thoroughly then extract and devise a complete, step by step plan (pay attention to correct numeral calculation and commonsense).
+#     P.S.1 each step of the plan should be a single action.
+#     P.S.2 ask the user if you don't have info.
+#     <|/s|>
+#     $conversation
+#     <|assistant|>
+#     Objective: 
+#     """
+
+
+#   result = sendReceivePrompt(a, assistant_plan_prompt, max_tokens=512, temperature=0.1)
+
+#   x = split(result, "<|user|>")[1]
+#   x = split(result, "<|assistant|>")[1]
+
+#   x = split(x, "Step")[1]
+#   x = split(x, "Plan:")
+#   objective = x[1]
+#   plan = x[2]
+
+#   return objective, plan
+# end
+
 """ Update the current plan.
 """
 function updatePlan(a::agentReflex)
@@ -242,7 +318,7 @@ function updatePlan(a::agentReflex)

  prompt =
  """
-    <|im_start|>system
+    <|system|>
    $(a.roles[a.role])
    The required info you need for wine recommendation:
        - wine price range: ask the user
@@ -266,13 +342,14 @@ function updatePlan(a::agentReflex)
    Plan: 1. Ask the user for their food type.
    Obs: It will be Thai dishes.
    Updated plan: 1. Ask the user for their food type (Thai dishes).
-    <|im_end|>
+    <|/s|>
    Updated plan: 
  """

  result = sendReceivePrompt(a, prompt, max_tokens=512, temperature=0.1)
  @show updatedPlan = result
  a.memory[:shortterm]["Plan 0:"] = result
+  
 end

 function actor_mistral_openorca(a::agentReflex)
@@ -323,13 +400,15 @@ function actor_mistral_openorca(a::agentReflex)

  prompt = 
  """
-    <|im_start|>system
+    <|system|>
    $(a.roles[a.role])
    You have access to the following tools:
    $toollines
-    $(a.thinkingFormat[:actor])
-    <|im_end|>
+    Your earlier work:
    $shorttermMemory
+    $(a.thinkingFormat[:actor])
+    <|/s|>
+    <|assistant|>
    Thought $(a.step): 
  """
  prompt = replace(prompt, "{toolnames}" => toolnames)
@@ -342,7 +421,7 @@ function actor_mistral_openorca(a::agentReflex)
  chunkedtext = nothing

  tempcounter = 0.0
-  while true  # while Thought is empty, run actor again
+  while true  # while Thought or Act is empty, run actor again
    tempcounter += 0.1
    @show tempcounter
    response = sendReceivePrompt(a, prompt, temperature=tempcounter)
@@ -375,8 +454,10 @@ function actor_mistral_openorca(a::agentReflex)
    chunkedtext = chunktext(response, headers) 

    # assuming a length of more than 10 characters means the LLM produced valid thinking
-    if length(chunkedtext["Thought $(a.step):"]) > 10 
-      break
+    if haskey(chunkedtext, "Thought $(a.step):") && haskey(chunkedtext, "Act $(a.step):")
+      if length(chunkedtext["Thought $(a.step):"]) > 10 && length(chunkedtext["Act $(a.step):"]) > 10
+        break
+      end
    end
  end
  
@@ -483,12 +564,14 @@ end
 function work(a::agentReflex)
  workstate = nothing
  response = nothing
-
-  #BUG there is no Obs 2:
+  
  # user answering LLM -> Obs
-  if haskey(a.memory[:shortterm], "Act $(a.step):")
-    if occursin("chatbox", a.memory[:shortterm]["Act $(a.step):"])
-      a.memory[:shortterm]["Obs $(a.step):"] = a.messages[end][:content]
+  if length(a.memory[:shortterm]) != 0
+    latest_step = dictLatestStep(a.memory[:shortterm])
+    if haskey(a.memory[:shortterm], "Act $latest_step:")
+      if occursin("chatbox", a.memory[:shortterm]["Act $latest_step:"])
+        a.memory[:shortterm]["Obs $latest_step:"] = a.messages[end][:content]
+      end
    end
  end

@@ -499,41 +582,15 @@ function work(a::agentReflex)
    if a.attempt <= a.attemptlimit
      toolname = nothing
      toolinput = nothing
-
-      # if length(a.memory[:shortterm]) != 0
-      #   updatePlan(a)
-      #   @show updatedPlan = a.memory[:shortterm]["Plan 0:"]
-      # else
-      #   objective, plan = planner_mistral_openorca(a)
-      #   a.memory[:shortterm]["Objective:"] = objective
-      #   a.memory[:shortterm]["Plan $(a.attempt):"] = plan
-      #   a.memory[:log]["Plan $(a.attempt):"] = plan
-
-      #   println("")
-      #   @show objective
-      #   @show plan
-      # end
      
-      objective, plan = planner_mistral_openorca(a)
-      a.memory[:shortterm]["Objective:"] = objective
+      plan = planner_mistral_openorca(a)
      a.memory[:shortterm]["Plan $(a.attempt):"] = plan
      a.memory[:log]["Plan $(a.attempt):"] = plan
      a.step = 0  # reset because new plan is created

      println("")
-      @show objective
      @show plan
      
-
-      # sometimes LLM add not-need word I don't want
-      # plan = splittext(response, ["Step 1", "<|im_end|>", "Response", "Execution", 
-      #                           "Result", "Recommendation", "My response"])
-      # plan = replace(plan, "Plan:"=>"")
-      # println("")
-      # @show plan
-      
-      
-      
      println("")
      @show a.attempt

@@ -636,24 +693,25 @@ function actor(a::agentReflex)
  while true  # Actor loop

    # decide whether to repeat step or do the next step
-    decision, reason = goNogo(a)
-    println("")
-    @show decision
-    @show reason
-    # a.memory[:shortterm]["Check $(a.step):"] = reason
+    decision = "Yes"  # default to "Yes" because a.step starts at 0
+    if a.step != 0
+      decision, reason = goNogo(a)
+    end
+
    if decision == "Yes"   # in case there is a cancel, go straight to evaluation
      a.step += 1
-    elseif decision == "No"
-      # repeat the latest step
+    elseif decision == "No" # repeat the latest step
      a.memory[:shortterm] = removeHeaders(a.memory[:shortterm], a.step, ["Plan"])
      a.memory[:log] = removeHeaders(a.memory[:log], a.step, ["Plan"])
      println("repeating step $(a.step)")
    else
      error("undefined condition decision = $decision $(@__LINE__)")
    end
-    
-
    @show a.step
+    #WORKING checkStepCompletion
+    iscomplete = checkStepCompletion(a::agentReflex)
+    if iscomplete
+    
    if a.step < totalsteps   # the last step of the plan is responding, let work() do this part

      toolname, toolinput = actor_mistral_openorca(a)
@@ -933,7 +991,7 @@ function formulateUserresponse(a)

  prompt =
  """
-    <|im_start|>system
+    <|system|>
    Symbol:
    Plan: a plan
    Thought: your thought
@@ -948,8 +1006,9 @@ function formulateUserresponse(a)
    $work
    
    From your talk with the user and your work, formulate a response for the user.
-    <|im_end|>
-    response: 
+    <|/s|>
+    <|assistant|>
+    response:
  """
  response = sendReceivePrompt(a, prompt)
  return response
@@ -1080,7 +1139,7 @@ function goNogo(a)

  prompt =
  """
-    <|im_start|>system
+    <|system|>
    Symbol meaning:
    Plan: a plan
    Thought: your thought
@@ -1088,18 +1147,23 @@ function goNogo(a)
    Actinput: the input to the action
    Obs: the result of the action

-    Your work:
+    Your earlier work:
    $work
    
-    Your job is to check whether step $(a.step) of your work is completed according to the plan and choose only one of the following choices.
-    choice 1: If you get what you intend to do and you are ready to do the next step of the plan say, "{Yes}". And what is the rationale behind the decision to do the next step?
-    choice 2: If you didn't get what you intend to do and you need to repeat the latest step say, "{No}". And what is the rationale behind the decision to repeat the latest step?
-    <|im_end|>
-    <|im_start|>assistant
-
+    Your job is to check whether step $(a.step) of your work is completed according to the plan.
+    So for instance the following:
+    
+      step 2 of the plan: Ask user about the occasion type. But you can't find any relevant info of occasion type in your work.
+      assistant: Step 2 isn't done yet. {No}
+      step 5 of the plan: Ask user if they have any preference for the style of wine. And you found relevant info in your work such as the user like full-bodied wine.
+      assistant: Step 5 is done. {Yes}
+    <|/s|>
+    <|assistant|>
  """

  response = sendReceivePrompt(a, prompt)
+  @show goNogo_response = response
+
  decision = nothing
  reason = nothing
  if occursin("Yes", response)
@@ -1132,7 +1196,7 @@ function checkStepCompletion(a::agentReflex)

  prompt =
  """
-    <|im_start|>system
+    <|system|>
    Symbol meaning:
    Plan: a plan
    Thought: your thought
@@ -1144,8 +1208,8 @@ function checkStepCompletion(a::agentReflex)
    $plan
    
    What is step $(a.step) of the plan?
-    <|im_end|>
-    <|im_start|>assistant
+    <|/s|>
+    <|assistant|>

  """

@@ -1153,17 +1217,7 @@ function checkStepCompletion(a::agentReflex)
  
  response = split(response, "<|im_end|>")[1] 

-  # if occursin("N/A", response)
-  #   response = replace(response, "(N/A)"=>"")
-  # elseif occursin(response, "(not specified, assume casual)")
-  #   response = replace(response, "(not specified, assume casual)"=>"")
-  # else
-  # end
  
-  # mistral 7B already know info example: 2. Determine the occasion (wedding party).
-  if occursin("(", response) && occursin(")", response) 
-    result = true
-  end
  
  return result
 end