update

2024-05-09 09:02:34 +07:00
parent 46bbb31699
commit 6ce4b90d26
3 changed files with 73 additions and 110 deletions
--- a/src/interface.jl
+++ b/src/interface.jl
@@ -69,12 +69,12 @@ julia> output_thoughtDict = Dict(
 ```
 # TODO
-  [x] update docstring
+  - [x] update docstring
-  [x] implement the function
+  - [x] implement the function
-  [] implement RAG to pull similar experience
+  - [] implement RAG to pull similar experience
-  [] use customerinfo
+  - [] use customerinfo
-  [] user storeinfo
+  - [] use storeinfo
-  [] add reflect
+  - [] add try block. check that the result is as expected before returning
 # Signature
 """
@@ -143,7 +143,7 @@ function decisionMaker(a::T1, state::T2)::Dict{Symbol, Any} where {T1<:agent, T2
  Let's begin!
  $(JSON3.write(state[:thoughtHistory]))
-  {thought
+  {"thought"
  """
  # apply LLM specific instruct format
@@ -213,6 +213,9 @@ serving as a heuristic to steer the search algorithm towards the most promising
 julia> 
 ```
 # TODO
 - [] add try block. check that the result is as expected before returning
 # Signature
 """
 function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where {T1<:agent, T2<:AbstractDict}
@@ -250,7 +253,7 @@ function progressValueEstimator(a::T1, state::T2)::Tuple{String, Integer} where
  Let's begin!:
  $(JSON3.write(state[:thoughtHistory]))
-  {evaluation
+  {"evaluation"
  """
  # apply LLM specific instruct format
@@ -311,12 +314,14 @@ julia>
 # TODO
  - [] update docstring
  - [WORKING] implement the function
  - [] add try block. check that the result is as expected before returning
 # Signature
 """
-function reflector()
+function reflector(a::T1, state::T2)::String where {T1<:agent, T2<:AbstractDict}
  # https://github.com/andyz245/LanguageAgentTreeSearch/blob/main/hotpot/hotpot.py
-  n_prompt = 
+
  _prompt = 
  """
  You are an advanced reasoning agent that can improve based on self refection. 
  You will be given the information related to the previous help you've done for a user.
@@ -324,6 +329,9 @@ function reflector()
  In a few sentences, Diagnose a possible reason for failure and devise a new, concise, high level plan that aims to mitigate the same failure. 
  Use complete sentences.  
  You should only respond in JSON format as describe below:
  {"reflection": "your relection"}
  Here are some examples:
  Previous Trial:
  {
@@ -352,75 +360,14 @@ function reflector()
    "action_6": {"name": "recommendbox", "input": "El Enemigo Cabernet Franc 2019"},
    "observation_6": "I don't like the one you recommend. I want dry wine."
  }
  Reflection: I searched one of the prime ministers involved in the signing, then attemted to answer right away. I should have searched each of the prime ministers, then looked up 'death' on each of their pages in order to get more information before answering.
-
+  {
-
+    "reflection": "I asked the user about the occasion, food type, and budget, and then searched for wine in the inventory right away. However, I should have asked the user for the specific wine type and their preferences in order to gather more information before making a recommendation."
-
+  }
  Question: The Rome Protocols were signed by three Prime Ministers one of which was assassinated as part of what?
  Thought 1: I need to search Rome Protocols, find the three Prime Ministers, then find what they were assassinated as part of.
  Action 1: Search[Rome Protocols]
  Observation 1: The Rome Protocols were a series of three international agreements concluded in Rome on 17 March 1934 between the governments of Austria, Hungary and Italy. They were signed by Italian Prime Minister Benito Mussolini, Austrian Chancellor Engelbert Dollfuss and Hungarian Prime Minister Gyula Gömbös. All of the protocols went into effect on 12 July 1934 and were registered in League of Nations Treaty Series on 12 December 1934.
  Thought 2: The three Prime Ministers were Benito Mussolini, Engelbert Dollfuss, and Gyula Gömbös. I need to search Benito Mussolini and find what he was assassinated as part of.
  Action 2: Search[Benito Mussolini]
  Observation 2: Benito Amilcare Andrea Mussolini (UK:  MU(U)SS-ə-LEE-nee, US:  MOOSS-, Italian: [beˈniːto aˈmilkare anˈdrɛːa mussoˈliːni]; 29 July 1883 – 28 April 1945) was an Italian politician and journalist who founded and led the National Fascist Party (PNF). He was Prime Minister of Italy from the March on Rome in 1922 until his deposition in 1943, as well as "Duce" of Italian fascism from the establishment of the Italian Fasces of Combat in 1919 until his summary execution in 1945 by Italian partisans. As dictator of Italy and principal founder of fascism, Mussolini inspired and supported the international spread of fascist movements during the inter-war period.Mussolini was originally a socialist politician and a journalist at the Avanti! newspaper. In 1912, he became a member of the National Directorate of the Italian Socialist Party (PSI), but he was expelled from the PSI for advocating military intervention in World War I, in opposition to the party's stance on neutrality. In 1914, Mussolini founded a new journal, Il Popolo d'Italia, and served in the Royal Italian Army during the war until he was wounded and discharged in 1917. Mussolini denounced the PSI, his views now centering on Italian nationalism instead of socialism, and later founded the fascist movement which came to oppose egalitarianism and class conflict, instead advocating "revolutionary nationalism" transcending class lines. On 31 October 1922, following the March on Rome (28–30 October), Mussolini was appointed prime minister by King Victor Emmanuel III, becoming the youngest individual to hold the office up to that time. After removing all political opposition through his secret police and outlawing labor strikes, Mussolini and his followers consolidated power through a series of laws that transformed the nation into a one-party dictatorship. Within five years, Mussolini had established dictatorial authority by both legal and illegal means and aspired to create a totalitarian state. 
In 1929, Mussolini signed the Lateran Treaty with the Holy See to establish Vatican City.
  Mussolini's foreign policy aimed to restore the ancient grandeur of the Roman Empire by expanding Italian colonial possessions and the fascist sphere of influence. In the 1920s, he ordered the Pacification of Libya, instructed the bombing of Corfu over an incident with Greece, established a protectorate over Albania, and incorporated the city of Fiume into the Italian state via agreements with Yugoslavia. In 1936, Ethiopia was conquered following the Second Italo-Ethiopian War and merged into Italian East Africa (AOI) with Eritrea and Somalia. In 1939, Italian forces annexed Albania. Between 1936 and 1939, Mussolini ordered the successful Italian military intervention in Spain in favor of Francisco Franco during the Spanish Civil War. Mussolini's Italy initially tried to avoid the outbreak of a second global war, sending troops at the Brenner Pass to delay Anschluss and taking part in the Stresa Front, the Lytton Report, the Treaty of Lausanne, the Four-Power Pact and the Munich Agreement. However, Italy then alienated itself from Britain and France by aligning with Germany and Japan. Germany invaded Poland on 1 September 1939, resulting in declarations of war by France and the UK and the start of World War II.
  On 10 June 1940, Mussolini decided to enter the war on the Axis side. Despite initial success, the subsequent Axis collapse on multiple fronts and eventual Allied invasion of Sicily made Mussolini lose the support of the population and members of the Fascist Party. As a consequence, early on 25 July 1943, the Grand Council of Fascism passed a motion of no confidence in Mussolini; later that day King Victor Emmanuel III dismissed him as head of government and had him placed in custody, appointing Pietro Badoglio to succeed him as Prime Minister. After the king agreed to an armistice with the Allies, on 12 September 1943 Mussolini was rescued from captivity in the Gran Sasso raid by German paratroopers and Waffen-SS commandos led by Major Otto-Harald Mors. Adolf Hitler, after meeting with the rescued former dictator, then put Mussolini in charge of a puppet regime in northern Italy, the Italian Social Republic (Italian: Repubblica Sociale Italiana, RSI), informally known as the Salò Republic, causing a civil war. In late April 1945, in the wake of near total defeat, Mussolini and his mistress Clara Petacci attempted to flee to Switzerland, but both were captured by Italian communist partisans and summarily executed by firing squad on 28 April 1945 near Lake Como. The bodies of Mussolini and his mistress were then taken to Milan, where they were hung upside down at a service station to publicly confirm their demise.
  Thought 3: Benito Mussolini was assassinated as part of the collapse of the Axis powers during World War II. So the Rome Protocols were signed by three Prime Ministers one of which was assassinated as part of World War II.
  Action 3: Finish[World War II]
  Reflection: I searched one of the prime ministers involved in the signing, then attemted to answer right away. I should have searched each of the prime ministers, then looked up 'death' on each of their pages in order to get more information before answering.
  Previous Trial:
  Question: Kam Heskin plays Paige Morgan in a 2004 film directed by who?
  Thought 1: I need to search Kam Heskin and Paige Morgan, find the 2004 film they are in, then find the director of the film.
  Action 1: Search[Kam Heskin]
  Observation 1: Kam Erika Heskin (born May 8, 1973) is an American actress. She began her career playing Caitlin Deschanel on the NBC daytime soap opera Sunset Beach (1998–1999), before appearing in films Planet of the Apes (2001) and Catch Me If You Can (2002). Heskin went on in 2003 to play Elizabeth Bennet in an independent film Pride & Prejudice: A Latter-Day Comedy, Robin in Charmed (2004),  and Paige Morgan in The Prince and Me film franchise (2006–2010).
  Thought 2: Kam Heskin plays Paige Morgan in The Prince and Me film franchise. I need to search The Prince and Me film franchise and find the director of the 2004 film.
  Action 2: Search[The Prince and Me film franchise]
  Observation 2: Could not find [The Prince and Me film franchise]. Similar: ['Kam Heskin', 'Prince of Persia', 'Harry Potter and the Half-Blood Prince (film)', 'Prince of Persia: The Sands of Time (film)', 'The Chronicles of Narnia (film series)', 'Majestic Prince (franchise)', 'Kingsman (franchise)', 'The Little Prince (2015 film)', 'Shrek (franchise)', 'Harry Potter (film series)']
  Thought 3: I need to search The Prince and Me (2004 film) instead.
  Action 3: Search[The Prince and Me (2004 film)]
  Observation 3: Could not find [The Prince and Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Kam Heskin', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince of Persia', 'Brooklynn Prince', 'Prince (musician)']
  Thought 4: I need to search The Prince & Me (2004 film).
  Action 4: Search[The Prince & Me (2004 film)]
  Observation 4: Could not find [The Prince & Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince (musician)', 'Prince of Persia', 'Kam Heskin', 'Brooklynn Prince']
  Thought 5: I need to search The Prince & Me (2004 film) instead.
  Action 5: Search[The Prince & Me (2004 film)]
  Observation 5: Could not find [The Prince & Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince (musician)', 'Prince of Persia', 'Kam Heskin', 'Brooklynn Prince']
  Thought 6: I need to search The Prince & Me (2004 film).
  Action 6: Search[The Prince & Me (2004 film)]
  Observation 6: Could not find [The Prince & Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince (musician)', 'Prince of Persia', 'Kam Heskin', 'Brooklynn Prince']
  Reflection: I got stuck in a loop where I kept trying to search 'The Prince & Me (2004 film)' but the page could not be found. Instead I should have tried to search the similar results that had a similar name to see and they were made in 2004.
  Previous Trial:
  Question: The Oberoi family is part of a hotel company that has a head office in what city?
  Thought 1: I need to search the Oberoi family and find the city where the hotel company's head office is located.
  Action 1: Search[Oberoi family]
  Observation 1: The Oberoi Group is a luxury hotel group with its head office in New Delhi, India.[2] Founded in 1934, the company owns and operates 32 luxury hotels and two river cruise ships in 7 countries, primarily under its Oberoi Hotels & Resorts and Trident brands.[3] The group also operates The Oberoi Centre for Learning and Development, which is regarded as one of Asia's top institutions for hospitality education.[4]. The foundations of the Oberoi Group date back to 1934 when The Rai Bahadur Mohan Singh Oberoi, the founder of the group, bought two properties: the Maidens in Delhi and the Clarke's in Shimla.[5] In the following years Oberoi, assisted by his two sons, Tilak Raj Singh Oberoi and Prithvi Raj Singh Oberoi (P.R.S. Oberoi
  Thought 2: The Oberoi Group has its head office in New Delhi, India. I just need to finish with New Delhi.
  Action 2: Finish[New Delhi]
  Observation 2: Episode finished, reward = 0
  Reflection: I appeared to have retrieved the correct information about The Oberoi Family and the location of it's head office, and provided a corresponding answer. However this answer does not exactly match the ground truth answer so I should try a different wording, such as Delhi.
  Previous trial:
-  {trajectory}Reflection:
+  $(JSON3.write(state[:thoughtHistory]))
  {"reflection"
  """
  # apply LLM specific instruct format
@@ -434,12 +381,12 @@ function reflector()
  end
  msgMeta = GeneralUtils.generate_msgMeta(
-    a.config[:externalservice][:text2textinstruct][:mqtttopic],
+  a.config[:externalservice][:text2textinstruct][:mqtttopic],
-    senderName= "progressValueEstimator",
+  senderName= "reflector",
-    senderId= a.id,
+  senderId= a.id,
-    receiverName= "text2textinstruct",
+  receiverName= "text2textinstruct",
-    mqttBroker= a.config[:mqttServerInfo][:broker],
+  mqttBroker= a.config[:mqttServerInfo][:broker],
-    mqttBrokerPort= a.config[:mqttServerInfo][:port],
+  mqttBrokerPort= a.config[:mqttServerInfo][:port],
  )
  outgoingMsg = Dict(
@@ -593,6 +540,7 @@ function conversation(a::T, userinput::Dict) where {T<:agent}
        :select=> nothing,
        :reward=> 0,
        :isterminal=> false,
        :evaluation=> nothing,
        :feedback=> nothing,
        :thoughtHistory=> OrderedDict{Symbol, Any}( # contain question, thought_1, action_1, observation_1, thought_2, ...
          # :recap=>,
--- a/src/mcts.jl
+++ b/src/mcts.jl
@@ -51,7 +51,6 @@ mutable struct MCTSNode{T1<:AbstractDict, T2<:AbstractString}
  nodekey::T2
  state::T1
  visits::Integer
  stateevaluation::T2
  statevalue::Number
  reward::Number
  isterminal::Bool
@@ -134,29 +133,12 @@ julia>
 # Signature
 """
 function expand(a::T1, node::MCTSNode, decisionMaker::Function, 
-                progressValueEstimator::Function; n::Integer=3) where {T1<:agent}
+                progressValueEstimator::Function, reflector::Function; n::Integer=3) where {T1<:agent}
  nthSample = 0
  while nthSample < n
    try
      thoughtDict = decisionMaker(a, node.state)
      newNodeKey, newstate, reward, isterminalstate = 
                                      MCTStransition(a, node.state, thoughtDict)
      # add progressValueEstimator
      stateevaluation, statevalue = progressValueEstimator(a, newstate)
      if reward < 0
        pprint(newstate[:thoughtHistory])
        newstate.feedback = stateevaluation
      end
      if newNodeKey ∉ keys(node.children)
        node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, stateevaluation, statevalue, 
                            reward, isterminalstate, node, Dict{String, MCTSNode}())
      end
      nthSample += 1
    catch e
      io = IOBuffer()
      showerror(io, e)
@@ -166,6 +148,27 @@ function expand(a::T1, node::MCTSNode, decisionMaker::Function,
      @warn "Error occurred: $errorMsg\n$st"
      println("")
    end
    thoughtDict = decisionMaker(a, node.state)
    newNodeKey, newstate, reward, isterminalstate = 
                                    MCTStransition(a, node.state, thoughtDict)
    # add progressValueEstimator
    stateevaluation, statevalue = progressValueEstimator(a, newstate)
    if reward < 0
      pprint(newstate[:thoughtHistory])
      newstate[:evaluation] = stateevaluation
      newstate[:feedback] = reflector(a, newstate)
      print("done reflection")
    end
    if newNodeKey ∉ keys(node.children)
      node.children[newNodeKey] = MCTSNode(newNodeKey, newstate, 0, statevalue, 
                          reward, isterminalstate, node, Dict{String, MCTSNode}())
    end
    nthSample += 1
  end
 end
@@ -192,7 +195,7 @@ julia>
 # Signature
 """
 function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEstimator::Function, 
-  maxDepth::Int; n=3)::Number where {T<:agent}
+  reflector::Function; maxDepth::Integer=3, n::Integer=3)::Number where {T<:agent}
  simTrajectoryReward = 0.0
@@ -201,7 +204,7 @@ function simulate(a::T, node::MCTSNode, decisionMaker::Function, progressValueEs
    if node.isterminal
      break
    else
-      expand(a, node, decisionMaker, progressValueEstimator, n=n)
+      expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
      node = selectChildNode(node)
    end
  end
@@ -456,7 +459,7 @@ function runMCTS(
  maxIterations::Integer, 
  w::Float64) where {T1<:agent}
-  root = MCTSNode("root", initialState, 0, "N/A", 0, 0, false, nothing, Dict{String, MCTSNode}())
+  root = MCTSNode("root", initialState, 0, 0, 0, false, nothing, Dict{String, MCTSNode}())
  for nth in 1:maxIterations
    node = root
@@ -468,10 +471,10 @@ function runMCTS(
      # do nothing then go directly to backpropagation
      backpropagate(leafNode, node.reward)
    else
-      expand(a, node, decisionMaker, progressValueEstimator, n=n)
+      expand(a, node, decisionMaker, progressValueEstimator, reflector; n=n)
      leafNode = UCTselect(node, w)
      simTrajectoryReward = simulate(a, leafNode, decisionMaker, progressValueEstimator, 
-                                    maxDepth, n=n)
+                                    reflector; maxDepth=maxDepth, n=n)
      backpropagate(leafNode, simTrajectoryReward)
    end
  end
--- a/test/test_1.jl
+++ b/test/test_1.jl
@@ -112,13 +112,25 @@ outgoingMsg = Dict(
    :text=> "You did not gave me any choice.",
    :select=> nothing,
    :reward=> -1,
-    :isterminal=> true,
+    :isterminal=> false,
  )
 )
 result = GeneralUtils.sendMqttMsg(outgoingMsg)
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
  :payload=> Dict(
    :text=> "You don't need to ask me. Just tell me already!",
    :select=> nothing,
    :reward=> -1,
    :isterminal=> false,
  )
 )
 result = GeneralUtils.sendMqttMsg(outgoingMsg)
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
@@ -137,7 +149,7 @@ result = GeneralUtils.sendMqttMsg(outgoingMsg)
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
  :payload=> Dict(
-    :text=> "I don't have any thing specific",
+    :text=> "Dry please.",
    :select=> nothing,
    :reward=> 0,
    :isterminal=> false,
@@ -164,10 +176,10 @@ result = GeneralUtils.sendMqttMsg(outgoingMsg)
 outgoingMsg = Dict(
  :msgMeta=> msgMeta,
  :payload=> Dict(
-    :text=> "I don't like the one you recommend. I want dry wine.",
+    :text=> "I didn't like the one you recommend. You like dry wine.",
    :select=> nothing,
    :reward=> -1,
-    :isterminal=> true,
+    :isterminal=> false,
  )
 )
 result = GeneralUtils.sendMqttMsg(outgoingMsg)