This commit is contained in:
narawat lamaiin
2024-05-31 11:47:43 +07:00
parent 3f38fdbb70
commit 3196842296
17 changed files with 5607 additions and 179 deletions

View File

@@ -0,0 +1,537 @@
https://github.com/andyz245/LanguageAgentTreeSearch/blob/main/hotpot/hotpot.py
standard_prompt = '''
Write a coherent passage of 4 short paragraphs. The end sentence of each paragraph must be: {input}
'''
reflection_prompt = '''You are an advanced reasoning agent that can improve based on self-reflection. You will be given a previous reasoning trial in which you were given access to a Docstore API environment and a question to answer. You were unsuccessful in answering the question either because you guessed the wrong answer with Finish[<answer>], or you used up your set number of reasoning steps. In a few sentences, diagnose a possible reason for failure and devise a new, concise, high-level plan that aims to mitigate the same failure. Use complete sentences.
Here are some examples:
Previous Trial:
Question: The Rome Protocols were signed by three Prime Ministers one of which was assassinated as part of what?
Thought 1: I need to search Rome Protocols, find the three Prime Ministers, then find what they were assassinated as part of.
Action 1: Search[Rome Protocols]
Observation 1: The Rome Protocols were a series of three international agreements concluded in Rome on 17 March 1934 between the governments of Austria, Hungary and Italy. They were signed by Italian Prime Minister Benito Mussolini, Austrian Chancellor Engelbert Dollfuss and Hungarian Prime Minister Gyula Gömbös. All of the protocols went into effect on 12 July 1934 and were registered in League of Nations Treaty Series on 12 December 1934.
Thought 2: The three Prime Ministers were Benito Mussolini, Engelbert Dollfuss, and Gyula Gömbös. I need to search Benito Mussolini and find what he was assassinated as part of.
Action 2: Search[Benito Mussolini]
Observation 2: Benito Amilcare Andrea Mussolini (UK: MU(U)SS-ə-LEE-nee, US: MOOSS-, Italian: [beˈniːto aˈmilkare anˈdrɛːa mussoˈliːni]; 29 July 1883 – 28 April 1945) was an Italian politician and journalist who founded and led the National Fascist Party (PNF). He was Prime Minister of Italy from the March on Rome in 1922 until his deposition in 1943, as well as "Duce" of Italian fascism from the establishment of the Italian Fasces of Combat in 1919 until his summary execution in 1945 by Italian partisans. As dictator of Italy and principal founder of fascism, Mussolini inspired and supported the international spread of fascist movements during the inter-war period. Mussolini was originally a socialist politician and a journalist at the Avanti! newspaper. In 1912, he became a member of the National Directorate of the Italian Socialist Party (PSI), but he was expelled from the PSI for advocating military intervention in World War I, in opposition to the party's stance on neutrality. In 1914, Mussolini founded a new journal, Il Popolo d'Italia, and served in the Royal Italian Army during the war until he was wounded and discharged in 1917. Mussolini denounced the PSI, his views now centering on Italian nationalism instead of socialism, and later founded the fascist movement which came to oppose egalitarianism and class conflict, instead advocating "revolutionary nationalism" transcending class lines. On 31 October 1922, following the March on Rome (28–30 October), Mussolini was appointed prime minister by King Victor Emmanuel III, becoming the youngest individual to hold the office up to that time. After removing all political opposition through his secret police and outlawing labor strikes, Mussolini and his followers consolidated power through a series of laws that transformed the nation into a one-party dictatorship. Within five years, Mussolini had established dictatorial authority by both legal and illegal means and aspired to create a totalitarian state. 
In 1929, Mussolini signed the Lateran Treaty with the Holy See to establish Vatican City.
Mussolini's foreign policy aimed to restore the ancient grandeur of the Roman Empire by expanding Italian colonial possessions and the fascist sphere of influence. In the 1920s, he ordered the Pacification of Libya, instructed the bombing of Corfu over an incident with Greece, established a protectorate over Albania, and incorporated the city of Fiume into the Italian state via agreements with Yugoslavia. In 1936, Ethiopia was conquered following the Second Italo-Ethiopian War and merged into Italian East Africa (AOI) with Eritrea and Somalia. In 1939, Italian forces annexed Albania. Between 1936 and 1939, Mussolini ordered the successful Italian military intervention in Spain in favor of Francisco Franco during the Spanish Civil War. Mussolini's Italy initially tried to avoid the outbreak of a second global war, sending troops at the Brenner Pass to delay Anschluss and taking part in the Stresa Front, the Lytton Report, the Treaty of Lausanne, the Four-Power Pact and the Munich Agreement. However, Italy then alienated itself from Britain and France by aligning with Germany and Japan. Germany invaded Poland on 1 September 1939, resulting in declarations of war by France and the UK and the start of World War II.
On 10 June 1940, Mussolini decided to enter the war on the Axis side. Despite initial success, the subsequent Axis collapse on multiple fronts and eventual Allied invasion of Sicily made Mussolini lose the support of the population and members of the Fascist Party. As a consequence, early on 25 July 1943, the Grand Council of Fascism passed a motion of no confidence in Mussolini; later that day King Victor Emmanuel III dismissed him as head of government and had him placed in custody, appointing Pietro Badoglio to succeed him as Prime Minister. After the king agreed to an armistice with the Allies, on 12 September 1943 Mussolini was rescued from captivity in the Gran Sasso raid by German paratroopers and Waffen-SS commandos led by Major Otto-Harald Mors. Adolf Hitler, after meeting with the rescued former dictator, then put Mussolini in charge of a puppet regime in northern Italy, the Italian Social Republic (Italian: Repubblica Sociale Italiana, RSI), informally known as the Salò Republic, causing a civil war. In late April 1945, in the wake of near total defeat, Mussolini and his mistress Clara Petacci attempted to flee to Switzerland, but both were captured by Italian communist partisans and summarily executed by firing squad on 28 April 1945 near Lake Como. The bodies of Mussolini and his mistress were then taken to Milan, where they were hung upside down at a service station to publicly confirm their demise.
Thought 3: Benito Mussolini was assassinated as part of the collapse of the Axis powers during World War II. So the Rome Protocols were signed by three Prime Ministers one of which was assassinated as part of World War II.
Action 3: Finish[World War II]
Reflection: I searched one of the prime ministers involved in the signing, then attempted to answer right away. I should have searched each of the prime ministers, then looked up 'death' on each of their pages in order to get more information before answering.
Previous Trial:
Question: Kam Heskin plays Paige Morgan in a 2004 film directed by who?
Thought 1: I need to search Kam Heskin and Paige Morgan, find the 2004 film they are in, then find the director of the film.
Action 1: Search[Kam Heskin]
Observation 1: Kam Erika Heskin (born May 8, 1973) is an American actress. She began her career playing Caitlin Deschanel on the NBC daytime soap opera Sunset Beach (1998–1999), before appearing in films Planet of the Apes (2001) and Catch Me If You Can (2002). Heskin went on in 2003 to play Elizabeth Bennet in an independent film Pride & Prejudice: A Latter-Day Comedy, Robin in Charmed (2004), and Paige Morgan in The Prince and Me film franchise (2006–2010).
Thought 2: Kam Heskin plays Paige Morgan in The Prince and Me film franchise. I need to search The Prince and Me film franchise and find the director of the 2004 film.
Action 2: Search[The Prince and Me film franchise]
Observation 2: Could not find [The Prince and Me film franchise]. Similar: ['Kam Heskin', 'Prince of Persia', 'Harry Potter and the Half-Blood Prince (film)', 'Prince of Persia: The Sands of Time (film)', 'The Chronicles of Narnia (film series)', 'Majestic Prince (franchise)', 'Kingsman (franchise)', 'The Little Prince (2015 film)', 'Shrek (franchise)', 'Harry Potter (film series)']
Thought 3: I need to search The Prince and Me (2004 film) instead.
Action 3: Search[The Prince and Me (2004 film)]
Observation 3: Could not find [The Prince and Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Kam Heskin', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince of Persia', 'Brooklynn Prince', 'Prince (musician)']
Thought 4: I need to search The Prince & Me (2004 film).
Action 4: Search[The Prince & Me (2004 film)]
Observation 4: Could not find [The Prince & Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince (musician)', 'Prince of Persia', 'Kam Heskin', 'Brooklynn Prince']
Thought 5: I need to search The Prince & Me (2004 film) instead.
Action 5: Search[The Prince & Me (2004 film)]
Observation 5: Could not find [The Prince & Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince (musician)', 'Prince of Persia', 'Kam Heskin', 'Brooklynn Prince']
Thought 6: I need to search The Prince & Me (2004 film).
Action 6: Search[The Prince & Me (2004 film)]
Observation 6: Could not find [The Prince & Me (2004 film)]. Similar: ['The Prince & Me', 'The Prince & Me 2: The Royal Wedding', 'Prince of Darkness (film)', 'Prince of Persia: The Sands of Time (film)', 'Rob Knox', 'Alexander (2004 film)', 'Prince (musician)', 'Prince of Persia', 'Kam Heskin', 'Brooklynn Prince']
Reflection: I got stuck in a loop where I kept trying to search 'The Prince & Me (2004 film)' but the page could not be found. Instead, I should have tried to search the similar results that had a similar name to see if they were made in 2004.
Previous Trial:
Question: The Oberoi family is part of a hotel company that has a head office in what city?
Thought 1: I need to search the Oberoi family and find the city where the hotel company's head office is located.
Action 1: Search[Oberoi family]
Observation 1: The Oberoi Group is a luxury hotel group with its head office in New Delhi, India.[2] Founded in 1934, the company owns and operates 32 luxury hotels and two river cruise ships in 7 countries, primarily under its Oberoi Hotels & Resorts and Trident brands.[3] The group also operates The Oberoi Centre for Learning and Development, which is regarded as one of Asia's top institutions for hospitality education.[4]. The foundations of the Oberoi Group date back to 1934 when The Rai Bahadur Mohan Singh Oberoi, the founder of the group, bought two properties: the Maidens in Delhi and the Clarke's in Shimla.[5] In the following years Oberoi, assisted by his two sons, Tilak Raj Singh Oberoi and Prithvi Raj Singh Oberoi (P.R.S. Oberoi
Thought 2: The Oberoi Group has its head office in New Delhi, India. I just need to finish with New Delhi.
Action 2: Finish[New Delhi]
Observation 2: Episode finished, reward = 0
Reflection: I appeared to have retrieved the correct information about The Oberoi Family and the location of its head office, and provided a corresponding answer. However, this answer does not exactly match the ground truth answer, so I should try a different wording, such as Delhi.
Previous Trial:
{trajectory}Reflection:'''
cot_prompt = '''
Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
After each observation, provide the next Thought and next Action. Here are some examples:
Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.
Action 1: Search[Colorado orogeny]
Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas. This took place from 1780 to 1650 million years ago (Mya), during the Paleoproterozoic (Statherian Period). It is recorded in the Colorado orogen, a >500-km-wide belt of oceanic arc rock that extends southward into New Mexico. The Colorado orogeny was likely part of the larger Yavapai orogeny.
Thought 2: It does not mention the eastern sector of the Colorado orogeny. I need to look up eastern sector.
Action 2: Lookup[eastern sector]
Observation 2: (Result 1 / 1) The Colorado orogen, formerly called the Colorado province, is a >500-km-wide belt of oceanic arc rock (1.78\u20131.65 Ga) that extends southward into New Mexico and composes a major part of the Proterozoic provinces of southwestern United States. This transcontinental collisional event occurred during the Paleoproterozoic (Statherian Period).[1] The Wyoming sector of the Colorado orogeny was formerly called the Medicine Bow orogeny. The eastern sector extends into the High Plains and is called the Central Plains orogeny. The boundary between the Colorado orogeny and the Wyoming craton is the Cheyenne belt, a 5-km-wide mylonitic shear zone that verges northward. The Cheyenne belt transects and cuts off the south edge of the older Trans-Hudson orogeny.[2]
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains, so High Plains is the area. I need to search High Plains and find its elevation range.
Action 3: Search[High Plains]
Observation 3: High Plains refers to one of two distinct land regions:
Thought 4: I need to search High Plains (United States).
Action 4: Search[High Plains (United States)]
Observation 4: The High Plains are a subregion of the Great Plains, mainly in the Western United States, but also partly in the Midwest states of Nebraska, Kansas, and South Dakota, generally encompassing the western part of the Great Plains before the region reaches the Rocky Mountains. The High Plains are located in eastern Montana, southeastern Wyoming, southwestern South Dakota, western Nebraska, eastern Colorado, western Kansas, eastern New Mexico, western Oklahoma, and to just south of the Texas Panhandle.[4] The southern region of the Western High Plains ecology region contains the geological formation known as Llano Estacado which can be seen from a short distance or on satellite maps.[5] From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]
Question: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?
Thought 1: I need to search Guitars for Wounded Warriors, then find in which New York county it was recorded.
Action 1: Search[Guitars for Wounded Warriors]
Observation 1: Guitars for Wounded Warriors is the sixth solo studio album by guitarist Brian Tarquin, released in August 2014 by Cleopatra Records. In 2016, it received a Best Compilation Album nomination by the Independent Music Awards. All tracks were recorded at Tarquin's Jungle Room Studios in New Paltz (village), New York. Being moved by the lack of support for returning veterans through his life Tarquin decided to compose a heavy instrumental rock album as a way to show his appreciation to all veterans. So once again he enlisted top notch players to guest with him on the album, Billy Sheehan on Black Hawk, Ron \"Bumblefoot\" Thal and Reb Beach on Taliban Terror, Steve Morse on Freedom, Chris Poland on Charlie Surfs and Hunting, Chuck Loeb on Escape Kabul, Hal Lindes on Sand & Blood, Gary Hoey on 5 Klicks To Hell and Baghdad, Alex De Rosso Dokken on Broken Arrow, and The Flyin' Ryan Brothers on Surgical Strike. The entire album was engineered, produced and composed by Tarquin especially for each one of the guest guitarists. Partial proceeds are donated to the Fisher House Foundation from sales.
Thought 2: Guitars for Wounded Warriors was recorded in New Paltz (village), New York. I need to search New Paltz (village), New York and find which county it is in.
Action 2: Search[New Paltz (village), New York]
Observation 2: New Paltz is a village in Ulster County located in the U.S. state of New York. It is approximately 80 miles (130 km) north of New York City and 70 miles (110 km) south of Albany. The population was 7,324 at the 2020 census.[3]
Thought 3: New Paltz (village), New York is in Ulster County, so the answer is Ulster County.
Action 3: Finish[Ulster County]
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
{input}
'''
cot_prompt_short = '''
Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
After each observation, provide the next Thought and next Action. Here are some examples:
Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.
Action 1: Search[Colorado orogeny]
Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas. This took place from 1780 to 1650 million years ago (Mya), during the Paleoproterozoic (Statherian Period). It is recorded in the Colorado orogen, a >500-km-wide belt of oceanic arc rock that extends southward into New Mexico. The Colorado orogeny was likely part of the larger Yavapai orogeny.
Thought 2: It does not mention the eastern sector of the Colorado orogeny. I need to look up eastern sector.
Action 2: Lookup[eastern sector]
Observation 2: (Result 1 / 1) The Colorado orogen, formerly called the Colorado province, is a >500-km-wide belt of oceanic arc rock (1.78\u20131.65 Ga) that extends southward into New Mexico and composes a major part of the Proterozoic provinces of southwestern United States. This transcontinental collisional event occurred during the Paleoproterozoic (Statherian Period).[1] The Wyoming sector of the Colorado orogeny was formerly called the Medicine Bow orogeny. The eastern sector extends into the High Plains and is called the Central Plains orogeny. The boundary between the Colorado orogeny and the Wyoming craton is the Cheyenne belt, a 5-km-wide mylonitic shear zone that verges northward. The Cheyenne belt transects and cuts off the south edge of the older Trans-Hudson orogeny.[2]
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains, so High Plains is the area. I need to search High Plains and find its elevation range.
Action 3: Search[High Plains]
Observation 3: High Plains refers to one of two distinct land regions:
Thought 4: I need to search High Plains (United States).
Action 4: Search[High Plains (United States)]
Observation 4: The High Plains are a subregion of the Great Plains, mainly in the Western United States, but also partly in the Midwest states of Nebraska, Kansas, and South Dakota, generally encompassing the western part of the Great Plains before the region reaches the Rocky Mountains. The High Plains are located in eastern Montana, southeastern Wyoming, southwestern South Dakota, western Nebraska, eastern Colorado, western Kansas, eastern New Mexico, western Oklahoma, and to just south of the Texas Panhandle.[4] The southern region of the Western High Plains ecology region contains the geological formation known as Llano Estacado which can be seen from a short distance or on satellite maps.[5] From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
{input}
'''
cot_prompt_feedback_short = '''You are also an advanced reasoning agent that can improve based on self-reflection. Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
After each observation, provide the next Thought and next Action. Here are some examples:
Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.
Action 1: Search[Colorado orogeny]
Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas. This took place from 1780 to 1650 million years ago (Mya), during the Paleoproterozoic (Statherian Period). It is recorded in the Colorado orogen, a >500-km-wide belt of oceanic arc rock that extends southward into New Mexico. The Colorado orogeny was likely part of the larger Yavapai orogeny.
Thought 2: It does not mention the eastern sector of the Colorado orogeny. I need to look up eastern sector.
Action 2: Lookup[eastern sector]
Observation 2: (Result 1 / 1) The Colorado orogen, formerly called the Colorado province, is a >500-km-wide belt of oceanic arc rock (1.78\u20131.65 Ga) that extends southward into New Mexico and composes a major part of the Proterozoic provinces of southwestern United States. This transcontinental collisional event occurred during the Paleoproterozoic (Statherian Period).[1] The Wyoming sector of the Colorado orogeny was formerly called the Medicine Bow orogeny. The eastern sector extends into the High Plains and is called the Central Plains orogeny. The boundary between the Colorado orogeny and the Wyoming craton is the Cheyenne belt, a 5-km-wide mylonitic shear zone that verges northward. The Cheyenne belt transects and cuts off the south edge of the older Trans-Hudson orogeny.[2]
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains, so High Plains is the area. I need to search High Plains and find its elevation range.
Action 3: Search[High Plains]
Observation 3: High Plains refers to one of two distinct land regions:
Thought 4: I need to search High Plains (United States).
Action 4: Search[High Plains (United States)]
Observation 4: The High Plains are a subregion of the Great Plains, mainly in the Western United States, but also partly in the Midwest states of Nebraska, Kansas, and South Dakota, generally encompassing the western part of the Great Plains before the region reaches the Rocky Mountains. The High Plains are located in eastern Montana, southeastern Wyoming, southwestern South Dakota, western Nebraska, eastern Colorado, western Kansas, eastern New Mexico, western Oklahoma, and to just south of the Texas Panhandle.[4] The southern region of the Western High Plains ecology region contains the geological formation known as Llano Estacado which can be seen from a short distance or on satellite maps.[5] From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
You have attempted to answer the following question before and failed. The following reflection(s) give a plan to avoid failing to answer the question in the same way you did previously. Use them to improve your strategy of correctly answering the given question.
{trajectories}
{input}
'''
cot_prompt_feedback = '''You are also an advanced reasoning agent that can improve based on self-reflection. Solve a question answering task with interleaving Thought, Action, Observation steps. Thought can reason about the current situation, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
After each observation, provide the next Thought and next Action. Here are some examples:
Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.
Action 1: Search[Colorado orogeny]
Observation 1: The Colorado orogeny was an episode of mountain building (an orogeny) in Colorado and surrounding areas. This took place from 1780 to 1650 million years ago (Mya), during the Paleoproterozoic (Statherian Period). It is recorded in the Colorado orogen, a >500-km-wide belt of oceanic arc rock that extends southward into New Mexico. The Colorado orogeny was likely part of the larger Yavapai orogeny.
Thought 2: It does not mention the eastern sector of the Colorado orogeny. I need to look up eastern sector.
Action 2: Lookup[eastern sector]
Observation 2: (Result 1 / 1) The Colorado orogen, formerly called the Colorado province, is a >500-km-wide belt of oceanic arc rock (1.78\u20131.65 Ga) that extends southward into New Mexico and composes a major part of the Proterozoic provinces of southwestern United States. This transcontinental collisional event occurred during the Paleoproterozoic (Statherian Period).[1] The Wyoming sector of the Colorado orogeny was formerly called the Medicine Bow orogeny. The eastern sector extends into the High Plains and is called the Central Plains orogeny. The boundary between the Colorado orogeny and the Wyoming craton is the Cheyenne belt, a 5-km-wide mylonitic shear zone that verges northward. The Cheyenne belt transects and cuts off the south edge of the older Trans-Hudson orogeny.[2]
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains, so High Plains is the area. I need to search High Plains and find its elevation range.
Action 3: Search[High Plains]
Observation 3: High Plains refers to one of two distinct land regions:
Thought 4: I need to search High Plains (United States).
Action 4: Search[High Plains (United States)]
Observation 4: The High Plains are a subregion of the Great Plains, mainly in the Western United States, but also partly in the Midwest states of Nebraska, Kansas, and South Dakota, generally encompassing the western part of the Great Plains before the region reaches the Rocky Mountains. The High Plains are located in eastern Montana, southeastern Wyoming, southwestern South Dakota, western Nebraska, eastern Colorado, western Kansas, eastern New Mexico, western Oklahoma, and to just south of the Texas Panhandle.[4] The southern region of the Western High Plains ecology region contains the geological formation known as Llano Estacado which can be seen from a short distance or on satellite maps.[5] From east to west, the High Plains rise in elevation from around 1,800 to 7,000 ft (550 to 2,130 m).[3]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]
Question: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?
Thought 1: I need to search Guitars for Wounded Warriors, then find in which New York county it was recorded.
Action 1: Search[Guitars for Wounded Warriors]
Observation 1: Guitars for Wounded Warriors is the sixth solo studio album by guitarist Brian Tarquin, released in August 2014 by Cleopatra Records. In 2016, it received a Best Compilation Album nomination by the Independent Music Awards. All tracks were recorded at Tarquin's Jungle Room Studios in New Paltz (village), New York. Being moved by the lack of support for returning veterans through his life Tarquin decided to compose a heavy instrumental rock album as a way to show his appreciation to all veterans. So once again he enlisted top notch players to guest with him on the album, Billy Sheehan on Black Hawk, Ron \"Bumblefoot\" Thal and Reb Beach on Taliban Terror, Steve Morse on Freedom, Chris Poland on Charlie Surfs and Hunting, Chuck Loeb on Escape Kabul, Hal Lindes on Sand & Blood, Gary Hoey on 5 Klicks To Hell and Baghdad, Alex De Rosso Dokken on Broken Arrow, and The Flyin' Ryan Brothers on Surgical Strike. The entire album was engineered, produced and composed by Tarquin especially for each one of the guest guitarists. Partial proceeds are donated to the Fisher House Foundation from sales.
Thought 2: Guitars for Wounded Warriors was recorded in New Paltz (village), New York. I need to search New Paltz (village), New York and find which county it is in
Action 2: Search[New Paltz (village), New York]
Observation 2: New Paltz is a village in Ulster County located in the U.S. state of New York. It is approximately 80 miles (130 km) north of New York City and 70 miles (110 km) south of Albany. The population was 7,324 at the 2020 census.[3]
Thought 3: New Paltz (village), New York is in Ulster County, so the answer is Ulster County.
Action 3: Finish[Ulster County]
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
You have attempted to answer the following question before and failed, either because your reasoning for the answer was incorrect or the phrasing of your response did not exactly match the answer. The following reflection(s) give a plan to avoid failing to answer the question in the same way you did previously. Use them to improve your strategy of correctly answering the given question.
{trajectories}
When providing the thought and action for the current trial, take into account these failed trajectories and make sure not to repeat the same mistakes and incorrect answers.
{input}
'''
vote_prompt = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by pairs of thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a list of trajectories, decide which trajectory is most promising. Analyze each trajectory in detail and consider possible errors, then conclude in the last line "The best trajectory is {s}", where s is the integer id of the trajectory.
'''
compare_prompt = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by pairs of thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Briefly analyze the correctness of the following two trajectories. Conclude in the last line "The more correct trajectory is 1", "The more correct trajectory is 2", or "The two trajectories are similarly correct".
'''
score_prompt = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by pairs of thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a trajectory, analyze the following trajectory, then at the last line conclude "Thus the correctness score is {s}", where s is an integer from 1 to 10.
'''
value_prompt = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by environmental observations about the situation, thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a trajectory, evaluate its correctness and provide your reasoning and analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories can be correct if the thoughts and actions so far are correct, even if the answer is not found yet. Do not generate additional thoughts or actions. Then at the last line conclude "Thus the correctness score is {s}", where s is an integer from 1 to 10.
Question: Which other Mexican Formula One race car driver has held the podium besides the Force India driver born in 1990?
Thought 1: I need to search for a list of Mexican Formula One race car drivers, then eliminate the one who drove for Force India and was born in 1990.
Action 1: Search[Formula One drivers from Mexico]
Observation 1: There have been six Formula One drivers from Mexico who have taken part in races since the championship began in 1950. Sergio P\u00e9rez is the most successful, having won six Grands Prix. Since 2021, P\u00e9rez drives for the Red Bull team.. P\u00e9rez is the only Mexican F1 driver to achieve a pole position. He took his first pole position at the 2022 Saudi Arabian Grand Prix, in his 219th Grand Prix meeting (a record for the most Grands Prix before a pole).
Thought 2: Sergio P\u00e9rez, born in 1990 and drove for Force India, is not the Mexican driver we are looking for. The passage does not give me enough information about other Mexican drivers who have achieved a podium finish. I need to look up more information.
Action 2: Lookup[Formula One drivers from Mexico]
Observation 2: No more results.
Thought 3: The available information doesn't allow me to determine which other Mexican Formula One race car driver, besides Sergio P\u00e9rez, has held the podium. With the search function failing to provide a full list of Mexican Formula One drivers, I need to return different search results. Let's try searching for Mexican Grand Prix winners, as they would have held the podium.
Action 3: Search[Mexican Grand Prix winners]
Observation 3: Could not find Mexican Grand Prix winners. Similar: ['Mexican Grand Prix', 'List of Formula One Grand Prix winners', '1990 Mexican Grand Prix', '2018 Mexican Grand Prix', '2019 Mexican Grand Prix'].
Thought 4: Given the difficulties in finding a direct list of Mexican Grand Prix winners, I should search for the Mexican Grand Prix to get a broader picture of the race's history. This might include winners.
Action 4: Search[Mexican Grand Prix]
Thus the correctness score is 3
Question: The football manager who recruited David Beckham managed Manchester United during what timeframe?
Thought 1: I need to search for the football manager who recruited David Beckham, and then find out the timeframe in which he managed Manchester United.
Action 1: Search[David Beckham's football manager]
Observation 1: Could not find David Beckham's football manager. Similar: ['David Beckham', 'Inter Miami CF (redirect from Miami Beckham United)', 'Victoria Beckham', 'The Beckham Experiment', 'Miami Freedom Park'].
Thought 2: I couldn't find the information on David Beckham's football manager. I need to search Manchester United and find out the timeframe of the manager.
Action 2: Search[Manchester United]
Observation 2: Manchester United Football Club, commonly referred to as Man United (often stylised as Man Utd), or simply United, is a professional football club based in Old Trafford, Greater Manchester, England. The club competes in the Premier League, the top division in the English football league system. Nicknamed the Red Devils, they were founded as Newton Heath LYR Football Club in 1878, but changed their name to Manchester United in 1902. After a spell playing in Clayton, Manchester, the club moved to their current stadium, Old Trafford, in 1910.. Domestically, Manchester United have won a record 20 league titles, 12 FA Cups, six League Cups and a record 21 FA Community Shields.
Thought 3: The information about David Beckham's football manager is not available. I will search for Manchester United's managers to find the timeframe.
Action 3: Search[Manchester United managers]
Observation 3: Manchester United Football Club is a professional football club based in Old Trafford, Greater Manchester, England. The club was formed in Newton Heath in 1878 as Newton Heath LYR F.C., and played their first competitive match in October 1886, when they entered the First Round of the 1886–87 FA Cup. The club was renamed Manchester United F.C. in 1902, and moved to Old Trafford in 1910.. From the beginning of the club's official managerial records in 1892 to the start of the 2022–23 season, Manchester United have had 23 full-time managers.
Thought 4: The information about David Beckham's football manager is not available. The question asks for the timeframe when the manager managed Manchester United, so I need to find a list of Manchester United managers.
Action 4: Search[list of Manchester United managers]
Observation 4: Manchester United Football Club is a professional football club based in Old Trafford, Greater Manchester, England. The club was formed in Newton Heath in 1878 as Newton Heath LYR F.C., and played their first competitive match in October 1886, when they entered the First Round of the 1886–87 FA Cup. The club was renamed Manchester United F.C. in 1902, and moved to Old Trafford in 1910.. From the beginning of the club's official managerial records in 1892 to the start of the 2022–23 season, Manchester United have had 23 full-time managers.
Thus the correctness score is 4
Question: Which magazine was started first Arthur's Magazine or First for Women?
Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.
Action 1: Search[Arthur's Magazine]
Observation 1: Arthur's Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others.[1][2] In May 1846 it was merged into Godey's Lady's Book.[3]
Thus the correctness score is 10
{input}
'''
value_prompt_feedback = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by environmental observations about the situation, thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a trajectory, evaluate its correctness and provide your reasoning and analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories can be correct if the thoughts and actions so far are correct, even if the answer is not found yet. Do not generate additional thoughts or actions. Then at the last line conclude "Thus the correctness score is {s}", where s is an integer from 1 to 10.
Question: Which other Mexican Formula One race car driver has held the podium besides the Force India driver born in 1990?
Thought 1: I need to search for a list of Mexican Formula One race car drivers, then eliminate the one who drove for Force India and was born in 1990.
Action 1: Search[Formula One drivers from Mexico]
Observation 1: There have been six Formula One drivers from Mexico who have taken part in races since the championship began in 1950. Sergio P\u00e9rez is the most successful, having won six Grands Prix. Since 2021, P\u00e9rez drives for the Red Bull team.. P\u00e9rez is the only Mexican F1 driver to achieve a pole position. He took his first pole position at the 2022 Saudi Arabian Grand Prix, in his 219th Grand Prix meeting (a record for the most Grands Prix before a pole).
Thought 2: Sergio P\u00e9rez, born in 1990 and drove for Force India, is not the Mexican driver we are looking for. The passage does not give me enough information about other Mexican drivers who have achieved a podium finish. I need to look up more information.
Action 2: Lookup[Formula One drivers from Mexico]
Observation 2: No more results.
Thought 3: The available information doesn't allow me to determine which other Mexican Formula One race car driver, besides Sergio P\u00e9rez, has held the podium. With the search function failing to provide a full list of Mexican Formula One drivers, I need to return different search results. Let's try searching for Mexican Grand Prix winners, as they would have held the podium.
Action 3: Search[Mexican Grand Prix winners]
Observation 3: Could not find Mexican Grand Prix winners. Similar: ['Mexican Grand Prix', 'List of Formula One Grand Prix winners', '1990 Mexican Grand Prix', '2018 Mexican Grand Prix', '2019 Mexican Grand Prix'].
Thought 4: Given the difficulties in finding a direct list of Mexican Grand Prix winners, I should search for the Mexican Grand Prix to get a broader picture of the race's history. This might include winners.
Action 4: Search[Mexican Grand Prix]
Thus the correctness score is 3
Question: Which magazine was started first Arthur's Magazine or First for Women?
Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.
Action 1: Search[Arthur's Magazine]
Observation 1: Arthur's Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others.[1][2] In May 1846 it was merged into Godey's Lady's Book.[3]
Thus the correctness score is 10
{trajectories}
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
Thus the correctness score is 10
{input}
'''
value_prompt_reasoning = '''You are an advanced reasoning agent that can improve based on self reflection. Analyze the trajectories of your previous solutions to a question answering task. The trajectories are labeled by environmental observations about the situation, thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a trajectory, evaluate its correctness and provide your reasoning and analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories can be correct if the thoughts and actions so far are correct, even if the answer is not found yet. Do not generate additional thoughts or actions. Then at the last line conclude "Thus the correctness score is {s}", where s is an integer from 1 to 10.
Question: Which other Mexican Formula One race car driver has held the podium besides the Force India driver born in 1990?
Thought 1: I need to search for a list of Mexican Formula One race car drivers, then eliminate the one who drove for Force India and was born in 1990.
Action 1: Search[Formula One drivers from Mexico]
Observation 1: There have been six Formula One drivers from Mexico who have taken part in races since the championship began in 1950. Sergio P\u00e9rez is the most successful, having won six Grands Prix. Since 2021, P\u00e9rez drives for the Red Bull team.. P\u00e9rez is the only Mexican F1 driver to achieve a pole position. He took his first pole position at the 2022 Saudi Arabian Grand Prix, in his 219th Grand Prix meeting (a record for the most Grands Prix before a pole).
Thought 2: Sergio P\u00e9rez, born in 1990 and drove for Force India, is not the Mexican driver we are looking for. The passage does not give me enough information about other Mexican drivers who have achieved a podium finish. I need to look up more information.
Action 2: Lookup[Formula One drivers from Mexico]
Observation 2: No more results.
Thought 3: The available information doesn't allow me to determine which other Mexican Formula One race car driver, besides Sergio P\u00e9rez, has held the podium. With the search function failing to provide a full list of Mexican Formula One drivers, I need to return different search results. Let's try searching for Mexican Grand Prix winners, as they would have held the podium.
Action 3: Search[Mexican Grand Prix winners]
Observation 3: Could not find Mexican Grand Prix winners. Similar: ['Mexican Grand Prix', 'List of Formula One Grand Prix winners', '1990 Mexican Grand Prix', '2018 Mexican Grand Prix', '2019 Mexican Grand Prix'].
Thought 4: Given the difficulties in finding a direct list of Mexican Grand Prix winners, I should search for the Mexican Grand Prix to get a broader picture of the race's history. This might include winners.
Action 4: Search[Mexican Grand Prix]
This trajectory is incorrect as my search should be related to Mexican Formula One race car drivers, not winners of the Mexican Grand Prix, a separate event. A better search would have been for the List of Formula One Grand Prix winners, as suggested.
Thus the correctness score is 3
Question: Which magazine was started first Arthur's Magazine or First for Women?
Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.
Action 1: Search[Arthur's Magazine]
Observation 1: Arthur's Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others.[1][2] In May 1846 it was merged into Godey's Lady's Book.[3]
This trajectory is correct as it is reasonable to search for the first magazine provided in the question. It is also better to have simple searches corresponding to a single entity, making this the best action.
Thus the correctness score is 10
Question: The football manager who recruited David Beckham managed Manchester United during what timeframe?
Thought 1: I need to search for the football manager who recruited David Beckham, and then find out the timeframe in which he managed Manchester United.
Action 1: Search[David Beckham's football manager]
Observation 1: Could not find David Beckham's football manager. Similar: ['David Beckham', 'Inter Miami CF (redirect from Miami Beckham United)', 'Victoria Beckham', 'The Beckham Experiment', 'Miami Freedom Park'].
Thought 2: I couldn't find the information on David Beckham's football manager. I need to search Manchester United and find out the timeframe of the manager.
Action 2: Search[Manchester United]
Observation 2: Manchester United Football Club, commonly referred to as Man United (often stylised as Man Utd), or simply United, is a professional football club based in Old Trafford, Greater Manchester, England. The club competes in the Premier League, the top division in the English football league system. Nicknamed the Red Devils, they were founded as Newton Heath LYR Football Club in 1878, but changed their name to Manchester United in 1902. After a spell playing in Clayton, Manchester, the club moved to their current stadium, Old Trafford, in 1910.. Domestically, Manchester United have won a record 20 league titles, 12 FA Cups, six League Cups and a record 21 FA Community Shields.
Thought 3: The information about David Beckham's football manager is not available. I will search for Manchester United's managers to find the timeframe.
Action 3: Search[Manchester United managers]
Observation 3: Manchester United Football Club is a professional football club based in Old Trafford, Greater Manchester, England. The club was formed in Newton Heath in 1878 as Newton Heath LYR F.C., and played their first competitive match in October 1886, when they entered the First Round of the 1886–87 FA Cup. The club was renamed Manchester United F.C. in 1902, and moved to Old Trafford in 1910.. From the beginning of the club's official managerial records in 1892 to the start of the 2022–23 season, Manchester United have had 23 full-time managers.
Thought 4: The information about David Beckham's football manager is not available. The question asks for the timeframe when the manager managed Manchester United, so I need to find a list of Manchester United managers.
Action 4: Search[list of Manchester United managers]
Observation 4: Manchester United Football Club is a professional football club based in Old Trafford, Greater Manchester, England. The club was formed in Newton Heath in 1878 as Newton Heath LYR F.C., and played their first competitive match in October 1886, when they entered the First Round of the 1886–87 FA Cup. The club was renamed Manchester United F.C. in 1902, and moved to Old Trafford in 1910.. From the beginning of the club's official managerial records in 1892 to the start of the 2022–23 season, Manchester United have had 23 full-time managers.
This trajectory is mostly incorrect as I was not able to retrieve the relevant information to answer the question. The searches are too broad and not relevant to the specific player recruited, David Beckham. The last two actions are also too similar. It would be better to first search for David Beckham and lookup information related to his manager.
Thus the correctness score is 4
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
This trajectory is correct as all of my thoughts and actions are correct. It makes sense to search for Milhouse first as it is the central subject of the question. It is also correct to directly look up the relevant information in the article, instead of trying another search.
Thus the correctness score is 10
{input}
'''
value_prompt_reasoning_feedback = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by environmental observations about the situation, thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a trajectory, evaluate its correctness and provide your reasoning and analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories can be correct if the thoughts and actions so far are correct, even if the answer is not found yet. Do not generate additional thoughts or actions. Then at the last line conclude "Thus the correctness score is {s}", where s is an integer from 1 to 10.
Question: Which other Mexican Formula One race car driver has held the podium besides the Force India driver born in 1990?
Thought 1: I need to search for a list of Mexican Formula One race car drivers, then eliminate the one who drove for Force India and was born in 1990.
Action 1: Search[Formula One drivers from Mexico]
Observation 1: There have been six Formula One drivers from Mexico who have taken part in races since the championship began in 1950. Sergio P\u00e9rez is the most successful, having won six Grands Prix. Since 2021, P\u00e9rez drives for the Red Bull team.. P\u00e9rez is the only Mexican F1 driver to achieve a pole position. He took his first pole position at the 2022 Saudi Arabian Grand Prix, in his 219th Grand Prix meeting (a record for the most Grands Prix before a pole).
Thought 2: Sergio P\u00e9rez, born in 1990 and drove for Force India, is not the Mexican driver we are looking for. The passage does not give me enough information about other Mexican drivers who have achieved a podium finish. I need to look up more information.
Action 2: Lookup[Formula One drivers from Mexico]
Observation 2: No more results.
Thought 3: The available information doesn't allow me to determine which other Mexican Formula One race car driver, besides Sergio P\u00e9rez, has held the podium. With the search function failing to provide a full list of Mexican Formula One drivers, I need to return different search results. Let's try searching for Mexican Grand Prix winners, as they would have held the podium.
Action 3: Search[Mexican Grand Prix winners]
Observation 3: Could not find Mexican Grand Prix winners. Similar: ['Mexican Grand Prix', 'List of Formula One Grand Prix winners', '1990 Mexican Grand Prix', '2018 Mexican Grand Prix', '2019 Mexican Grand Prix'].
Thought 4: Given the difficulties in finding a direct list of Mexican Grand Prix winners, I should search for the Mexican Grand Prix to get a broader picture of the race's history. This might include winners.
Action 4: Search[Mexican Grand Prix]
This trajectory is incorrect as my search should be related to Mexican Formula One race car drivers, not winners of the Mexican Grand Prix, a separate event. A better search would have been for the List of Formula One Grand Prix winners, as suggested.
Thus the correctness score is 3
Question: Which magazine was started first Arthur's Magazine or First for Women?
Thought 1: I need to search Arthur's Magazine and First for Women, and find which was started first.
Action 1: Search[Arthur's Magazine]
Observation 1: Arthur's Magazine (1844–1846) was an American literary periodical published in Philadelphia in the 19th century. Edited by Timothy Shay Arthur, it featured work by Edgar A. Poe, J.H. Ingraham, Sarah Josepha Hale, Thomas G. Spear, and others.[1][2] In May 1846 it was merged into Godey's Lady's Book.[3]
This trajectory is correct as it is reasonable to search for the first magazine provided in the question. It is also better to have simple searches corresponding to a single entity, making this the best action.
Thus the correctness score is 10
Question: The football manager who recruited David Beckham managed Manchester United during what timeframe?
Thought 1: I need to search for the football manager who recruited David Beckham, and then find out the timeframe in which he managed Manchester United.
Action 1: Search[David Beckham's football manager]
Observation 1: Could not find David Beckham's football manager. Similar: ['David Beckham', 'Inter Miami CF (redirect from Miami Beckham United)', 'Victoria Beckham', 'The Beckham Experiment', 'Miami Freedom Park'].
Thought 2: I couldn't find the information on David Beckham's football manager. I need to search Manchester United and find out the timeframe of the manager.
Action 2: Search[Manchester United]
Observation 2: Manchester United Football Club, commonly referred to as Man United (often stylised as Man Utd), or simply United, is a professional football club based in Old Trafford, Greater Manchester, England. The club competes in the Premier League, the top division in the English football league system. Nicknamed the Red Devils, they were founded as Newton Heath LYR Football Club in 1878, but changed their name to Manchester United in 1902. After a spell playing in Clayton, Manchester, the club moved to their current stadium, Old Trafford, in 1910.. Domestically, Manchester United have won a record 20 league titles, 12 FA Cups, six League Cups and a record 21 FA Community Shields.
Thought 3: The information about David Beckham's football manager is not available. I will search for Manchester United's managers to find the timeframe.
Action 3: Search[Manchester United managers]
Observation 3: Manchester United Football Club is a professional football club based in Old Trafford, Greater Manchester, England. The club was formed in Newton Heath in 1878 as Newton Heath LYR F.C., and played their first competitive match in October 1886, when they entered the First Round of the 1886–87 FA Cup. The club was renamed Manchester United F.C. in 1902, and moved to Old Trafford in 1910.. From the beginning of the club's official managerial records in 1892 to the start of the 2022–23 season, Manchester United have had 23 full-time managers.
Thought 4: The information about David Beckham's football manager is not available. The question asks for the timeframe when the manager managed Manchester United, so I need to find a list of Manchester United managers.
Action 4: Search[list of Manchester United managers]
Observation 4: Manchester United Football Club is a professional football club based in Old Trafford, Greater Manchester, England. The club was formed in Newton Heath in 1878 as Newton Heath LYR F.C., and played their first competitive match in October 1886, when they entered the First Round of the 1886–87 FA Cup. The club was renamed Manchester United F.C. in 1902, and moved to Old Trafford in 1910.. From the beginning of the club's official managerial records in 1892 to the start of the 2022–23 season, Manchester United have had 23 full-time managers.
This trajectory is mostly incorrect as I was not able to retrieve the relevant information to answer the question. The searches are too broad and not relevant to the specific player recruited, David Beckham. The last two actions are also too similar. It would be better to first search for David Beckham and lookup information related to his manager.
Thus the correctness score is 4
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
This trajectory is correct as all of my thoughts and actions are correct. It makes sense to search for Milhouse first as it is the central subject of the question. It is also correct to directly look up the relevant information in the article, instead of trying another search.
Thus the correctness score is 10
{trajectories}
{input}
'''
# Prompt template asking an LLM to judge a question-answering trajectory and
# finish with "Thus the correctness score is {s}" (an integer from 1 to 10).
# It carries two worked examples (scored 3 and 10) and ends with the
# `{trajectories}` and `{input}` placeholders.
value_prompt_reasoning_feedback_short = '''Analyze the trajectories of a solution to a question answering task. The trajectories are labeled by environmental observations about the situation, thoughts that can reason about the current situation and actions that can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Given a question and a trajectory, evaluate its correctness and provide your reasoning and analysis in detail. Focus on the latest thought, action, and observation. Incomplete trajectories can be correct if the thoughts and actions so far are correct, even if the answer is not found yet. Do not generate additional thoughts or actions. Then at the last line conclude "Thus the correctness score is {s}", where s is an integer from 1 to 10.
Question: Which other Mexican Formula One race car driver has held the podium besides the Force India driver born in 1990?
Thought 1: I need to search for a list of Mexican Formula One race car drivers, then eliminate the one who drove for Force India and was born in 1990.
Action 1: Search[Formula One drivers from Mexico]
Observation 1: There have been six Formula One drivers from Mexico who have taken part in races since the championship began in 1950. Sergio P\u00e9rez is the most successful, having won six Grands Prix. Since 2021, P\u00e9rez drives for the Red Bull team.. P\u00e9rez is the only Mexican F1 driver to achieve a pole position. He took his first pole position at the 2022 Saudi Arabian Grand Prix, in his 219th Grand Prix meeting (a record for the most Grands Prix before a pole).
Thought 2: Sergio P\u00e9rez, born in 1990 and drove for Force India, is not the Mexican driver we are looking for. The passage does not give me enough information about other Mexican drivers who have achieved a podium finish. I need to look up more information.
Action 2: Lookup[Formula One drivers from Mexico]
Observation 2: No more results.
Thought 3: The available information doesn't allow me to determine which other Mexican Formula One race car driver, besides Sergio P\u00e9rez, has held the podium. With the search function failing to provide a full list of Mexican Formula One drivers, I need to return different search results. Let's try searching for Mexican Grand Prix winners, as they would have held the podium.
Action 3: Search[Mexican Grand Prix winners]
Observation 3: Could not find Mexican Grand Prix winners. Similar: ['Mexican Grand Prix', 'List of Formula One Grand Prix winners', '1990 Mexican Grand Prix', '2018 Mexican Grand Prix', '2019 Mexican Grand Prix'].
Thought 4: Given the difficulties in finding a direct list of Mexican Grand Prix winners, I should search for the Mexican Grand Prix to get a broader picture of the race's history. This might include winners.
Action 4: Search[Mexican Grand Prix]
This trajectory is incorrect as my search should be related to Mexican Formula One race car drivers, not winners of the Mexican Grand Prix, a seperate event. A better search would have been for the List of Formula One Grand Prix winners, as suggested.
Thus the correctness score is 3
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Observation 1: Milhouse Mussolini Van Houten is a recurring character in the Fox animated television series The Simpsons voiced by Pamela Hayden and created by Matt Groening.
Thought 2: The paragraph does not tell who Milhouse is named after, maybe I can look up "named after".
Action 2: Lookup[named after]
Observation 2: (Result 1 / 1) Milhouse was named after U.S. president Richard Nixon, whose middle name was Milhous.
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
This trajectory is correct as all of my thoughts and actions are correct. It makes sense to search for Milhouse first as it is the central subject of the question. It is also correct to directly look up the relevant information in the article, instead of trying another search.
Thus the correctness score is 10
{trajectories}
{input}
'''
# Few-shot prompt asking for the next Thought and Action of a question-answering
# task. The three worked examples contain Thought/Action steps only (no
# Observation lines); the template ends with the `{input}` placeholder.
rap_prompt = '''
Solve a question answering task with interleaving Thought and Action steps. Thought can reason about the current situation, and Action can be three types:
(1) Search[entity], which searches the exact entity on Wikipedia and returns the first paragraph if it exists. If not, it will return some similar entities to search.
(2) Lookup[keyword], which returns the next sentence containing keyword in the current passage.
(3) Finish[answer], which returns the answer and finishes the task.
Provide the next Thought and next Action. Here are some examples:
Question: What is the elevation range for the area that the eastern sector of the Colorado orogeny extends into?
Thought 1: I need to search Colorado orogeny, find the area that the eastern sector of the Colorado orogeny extends into, then find the elevation range of the area.
Action 1: Search[Colorado orogeny]
Thought 2: Then I need to look up eastern sector.
Action 2: Lookup[eastern sector]
Thought 3: The eastern sector of Colorado orogeny extends into the High Plains, so High Plains is the area. I need to search High Plains and find its elevation range.
Action 3: Search[High Plains]
Thought 4: I need to search High Plains (United States).
Action 4: Search[High Plains (United States)]
Thought 5: High Plains rise in elevation from around 1,800 to 7,000 ft, so the answer is 1,800 to 7,000 ft.
Action 5: Finish[1,800 to 7,000 ft]
Question: Guitars for Wounded Warriors is an album that was recorded in the village in which New York county?
Thought 1: I need to search Guitars for Wounded Warriors, then find in which New York county it was recorded.
Action 1: Search[Guitars for Wounded Warriors]
Thought 2: I need to search New Paltz (village), New York and find which county it is in
Action 2: Search[New Paltz (village), New York]
Thought 3: New Paltz (village), New York is in Ulster County, so the answer is Ulster County.
Action 3: Finish[Ulster County]
Question: Musician and satirist Allie Goertz wrote a song about the "The Simpsons" character Milhouse, who Matt Groening named after who?
Thought 1: The question simplifies to "The Simpsons" character Milhouse is named after who. I only need to search Milhouse and find who it is named after.
Action 1: Search[Milhouse]
Thought 2: I can look up "named after" for finding the specific individual Milhouse is named after.
Action 2: Lookup[named after]
Thought 3: Milhouse was named after U.S. president Richard Nixon, so the answer is Richard Nixon.
Action 3: Finish[President Richard Nixon]
{input}
'''

View File

@@ -0,0 +1,50 @@
# Top-level package module. It only includes the source files and brings the
# submodule named after each file into scope with `using .name`.
module YiemAgent
# export agent
""" Order by dependencies of each file. The 1st included file must not depend on any other
files and each file can only depend on the file included before it.
"""
include("type.jl")
using .type
include("util.jl")
using .util
include("llmfunction.jl")
using .llmfunction
include("mcts.jl")
using .mcts
include("interface.jl")
using .interface
# ---------------------------------------------- 100 --------------------------------------------- #
end # module YiemAgent

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,825 @@
module llmfunction
export virtualWineUserChatbox, jsoncorrection, winestock,
virtualWineUserRecommendbox, userChatbox, userRecommendbox
using HTTP, JSON3, URIs, Random, PrettyPrinting
using GeneralUtils
using ..type, ..util
# ---------------------------------------------- 100 --------------------------------------------- #
""" Placeholder for a chatbox that talks to a real user.

The function is not implemented yet: it raises an error on its first statement, so the
draft code after that statement never runs.

# Arguments
    - `a::T1`
        an agent; the draft reads `a.config[:externalservice][:virtualWineCustomer_1]`
    - `input::T2`
        text that the draft would format as an "assistant" message

# Return
    Never returns; always throws `ErrorException("--> userChatbox")`.

# TODO
    - [] update docstring
    - [WORKING] implement the function

# Signature
"""
function userChatbox(a::T1, input::T2) where {T1<:agent, T2<:AbstractString}
    # Not implemented yet: fail loudly before touching the draft below.
    error("--> userChatbox")

    # ---- draft, currently unreachable ----
    # put in model format
    customer = a.config[:externalservice][:virtualWineCustomer_1]
    modelname = customer[:llminfo][:name]
    if modelname != "llama3instruct"
        error("llm model name is not defied yet $(@__LINE__)")
    end
    formattedinput = formatLLMtext_llama3instruct("assistant", input)

    # send formatted input to user using GeneralUtils.sendReceiveMqttMsg

    # return response
end
""" Placeholder for a recommendation box that talks to a real user.

The function is not implemented yet: it raises an error on its first statement, so the
draft code after that statement never runs.

# Arguments
    - `a::T1`
        an agent; the draft reads `a.config[:externalservice][:virtualWineCustomer_1]`
    - `input::T2`
        text that the draft would format as an "assistant" message

# Return
    Never returns; always throws `ErrorException("--> userRecommendbox")`.

# TODO
    - [] update docstring
    - [PENDING] implement the function

# Signature
"""
function userRecommendbox(a::T1, input::T2) where {T1<:agent, T2<:AbstractString}
    # Not implemented yet: fail loudly before touching the draft below.
    error("--> userRecommendbox")

    # ---- draft, currently unreachable ----
    # put in model format
    llminfo = a.config[:externalservice][:virtualWineCustomer_1][:llminfo]
    formattedinput = llminfo[:name] == "llama3instruct" ?
        formatLLMtext_llama3instruct("assistant", input) :
        error("llm model name is not defied yet $(@__LINE__)")

    # send formatted input to user using GeneralUtils.sendReceiveMqttMsg

    # return response
end
""" Send a recommendation message to the virtual wine customer service and return its reply.

# Arguments
    - `a::T1`
        one of Yiem's agent; `a.config` supplies the service and MQTT broker settings
        and `a.id` is used as the sender id
    - `input`
        text to be sent to the virtual wine customer

# Return
    - `(text, select, reward, isterminal)`
        the `:text`, `:select`, `:reward` and `:isterminal` entries of the service response

# Example
```jldoctest
julia>
```

# TODO
    - [] update docstring
    - [] add reccommend() to compare wine

# Signature
"""
function virtualWineUserRecommendbox(a::T1, input
    )::Union{Tuple{String, Number, Number, Bool}, Tuple{String, Nothing, Number, Bool}} where {T1<:agent}

    customerService = a.config[:externalservice][:virtualWineCustomer_1]

    # wrap the text in the instruct format of the configured model
    modelName = customerService[:llminfo][:name]
    if modelName != "llama3instruct"
        error("llm model name is not defied yet $(@__LINE__)")
    end
    prompt = formatLLMtext_llama3instruct("assistant", input)

    # send formatted input to user using GeneralUtils.sendReceiveMqttMsg
    brokerInfo = a.config[:mqttServerInfo]
    meta = GeneralUtils.generate_msgMeta(
        customerService[:mqtttopic],
        senderName= "virtualWineUserRecommendbox",
        senderId= a.id,
        receiverName= "virtualWineCustomer",
        mqttBroker= brokerInfo[:broker],
        mqttBrokerPort= brokerInfo[:port],
        msgId = "dummyid" #CHANGE remove after testing finished
    )
    request = Dict(
        :msgMeta=> meta,
        :payload=> Dict(
            :text=> prompt,
        )
    )

    reply = GeneralUtils.sendReceiveMqttMsg(request; timeout=120)[:response]
    return (reply[:text], reply[:select], reply[:reward], reply[:isterminal])
end
""" Chatbox for chatting with a virtual wine customer that is role-played by an LLM.

A system prompt describing the customer is placed in front of the conversation, the
conversation roles are renamed for the role-play ("user" becomes "you", "assistant"
becomes "sommelier"), and the LLM is asked to answer as the customer in JSON.
The request is retried up to 5 times when the answer cannot be parsed.

# Arguments
    - `a::T1`
        one of Yiem's agent
    - `input::T2`
        text to be send to virtual wine customer
    - `virtualCustomerChatHistory`
        previous messages, each with a `:name` and a `:text` entry.
        The collection is only read; it is not modified.

# Return
    - `(text, select, reward, isterminal)`
        `text` is the customer's reply, `select` the chosen option or `nothing`,
        `reward` a number and `isterminal` tells whether the customer ends the talk

# Throws
    - `ErrorException` when no usable answer was obtained after 5 attempts

# Example
```jldoctest
julia>
```

# TODO
    - [] update docs
    - [x] write a prompt for virtual customer

# Signature
"""
function virtualWineUserChatbox(a::T1, input::T2, virtualCustomerChatHistory
    )::Union{Tuple{String, Number, Number, Bool}, Tuple{String, Nothing, Number, Bool}} where {T1<:agent, T2<:AbstractString}

    systemmsg =
        """
        You find yourself in a well-stocked wine store, engaged in a conversation with the store's knowledgeable sommelier.
        You're on a quest to find a bottle of wine that aligns with your specific preferences and requirements.
        The ideal wine you're seeking should meet the following criteria:
        1. It should fit within your budget.
        2. It should be suitable for the occasion you're planning.
        3. It should pair well with the food you intend to serve.
        4. It should be of a particular type of wine you prefer.
        5. It should possess certain characteristics, including:
        - The level of sweetness.
        - The intensity of its flavor.
        - The amount of tannin it contains.
        - Its acidity level.
        Here's the criteria details:
        {
        "budget": 50,
        "occasion": "graduation ceremony",
        "food pairing": "Thai food",
        "type of wine": "red",
        "wine sweetness level": "dry",
        "wine intensity level": "full-bodied",
        "wine tannin level": "low",
        "wine acidity level": "medium",
        }
        You should only respond with "text", "select", "reward", "isterminal" steps.
        "text" is your conversation.
        "select" is an integer. Choose an option when presented with choices, or leave it null if none of the options satisfy you or if no choices are available.
        "reward" is an integer, it can be three number:
        1) 1 if you find the right wine.
        2) 0 if you don't find the ideal wine.
        3) -1 if you're dissatisfied with the sommelier's response.
        "isterminal" can be false if you still want to talk with the sommelier, true otherwise.
        You should only respond in JSON format as describe below:
        {
        "text": "your conversation",
        "select": null,
        "reward": 0,
        "isterminal": false
        }
        Here are some examples:
        sommelier: "What's your budget?"
        you:
        {
        "text": "My budget is 30 USD.",
        "select": null,
        "reward": 0,
        "isterminal": false
        }
        sommelier: "The first option is Zena Crown and the second one is Buano Red."
        you:
        {
        "text": "I like the 2nd option.",
        "select": 2,
        "reward": 1,
        "isterminal": true
        }
        Let's begin!
        """

    # Build the role-play conversation in a fresh vector. The caller's history is left
    # untouched, otherwise every call would prepend one more system prompt to it.
    chathistory = Vector{Dict{Symbol, Any}}()
    push!(chathistory, Dict{Symbol, Any}(:name=> "system", :text=> systemmsg))
    for msg in virtualCustomerChatHistory
        # rename the roles so the LLM plays the customer ("you")
        rolename =
            if msg[:name] == "user"
                "you"
            elseif msg[:name] == "assistant"
                "sommelier"
            else
                msg[:name]
            end
        push!(chathistory, Dict{Symbol, Any}(:name=> rolename, :text=> msg[:text]))
    end
    # NOTE(review): the newest message keeps the name "assistant" while older assistant
    # messages are renamed to "sommelier" -- confirm which name formatLLMtext expects.
    push!(chathistory, Dict{Symbol, Any}(:name=> "assistant", :text=> input))

    # put in model format
    prompt = formatLLMtext(chathistory, "llama3instruct")
    # open the customer's turn and pre-fill the start of the JSON answer
    prompt *=
        """
        <|start_header_id|>you<|end_header_id|>
        {"text"
        """
    pprint(prompt)

    externalService = a.config[:externalservice][:text2textinstruct]

    # send formatted input to user using GeneralUtils.sendReceiveMqttMsg
    msgMeta = GeneralUtils.generate_msgMeta(
        externalService[:mqtttopic],
        senderName= "virtualWineUserChatbox",
        senderId= a.id,
        receiverName= "text2textinstruct",
        mqttBroker= a.config[:mqttServerInfo][:broker],
        mqttBrokerPort= a.config[:mqttServerInfo][:port],
        msgId = "dummyid" #CHANGE remove after testing finished
    )
    outgoingMsg = Dict(
        :msgMeta=> msgMeta,
        :payload=> Dict(
            :text=> prompt,
        )
    )

    expectedJsonExample =
        """
        Here is an expected JSON format:
        {
        "text": "...",
        "select": "...",
        "reward": "...",
        "isterminal": "..."
        }
        """

    for _ in 1:5
        try
            response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
            _responseJsonStr = response[:response][:text]
            responseJsonStr = jsoncorrection(a, _responseJsonStr, expectedJsonExample)
            responseDict = copy(JSON3.read(responseJsonStr))

            # the type assertions double as validation of the LLM answer
            text::AbstractString = responseDict[:text]
            select::Union{Nothing, Number} =
                responseDict[:select] == "null" ? nothing : responseDict[:select]
            reward::Number = responseDict[:reward]
            isterminal::Bool = responseDict[:isterminal]

            # an empty reply counts as a failed attempt
            text == "" && error("virtual customer not answer correctly")

            return (text, select, reward, isterminal)
        catch e
            # report the failure and try again
            io = IOBuffer()
            showerror(io, e)
            errorMsg = String(take!(io))
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            @warn "Error occurred: $errorMsg\n$st"
            println("")
        end
    end
    error("virtualWineUserChatbox failed to get a response")
end
""" Search wine in stock (work in progress).

The current implementation asks the LLM service for a JSON answer and returns the
`attributes` part of that answer as a JSON string. No database query is executed yet.

# Arguments
    - `a::T1`
        one of ChatAgent's agent.
    - `input::T2`
        the user's wine request

# Return
    - `(text, select, reward, isterminal)`
        `text` is a JSON string; `select`, `reward` and `isterminal` are currently always
        `nothing`, `0` and `false`

# Throws
    - `ErrorException` when no usable answer was obtained after 5 attempts

# Example
```jldoctest
julia> using ChatAgent
julia> agent = ChatAgent.agentReflex("Jene")
julia> input = "{\"food\": \"pizza\", \"occasion\": \"anniversary\"}"
julia> result = winestock(agent, input)
```

# TODO
    [] update docs
    [WORKING] implement the function

# Signature
"""
function winestock(a::T1, input::T2
    )::Union{Tuple{String, Number, Number, Bool}, Tuple{String, Nothing, Number, Bool}} where {T1<:agent, T2<:AbstractString}

    # SELECT *
    # FROM food
    # WHERE 'China' = ANY(food_name)
    # OR 'India' = ANY(food_name);

    # NOTE(review): the converted attributes are not used yet; the example in the system
    # prompt suggests they are meant to become the user message -- confirm.
    wineattributes = wineattributes_wordToNumber(a, input)

    # NOTE(review): the example at the end of this prompt is unfinished ("SQL" has no value).
    systemmsg =
        """
        As a helpful sommelier, your mission is to write SQL queries that search the PostgreSQL database for wines based on user input.
        The database has the following tables (schema):
        1. Table wine (
        wine_id uuid primary key,
        wine_name varchar,
        brand varchar,
        manufacturer varchar,
        region varchar,
        country varchar,
        wine_type varchar,
        grape_variety varchar,
        serving_temperature varchar,
        intensity integer,
        sweetness integer,
        tannin integer,
        acidity integer,
        fizziness integer,
        other_attributes jsonb,
        created_at timestamptz,
        updated_at timestamptz,
        description text
        )
        2. Table food (
        food_id uuid primary key,
        food_name varchar,
        country varchar,
        spicy integer,
        sweet integer,
        sour integer,
        umami integer,
        bitter integer,
        serving_temperature integer,
        other_attributes jsonb,
        created_at timestamptz,
        updated_at timestamptz,
        description text
        )
        3. wine_food (
        wine_id uuid references wine(wine_id),
        food_id uuid references food(food_id),
        constraint wine_food_id primary key (wine_id, food_id),
        created_at timestamptz,
        updated_at timestamptz
        )
        You should only respond in JSON format as describe below:
        {
        "SQL":
        {
        "sweetness": "sweetness level",
        "acidity": "acidity level",
        "tannin": "tannin level",
        "intensity": "intensity level"
        }
        }
        Here are some examples:
        user: {"sweetness": 2,"acidity": 3,"tannin": 1,"intensity": 5, "food": "Thai"}
        assistant:
        {
        "SQL":
        }
        Let's begin!
        """

    usermsg =
        """
        $input
        """

    chathistory =
    [
        Dict(:name=> "system", :text=> systemmsg),
        Dict(:name=> "user", :text=> usermsg)
    ]

    # put in model format
    prompt = formatLLMtext(chathistory, "llama3instruct")
    # open the assistant's turn and pre-fill the start of the JSON answer
    prompt *=
        """
        <|start_header_id|>assistant<|end_header_id|>
        {
        """
    pprint(prompt)

    externalService = a.config[:externalservice][:text2textinstruct]

    # send formatted input to user using GeneralUtils.sendReceiveMqttMsg
    msgMeta = GeneralUtils.generate_msgMeta(
        externalService[:mqtttopic],
        senderName= "winestock",
        senderId= a.id,
        receiverName= "text2textinstruct",
        mqttBroker= a.config[:mqttServerInfo][:broker],
        mqttBrokerPort= a.config[:mqttServerInfo][:port],
        msgId = "dummyid" #CHANGE remove after testing finished
    )
    outgoingMsg = Dict(
        :msgMeta=> msgMeta,
        :payload=> Dict(
            :text=> prompt,
        )
    )

    # NOTE(review): the system prompt asks for an "SQL" key while this example and the
    # lookup below use "attributes" -- confirm which key is intended.
    expectedJsonExample =
        """
        Here is an expected JSON format:
        {
        "attributes":
        {
        "...": "...",
        "...": "..."
        }
        }
        """

    for _ in 1:5
        try
            response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
            _responseJsonStr = response[:response][:text]
            responseJsonStr = jsoncorrection(a, _responseJsonStr, expectedJsonExample)
            _responseDict = copy(JSON3.read(responseJsonStr))
            responseDict = _responseDict[:attributes]

            # The declared return type is a 4-tuple; returning the Dict itself can never
            # be converted to it, so hand the result back as a JSON string with neutral
            # select / reward / isterminal values.
            return (JSON3.write(responseDict), nothing, 0, false)
        catch e
            # report the failure and try again
            io = IOBuffer()
            showerror(io, e)
            errorMsg = String(take!(io))
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            @warn "Error occurred: $errorMsg\n$st"
            println("")
        end
    end
    error("winestock() failed to get a response")
end
""" Convert a worded wine description into numeric attribute levels with the help of an LLM.

The LLM receives a conversion table (levels 1 to 5 for intensity, sweetness, tannin and
acidity) and is asked to answer in JSON. The request is retried up to 5 times when the
answer cannot be parsed.

# Arguments
    - `a::T1`
        an agent; `a.config` supplies the LLM service and MQTT broker settings
    - `input::T2`
        the user's wine description

# Return
    - `responseDict::Dict`
        the content of the "attributes" entry of the LLM answer

# Throws
    - `ErrorException` when no usable answer was obtained after 5 attempts

# Signature
"""
function wineattributes_wordToNumber(a::T1, input::T2
    )::Dict where {T1<:agent, T2<:AbstractString}

    # The JSON shown in the examples must be valid, otherwise the model is taught to
    # produce answers that cannot be parsed.
    systemmsg =
        """
        As an attentive sommelier, your mission is to determine the user's preferred levels of sweetness, intensity, tannin, acidity and other criteria for a wine based on their input.
        You'll achieve this by referring to the provided conversion table.
        Conversion Table:
        Intensity level:
        Level 1: May correspond to "light-bodied" or a similar description.
        Level 2: May correspond to "med light", "medium light" or a similar description.
        Level 3: May correspond to "medium" or a similar description.
        Level 4: May correspond to "med full", "medium full" or a similar description.
        Level 5: May correspond to "full" or a similar description.
        Sweetness level:
        Level 1: May correspond to "dry", "no sweet" or a similar description.
        Level 2: May correspond to "off dry", "less sweet" or a similar description.
        Level 3: May correspond to "semi sweet" or a similar description.
        Level 4: May correspond to "sweet" or a similar description.
        Level 5: May correspond to "very sweet" or a similar description.
        Tannin level:
        Level 1: May correspond to "low tannin" or a similar description.
        Level 2: May correspond to "semi low tannin" or a similar description.
        Level 3: May correspond to "medium tannin" or a similar description.
        Level 4: May correspond to "semi high tannin" or a similar description.
        Level 5: May correspond to "high tannin" or a similar description.
        Acidity level:
        Level 1: May correspond to "low acidity" or a similar description.
        Level 2: May correspond to "semi low acidity" or a similar description.
        Level 3: May correspond to "medium acidity" or a similar description.
        Level 4: May correspond to "semi high acidity" or a similar description.
        Level 5: May correspond to "high acidity" or a similar description.
        You should only respond in JSON format as describe below:
        {
        "attributes":
        {
        "sweetness": "sweetness level",
        "acidity": "acidity level",
        "tannin": "tannin level",
        "intensity": "intensity level"
        }
        }
        Here are some examples:
        user: "price < 25, full-bodied white wine with sweetness level 2, low tannin level and medium acidity level, Pizza"
        assistant:
        {
        "attributes":
        {
        "wine_type": "white",
        "budget": "less than 25",
        "food_pairing": "Pizza",
        "sweetness": 2,
        "acidity": 3,
        "tannin": 1,
        "intensity": 5
        }
        }
        user: body=full-bodied, off dry, acidity=medium, intensity=intense
        assistant:
        {
        "attributes":
        {
        "sweetness": 2,
        "acidity": 3,
        "tannin": "not specified",
        "intensity": 5
        }
        }
        Let's begin!
        """

    usermsg =
        """
        $input
        """

    chathistory =
    [
        Dict(:name=> "system", :text=> systemmsg),
        Dict(:name=> "user", :text=> usermsg)
    ]

    # put in model format
    prompt = formatLLMtext(chathistory, "llama3instruct")
    # open the assistant's turn and pre-fill the start of the JSON answer
    prompt *=
        """
        <|start_header_id|>assistant<|end_header_id|>
        {
        """
    pprint(prompt)

    externalService = a.config[:externalservice][:text2textinstruct]

    # send formatted input to user using GeneralUtils.sendReceiveMqttMsg
    msgMeta = GeneralUtils.generate_msgMeta(
        externalService[:mqtttopic],
        senderName= "wineattributes_wordToNumber",
        senderId= a.id,
        receiverName= "text2textinstruct",
        mqttBroker= a.config[:mqttServerInfo][:broker],
        mqttBrokerPort= a.config[:mqttServerInfo][:port],
    )
    outgoingMsg = Dict(
        :msgMeta=> msgMeta,
        :payload=> Dict(
            :text=> prompt,
        )
    )

    expectedJsonExample =
        """
        Here is an expected JSON format:
        {
        "attributes":
        {
        "...": "...",
        "...": "..."
        }
        }
        """

    for _ in 1:5
        try
            response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
            _responseJsonStr = response[:response][:text]
            responseJsonStr = jsoncorrection(a, _responseJsonStr, expectedJsonExample)
            _responseDict = copy(JSON3.read(responseJsonStr))
            responseDict = _responseDict[:attributes]
            return responseDict
        catch e
            # report the failure and try again
            io = IOBuffer()
            showerror(io, e)
            errorMsg = String(take!(io))
            st = sprint((io, v) -> show(io, "text/plain", v), stacktrace(catch_backtrace()))
            println("")
            @warn "Error occurred: $errorMsg\n$st"
            println("")
        end
    end
    error("wineattributes_wordToNumber() failed to get a response")
end
""" Attempt to correct an LLM response that is not valid JSON.

The text is parsed first; when that works it is returned unchanged. Otherwise the LLM
service is asked for a corrected version, up to `maxattempt` times, and every corrected
version is parsed again before it is returned.

# Arguments
    - `a::T1`
        one of Yiem's agent; `a.config` supplies the LLM service and MQTT broker settings
    - `input::T2`
        JSON text to be checked and, if needed, corrected
    - `correctJsonExample::T3`
        description of the expected JSON format, inserted into the correction prompt

# Keyword Arguments
    - `maxattempt::Integer`
        maximum number of correction requests sent to the LLM

# Return
    - `correctjson`
        a JSON string that parses successfully, or `nothing` when no parsable version
        was obtained

# Example
```jldoctest
julia>
```

# Signature
"""
function jsoncorrection(a::T1, input::T2, correctJsonExample::T3;
    maxattempt::Integer=3
    ) where {T1<:agent, T2<:AbstractString, T3<:AbstractString}

    candidate = input
    for attempt in 0:maxattempt
        # validate the current candidate; `nothing` means it parsed fine
        parseerror =
            try
                copy(JSON3.read(candidate))
                nothing
            catch e
                string(e)
            end
        parseerror === nothing && return candidate

        # every allowed correction was used and its result checked: give up
        attempt == maxattempt && break

        @warn "Attempting to correct JSON string. Attempt $(attempt + 1)"
        # keep the error text short: cut everything after the first "EOF"
        if occursin("EOF", parseerror)
            parseerror = split(parseerror, "EOF")[1] * "EOF"
        end

        # NOTE(review): the prompt always shows the original input, while the error text
        # comes from the latest candidate -- kept as before, confirm this is intended.
        _prompt =
            """
            Your goal are:
            1) Use the expected JSON format as a guideline to check why the given JSON string failed to load and provide a corrected version that can be loaded by Python's json.load function.
            2) Provide Corrected JSON string only. Do not provide any other info.
            $correctJsonExample
            Let's begin!
            Given JSON string: $input
            The given JSON string failed to load previously because: $parseerror
            Corrected JSON string:
            """

        # apply LLM specific instruct format
        externalService = a.config[:externalservice][:text2textinstruct]
        llminfo = externalService[:llminfo]
        prompt =
            if llminfo[:name] == "llama3instruct"
                formatLLMtext_llama3instruct("system", _prompt)
            else
                error("llm model name is not defied yet $(@__LINE__)")
            end

        # send formatted input to user using GeneralUtils.sendReceiveMqttMsg
        msgMeta = GeneralUtils.generate_msgMeta(
            externalService[:mqtttopic],
            senderName= "jsoncorrection",
            senderId= a.id,
            receiverName= "text2textinstruct",
            mqttBroker= a.config[:mqttServerInfo][:broker],
            mqttBrokerPort= a.config[:mqttServerInfo][:port],
        )
        outgoingMsg = Dict(
            :msgMeta=> msgMeta,
            :payload=> Dict(
                :text=> prompt,
                :kwargs=> Dict(
                    :max_tokens=> 512,
                    :stop=> ["<|eot_id|>"],
                )
            )
        )
        result = GeneralUtils.sendReceiveMqttMsg(outgoingMsg; timeout=120)
        candidate = result[:response][:text]
    end

    # no parsable version was obtained
    return nothing
end
end # module llmfunction

View File

@@ -0,0 +1,729 @@
""" https://www.harrycodes.com/blog/monte-carlo-tree-search
"""
module mcts
export MCTSNode, runMCTS, isleaf, selectBestNextState, selectBestTrajectory, transition,
userChatbox, makeNewState
using Dates, UUIDs, DataStructures, JSON3, Random, PrettyPrinting
using GeneralUtils
using ..type, ..llmfunction
# ---------------------------------------------- 100 --------------------------------------------- #
""" A node of the MCTS search tree.

# Fields
    - `nodekey::T2`
        key of this node; child nodes are stored in their parent's `children` under it
    - `state::T1`
        a state of a game, stored as a dictionary
    - `visits::Integer`
        number of time the game visits this state
    - `progressvalue::Number`
        value estimated by the LLM's reasoning
    - `statevalue::Number`
        discounted cumulative reward gathered from the child nodes
    - `reward::Number`
        this node's own reward
    - `isterminal::Bool`
        whether this node holds a terminal state
    - `parent::Union{MCTSNode, Nothing}`
        parent node, or `nothing`
    - `children::Dict{String, MCTSNode}`
        child nodes by node key

# Example
```jldoctest
julia> state = Dict(
    :info=> Dict(), # keyword info
    :thoughtHistory=> Dict(
        :question=> _,
        :thought_1=> _,
        :action_1=> _,
        :observation_1=> _,
        :thought_2=> _,
        ...
    )
)
```

# TODO
    [] update docstring

# Signature
"""
mutable struct MCTSNode{T1<:AbstractDict, T2<:AbstractString}
nodekey::T2
state::T1
visits::Integer
progressvalue::Number # value estimated by the LLM's reasoning
statevalue::Number # discounted cumulative reward (gathered from its child nodes)
reward::Number # this node's own reward
isterminal::Bool
parent::Union{MCTSNode, Nothing}
children::Dict{String, MCTSNode}
end
""" Select the child node with the highest UCT score.

A child that was visited before is scored with
`statevalue + w * sqrt(log(parent visits) / child visits)`.
A child that was never visited is scored with its `progressvalue`.

# Arguments
    - `node::MCTSNode`
        mcts node whose children are compared
    - `w::T`
        exploration weight. Value is usually between 1 to 2.
        Value 1.0 makes MCTS balance between exploration and exploitation like 50%-50%.
        Value 2.0 makes MCTS aggressively search the tree.

# Return
    - `selectedNode::MCTSNode`
        the best child. When `node` has no children nothing is selected and the
        conversion to the declared return type fails with an error.

# Example
```jldoctest
julia>
```

# Signature
"""
function UCTselect(node::MCTSNode, w::T)::MCTSNode where {T<:AbstractFloat}
    # A parent with zero visits would give log(0) = -Inf and make sqrt() throw a
    # DomainError, so the visit count is clamped to at least 1 (log(1) = 0).
    logparentvisits = log(max(node.visits, 1))

    maxUCT = -Inf
    selectedNode = nothing
    for childNode in values(node.children)
        UCTvalue =
            if childNode.visits != 0
                weightedterm = w * sqrt(logparentvisits / childNode.visits) # explore term
                childNode.statevalue + weightedterm
            else # childNode.visits == 0 would divide by zero in the explore term
                childNode.progressvalue # exploit term
            end

        if UCTvalue > maxUCT
            maxUCT = UCTvalue
            selectedNode = childNode
        end
    end

    return selectedNode
end
""" Expand the selected node by sampling new child states.

For each sample the decision maker proposes a thought/action for the node's state, the
transition produces a new state and the evaluator scores it. A new state with a negative
reward is passed to the reflector and stored, together with the resulting lesson, in
"lesson.json". The new state is added as a child unless a child with the same key
already exists.

# Arguments
    - `a::T1`
        One of YiemAgent's agent
    - `node::MCTSNode`
        MCTS node to expand; its `children` are modified
    - `decisionMaker::Function`
        a function that output Thought and Action
    - `evaluator::Function`
        a function that output trajectory progress score
    - `reflector::Function`
        a function called with the agent and a new state whose reward is negative;
        its result is stored as the state's lesson

# Keyword Arguments
    - `totalsample::Integer`
        number of samples to draw

# Return
    - `nothing`

# Example
```jldoctest
julia>
```

# TODO
    [] update docstring
    [] try loop should limit to 3 times. if not succeed, skip
    [] newNodeKey ∉ keys(node.children). New state may have semantic vector close enought to one of existing child state. Which can be assume that they are the same state semantically-wise.
    [x] store feedback -> state -> agent.

# Signature
"""
function expand(a::T1, node::MCTSNode, decisionMaker::Function,
    evaluator::Function, reflector::Function; totalsample::Integer=3
    ) where {T1<:agent}

    for nthSample in 1:totalsample
        thoughtDict = decisionMaker(a, node.state)

        println("---> expand() sample $nthSample")
        pprintln(node.state[:thoughtHistory])
        pprintln(thoughtDict)

        newNodeKey, newstate = MCTStransition(a, node.state, thoughtDict)
        stateevaluation, progressvalue = evaluator(a, newstate)

        if newstate[:reward] < 0
            pprint(newstate[:thoughtHistory])
            newstate[:evaluation] = stateevaluation
            newstate[:lesson] = reflector(a, newstate)

            # store new lesson for later use
            lessonDict = copy(JSON3.read("lesson.json"))
            latestLessonKey, latestLessonIndice =
                GeneralUtils.findHighestIndexKey(lessonDict, "lesson")
            nextIndice = latestLessonKey == :NA ? 1 : latestLessonIndice + 1
            newLessonKey = Symbol("lesson_$(nextIndice)")
            lessonDict[newLessonKey] = newstate
            open("lesson.json", "w") do io
                JSON3.pretty(io, lessonDict)
            end
            print("---> reflector()")
        end

        # add the new state only when no child with this key exists yet
        if !haskey(node.children, newNodeKey)
            node.children[newNodeKey] =
                MCTSNode(newNodeKey, newstate, 0, progressvalue, 0, newstate[:reward],
                    newstate[:isterminal], node, Dict{String, MCTSNode}())
        end
    end

    return nothing
end
""" Simulate interactions between agent and environment, starting from a node.

For at most `maxDepth` steps the reward of the current node is added to the trajectory
reward. A terminal node ends the simulation; otherwise the node is expanded and the
simulation continues from the child picked by `selectChildNode`.

# Arguments
    - `a::T`
        one of YiemAgent's agent
    - `node::MCTSNode`
        node that will be a simulation starting point.
    - `decisionMaker::Function`
        function that receive state return Thought and Action
    - `evaluator::Function`
        passed on to `expand`
    - `reflector::Function`
        passed on to `expand`

# Keyword Arguments
    - `maxDepth::Integer`
        maximum number of simulation steps
    - `totalsample::Integer`
        passed on to `expand`

# Return
    - `(simTrajectoryReward, terminalstate)`
        the summed reward and the state of the terminal node that was reached,
        or `nothing` when no terminal node was reached

# Example
```jldoctest
julia>
```

# TODO
    - [] update docs

# Signature
"""
function simulate(a::T, node::MCTSNode, decisionMaker::Function, evaluator::Function,
    reflector::Function; maxDepth::Integer=3, totalsample::Integer=3
    )::Union{Tuple{Number, Dict{Symbol, <:Any}}, Tuple{Number, Nothing}} where {T<:agent}

    rewardsum = 0.0
    current = node
    stepsleft = maxDepth

    while stepsleft > 0
        stepsleft -= 1
        rewardsum += current.reward

        # a terminal node ends the simulation and reports its state
        if current.isterminal
            return (rewardsum, current.state)
        end

        expand(a, current, decisionMaker, evaluator, reflector; totalsample=totalsample)
        current = selectChildNode(current)
    end

    # depth limit reached without meeting a terminal node
    return (rewardsum, nothing)
end
""" Backpropagate reward along the simulation chain

Walk from `node` up through `node.parent` until the root is reached (the root itself is
not updated here). Every visited node gets its visit counter incremented and its
`statevalue` updated to the running mean of the rewards backpropagated through it. The
reward is multiplied by `discountRewardCoeff` after each level.

# Arguments
- `node::MCTSNode`
    leaf node of a search tree
- `simTrajectoryReward::T`
    total reward from trajectory simulation

# Keyword Arguments
- `discountRewardCoeff::AbstractFloat`
    factor applied to the reward each time it moves one level up

# Return
- `nothing`
"""
function backpropagate(node::MCTSNode, simTrajectoryReward::T;
    discountRewardCoeff::AbstractFloat=0.9) where {T<:Number}

    while !isroot(node)
        # Update the statistics of the current node based on the result of the playout
        node.visits += 1

        # Incremental mean: new_mean = (old_mean * (n - 1) + x) / n.
        # This must be an assignment; accumulating with `+=` would add the new mean on
        # top of the old one and count earlier rewards twice.
        node.statevalue =
            ((node.statevalue * (node.visits - 1)) + simTrajectoryReward) / node.visits

        simTrajectoryReward *= discountRewardCoeff # discount because future reward is uncertain
        node = node.parent
    end

    return nothing
end
""" Get a new state for a simulated (MCTS) step

Execute the action named in `thoughtDict[:action]` against the virtual environment and
build the follow-up state with `makeNewState`.

# Arguments
- `a::T1`
    one of YiemAgent's agent
- `state::T2`
    current game state. For the "chatbox" action it must contain
    `:virtualCustomerChatHistory`.
- `thoughtDict::T3`
    must contain `:action`, a dict with `:name` and `:input`. Supported names are
    "chatbox", "winestock" and "recommendbox"; any other name raises an error.
    `state` and `thoughtDict` may be different dictionary types.

# Return
- `(newNodeKey, newstate)::Tuple{String, Dict{Symbol, <:Any}}`

# Example
```jldoctest
julia> thoughtDict = Dict{Symbol, Any}(
            :thought=> "The customer wants to buy a bottle of wine.",
            :action=> Dict{Symbol, Any}(
                :name=> "chatbox",
                :input=> "What occasion are you buying the wine for?",
            ),
        )
```

# TODO
- [x] add other actions
- [] add embedding of newstate and store in newstate[:embedding]
"""
function MCTStransition(a::T1, state::T2, thoughtDict::T3
    )::Tuple{String, Dict{Symbol, <:Any}} where {T1<:agent, T2<:AbstractDict, T3<:AbstractDict}

    actionname = thoughtDict[:action][:name]
    actioninput = thoughtDict[:action][:input]

    # map action and input() to llm function
    response, select, reward, isterminal =
        if actionname == "chatbox"
            # deepcopy(state[:virtualCustomerChatHistory]) because I want to keep it clean
            # so that other simulation start from this same node is not contaminated with actioninput
            virtualWineUserChatbox(a, actioninput,
                deepcopy(state[:virtualCustomerChatHistory])) # virtual customer
        elseif actionname == "winestock"
            winestock(a, actioninput)
        elseif actionname == "recommendbox"
            virtualWineUserRecommendbox(a, actioninput)
        else
            error("undefined LLM function. Requesting $actionname")
        end

    newNodeKey, newstate = makeNewState(state, thoughtDict, response, select, reward, isterminal)

    # record both sides of the simulated conversation in the new state only
    if actionname == "chatbox"
        push!(newstate[:virtualCustomerChatHistory], Dict(:name=>"assistant", :text=> actioninput) )
        push!(newstate[:virtualCustomerChatHistory], Dict(:name=>"user", :text=> response))
    end

    return (newNodeKey, newstate)
end
""" Get a new state

Execute the action stored in `state[:thoughtDict][:action]` and return the follow-up
state built by `makeNewState`.

# Arguments
- `a::T1`
    one of YiemAgent's agent
- `state::T2`
    current game state; must contain `:thoughtDict`
- `thoughtDict::T3`
    currently ignored, the value in `state[:thoughtDict]` is used instead.
    NOTE(review): confirm whether the argument or the stored value is the intended source.

# Return
- `newstate::Dict{Symbol, <:Any}`

# TODO
- [x] add other actions
- [] add embedding of newstate and store in newstate[:embedding]
"""
function transition(a::T1, state::T2, thoughtDict::T3
    )::Dict{Symbol, <:Any} where {T1<:agent, T2<:AbstractDict, T3<:AbstractDict}

    thoughtDict = state[:thoughtDict]
    actionname = thoughtDict[:action][:name]
    actioninput = thoughtDict[:action][:input]

    # map action and input() to llm function
    response, select, reward, isterminal =
        if actionname == "winestock"
            winestock(a, actioninput)
        else
            error("undefined LLM function. Requesting $actionname")
        end

    # makeNewState returns (newNodeKey, newstate); only the state matches the declared
    # return type, returning the tuple would fail the return-type conversion.
    _, newstate = makeNewState(state, thoughtDict, response, select, reward, isterminal)
    return newstate
end
""" Build the state that follows `currentstate`

The newest thought and action of `thoughtDict`, together with `response` as the
observation, are appended to a deep copy of `currentstate` under the next free index of
`:thoughtHistory`. `:reward`, `:select` and `:isterminal` are stored on the copy.

# Arguments
- `currentstate::T1`
    state to extend; must contain `:thoughtHistory`
- `thoughtDict::T4`
    holds either `:thought`/`:action` or indexed `:thought_n`/`:action_n` entries
- `response::T2`
    observation text produced by the executed action
- `select::Union{T3, Nothing}`
    stored under `:select`
- `reward::T3`
    stored under `:reward`
- `isterminal::Bool`
    stored under `:isterminal`

# Return
- `(newNodeKey, newstate)::Tuple{String, Dict{Symbol, <:Any}}`
    `newNodeKey` comes from `GeneralUtils.uuid4snakecase()`

# TODO
- [] update docstring
- [x] implement the function
"""
function makeNewState(currentstate::T1, thoughtDict::T4, response::T2, select::Union{T3, Nothing},
    reward::T3, isterminal::Bool
    )::Tuple{String, Dict{Symbol, <:Any}} where {T1<:AbstractDict, T2<:AbstractString, T3<:Number, T4<:AbstractDict}

    # index under which the new thought/action/observation will be stored
    historyKey, historyIndex =
        GeneralUtils.findHighestIndexKey(currentstate[:thoughtHistory], "thought")
    nextIndex = if historyKey == :NA
        1
    else
        historyIndex + 1
    end

    # where the newest thought/action live inside thoughtDict
    # (an index of -1 is treated as "keys carry no index")
    _, sourceIndex = GeneralUtils.findHighestIndexKey(thoughtDict, "thought")
    sourceThoughtKey = sourceIndex == -1 ? :thought : Symbol("thought_$sourceIndex")
    sourceActionKey = sourceIndex == -1 ? :action : Symbol("action_$sourceIndex")

    # add Thought, action, observation to thoughtHistory of a private copy
    newstate = deepcopy(currentstate)
    history = newstate[:thoughtHistory]
    history[Symbol("thought_$nextIndex")] = thoughtDict[sourceThoughtKey]
    history[Symbol("action_$nextIndex")] = thoughtDict[sourceActionKey]
    history[Symbol("observation_$(nextIndex)")] = response

    newstate[:reward] = reward
    newstate[:select] = select
    newstate[:isterminal] = isterminal

    return (GeneralUtils.uuid4snakecase(), newstate)
end
""" Determine whether a node is a leaf node of a search tree.

# Arguments
- `node::MCTSNode`
    a search tree node

# Return
- `result::Bool`
    true if the node has no children, false otherwise.

# Example
```jldoctest
julia> using YiemAgent, DataStructures
julia> initialState = Dict{Symbol, Any}(
            :customerinfo=> Dict{Symbol, Any}(),
            :storeinfo=> Dict{Symbol, Any}(),
            :thoughtHistory=> OrderedDict{Symbol, Any}(
                :question=> "How are you?",
            )
        )
julia> root = YiemAgent.MCTSNode("root", initialState, 0, 0, 0, 0, false, nothing,
            Dict{String, YiemAgent.MCTSNode}())
julia> YiemAgent.isleaf(root)
true
```

# TODO
[] update docs
"""
function isleaf(node::MCTSNode)::Bool
    return isempty(node.children)
end
""" Select the child node with the highest `progressvalue + reward`

Ties are resolved in favour of the child met first while iterating `node.children`.
Children whose score is zero or negative are valid candidates too.

# Arguments
- `node::MCTSNode`
    node of a search tree; must have at least one child

# Return
- `childNode::MCTSNode`
    the child with the highest `progressvalue + reward`

# Throws
- `ArgumentError` when `node` has no children
"""
function selectChildNode(node::MCTSNode)::MCTSNode
    isempty(node.children) &&
        throw(ArgumentError("selectChildNode: node has no children to select from"))

    bestChild = nothing
    bestPotential = -Inf

    # Start from "nothing selected" instead of a threshold of 0, otherwise a node whose
    # children all score <= 0 would leave no key to look up.
    for childNode in values(node.children)
        potential = childNode.progressvalue + childNode.reward
        if bestChild === nothing || potential > bestPotential
            bestPotential = potential
            bestChild = childNode
        end
    end

    return bestChild
end
""" Select the most promising child of `node`

When at least one child has a non-zero `statevalue`, children are ranked by
`statevalue / visits` (children that were never visited rank last). Otherwise they are
ranked by `progressvalue + reward`. Ties go to the child met first while iterating
`node.children`.

# Arguments
- `node::MCTSNode`
    node of a search tree; must have at least one child

# Return
- `childNode::MCTSNode`
    the highest value child node

# Throws
- `ArgumentError` when `node` has no children

# TODO
- [] update docs
- [x] implement the function
"""
function selectBestNextState(node::MCTSNode)::MCTSNode
    isempty(node.children) &&
        throw(ArgumentError("selectBestNextState: node has no children to select from"))

    # if all childnode has statevalue == 0, use progressvalue + reward to select the best node
    # (testing each value avoids positive and negative values cancelling out in a sum)
    usestatevalue = any(child -> !iszero(child.statevalue), values(node.children))

    bestChild = nothing
    bestPotential = -Inf
    for child in values(node.children)
        potential = if usestatevalue
            # guard against 0 visits, which would yield NaN/Inf
            child.visits > 0 ? child.statevalue / child.visits : -Inf
        else
            child.progressvalue + child.reward
        end

        # no fixed threshold of 0: negative scores are still comparable
        if bestChild === nothing || potential > bestPotential
            bestPotential = potential
            bestChild = child
        end
    end

    return bestChild
end
""" Follow the best child at every level until a leaf is reached

# Arguments
- `node::MCTSNode`
    node of a search tree where the descent starts

# Return
- `leaf::MCTSNode`
    the leaf reached by repeatedly applying `selectBestNextState`; `node` itself when
    it is already a leaf

# TODO
- [] update docs
- [x] implement the function
"""
function selectBestTrajectory(node::MCTSNode)::MCTSNode
    current = node
    while true
        isleaf(current) && return current
        current = selectBestNextState(current)
    end
end
""" Determine whether a given node is a root node

A node counts as root when its `nodekey` equals "root".

# Arguments
- `node::MCTSNode`
    node of a search tree

# Return
- `isrootnode::Bool`
    true if the given node is root node, false otherwise
"""
function isroot(node::MCTSNode)::Bool
    return node.nodekey == "root"
end
# ------------------------------------------------------------------------------------------------ #
# Create a complete example using the defined MCTS functions #
# ------------------------------------------------------------------------------------------------ #
""" Search the best action to take for a given state and task

Each iteration descends from the root with `UCTselect` until a leaf is reached. A
terminal leaf is backpropagated directly with its own reward; any other leaf is expanded,
one of its children is simulated, and the simulated reward is backpropagated from that
child.

# Arguments
- `a::agent`
    one of Yiem's agents
- `initialState`
    initial state, stored in the root node
- `decisionMaker::Function`
    decide what action to take
- `evaluator::Function`
    assess the value of the state
- `reflector::Function`
    generate lesson from trajectory and reward

# Keyword Arguments
- `totalsample::Integer`
    how many times action will be sampled from decisionMaker
- `maxDepth::Integer`
    maximum number of steps per simulation
- `maxiterations::Integer`
    how many search iterations to run
- `explorationweight::Number`
    exploration weight passed to `UCTselect`. Value is usually between 1 to 2.
    Value 1.0 makes MCTS balance between exploration and exploitation like 50%-50%
    Value 2.0 makes MCTS aggressively search the tree

# Return
- `(bestNextState, bestTrajectoryState)`
    state of the best child of the root, and state of the leaf at the end of the best
    trajectory

# TODO
[] update docstring
[x] return best action
"""
function runMCTS(
    a::T1,
    initialState,
    decisionMaker::Function,
    evaluator::Function,
    reflector::Function;
    totalsample::Integer=3,
    maxDepth::Integer=3,
    maxiterations::Integer=10,
    explorationweight::Number=1.0,
    ) where {T1<:agent}

    root = MCTSNode("root", initialState, 0, 0, 0, 0, false, nothing, Dict{String, MCTSNode}())

    for _ in 1:maxiterations
        node = root
        node.visits += 1

        while !isleaf(node)
            node = UCTselect(node, explorationweight)
        end

        if node.isterminal
            # MCTS arrive at the leaf node that is also a terminal state,
            # do nothing then go directly to backpropagation.
            # Backpropagate from `node` itself: no child has been selected on this path.
            backpropagate(node, node.reward)
        else
            expand(a, node, decisionMaker, evaluator, reflector; totalsample=totalsample)
            leafNode = selectChildNode(node)
            simTrajectoryReward, terminalstate = simulate(a, leafNode, decisionMaker, evaluator,
                reflector; maxDepth=maxDepth, totalsample=totalsample)

            # `terminalstate` is `nothing` when the simulation ended without reaching a
            # terminal node, so there is nothing to annotate in that case.
            if terminalstate !== nothing
                terminalstate[:totalTrajectoryReward] = simTrajectoryReward
            end

            #[] write best state to file if it has higher simTrajectoryReward. Use to improve evaluation
            # open("trajectory.json", "w") do io
            #     JSON3.pretty(io, terminalstate)
            # end

            backpropagate(leafNode, simTrajectoryReward)
        end
    end

    bestNextState = selectBestNextState(root)
    besttrajectory = selectBestTrajectory(root)

    return (bestNextState.state, besttrajectory.state)
end
end # module mcts

View File

@@ -0,0 +1,232 @@
module type
export agent, sommelier
using Dates, UUIDs, DataStructures, JSON3
using GeneralUtils
# ---------------------------------------------- 100 --------------------------------------------- #
# Abstract supertype for agents; `sommelier` below is a concrete subtype.
abstract type agent end
""" A sommelier agent.

Mutable record holding an agent's identity, configuration, search settings and memory.
Instances are normally created through the keyword constructor `sommelier(config; ...)`.

# Fields
- `name::String`
    Agent's name
- `id::String`
    Agent's ID
- `config::Dict`
    Config info for an agent.
- `tools::Dict`
    Agent's tools
- `maxiterations::Integer`
    how many thinking round
- `totalsample::Integer`
    how many sample in each thinking round
- `maxDepth::Integer`
    how many step ahead to be simulated start from current state into the future
- `maxHistoryMsg::Integer`
    max history message
- `chathistory::Vector{Dict{Symbol, Any}}`
    chat messages, each with `:name`, `:text` and `:timestamp`
- `keywordinfo::Dict{Symbol, Any}`
- `plan::Dict{Symbol, Any}`

# Example
NOTE(review): assumes the type is reachable as `YiemAgent.sommelier` - confirm.
```jldoctest
julia> using YiemAgent
julia> tools= Dict(
            :chatbox=>Dict(
                :name => "chatbox",
                :description => "Useful only for when you need to ask the user for more info or context. Do not ask the user their own question.",
                :input => "Input should be a text.",
                :output => "" ,
                :func => nothing,
            ),
        )
julia> agentConfig = Dict(
            :mqttServerInfo=> Dict(
                :broker=> "test.mosquitto.org",
                :port=> 1883,
            ),
            :receivemsg=> Dict(
                :prompt=> "/testtopic/prompt", # topic to receive prompt i.e. frontend send msg to this topic
                :internal=> "/testtopic/internal",
            ),
            :thirdPartyService=> Dict(
                :text2textinstruct=> "/text2text/receive",
                :text2textchat=> "/text2text/receive",
            ),
        )
julia> a = YiemAgent.sommelier(
            agentConfig,
            name= "assistant",
            id= "555", # agent instance id
            tools=tools,
        )
```

# TODO
- [] update docstring
- [x] implement the function
"""
mutable struct sommelier <: agent
name::String # agent name
id::String # agent id
config::Dict # agent config
tools::Dict # agent tools, keyed by tool name
maxiterations::Integer # how many thinking round
totalsample::Integer # how many sample in each thinking round
maxDepth::Integer # how many step ahead to be simulated start from current state into the future
maxHistoryMsg::Integer # 21th and earlier messages will get summarized
""" Memory
Ref: Chat prompt format https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGML/discussions/3
NO "system" message in chathistory because I want to add it at the inference time
chathistory= [
Dict(:name=>"user", :text=> "Wassup!", :timestamp=> Dates.now()),
Dict(:name=>"assistant", :text=> "Hi I'm your assistant.", :timestamp=> Dates.now()),
]
"""
chathistory::Vector{Dict{Symbol, Any}}
keywordinfo::Dict{Symbol, Any}
# 1-historyPoint is in Dict{Symbol, Any} and compose of:
# state, statevalue, thought, action, observation
plan::Dict{Symbol, Any}
end
# Keyword constructor for `sommelier`.
# `config` is the only positional argument; every other field is set through a keyword
# with the default shown below, then passed on to the positional field constructor.
function sommelier(
    config::Dict = Dict(
        :mqttServerInfo=> Dict(
            :broker=> nothing,
            :port=> nothing,
        ),
        :receivemsg=> Dict(
            :prompt=> nothing, # topic to receive prompt i.e. frontend send msg to this topic
            :internal=> nothing,
        ),
        :thirdPartyService=> Dict(
            :text2textinstruct=> nothing,
            :text2textchat=> nothing,
        ),
    )
    ;
    name::String= "Assistant",
    id::String= string(uuid4()),
    tools::Dict= Dict(
        :chatbox=> Dict(
            :name => "chatbox",
            :description => "Useful for when you need to communicate with the user.",
            :input => "Input should be a conversation to the user.",
            :output => "" ,
            :func => nothing,
        ),
    ),
    maxiterations::Integer= 3,
    totalsample::Integer= 3,
    maxDepth::Integer= 3,
    maxHistoryMsg::Integer= 20,
    chathistory::Vector{Dict{Symbol, Any}} = Vector{Dict{Symbol, Any}}(),
    keywordinfo::Dict{Symbol, Any} = Dict{Symbol, Any}(
        :customerinfo => Dict{Symbol, Any}(),
        :storeinfo => Dict{Symbol, Any}(),
    ),
    plan::Dict{Symbol, Any} = Dict{Symbol, Any}(
        # store 3 to 5 best plan AI frequently used to avoid having to search MCTS all the time
        # each plan is in [historyPoint_1, historyPoint_2, ...] format
        :existingplan => Vector(),
        :activeplan => Dict{Symbol, Any}(), # current using plan
        :currenttrajectory=> Dict{Symbol, Any}(), # store question, thought, action, observation, ...
    )
    )

    #[NEXTVERSION] publish to a.config[:configtopic] to get a config.
    #[NEXTVERSION] get a config message in a.mqttMsg_internal
    #[NEXTVERSION] set agent according to config

    # arguments follow the field order of the struct
    return sommelier(
        name, id, config, tools,
        maxiterations, totalsample, maxDepth, maxHistoryMsg,
        chathistory, keywordinfo, plan,
    )
end
end # module type

View File

@@ -0,0 +1,373 @@
module util
export clearhistory, addNewMessage, formatLLMtext, iterativeprompting,
formatLLMtext_llama3instruct, formatLLMtext_phi3instruct
using UUIDs, Dates, DataStructures, HTTP, MQTTClient, JSON3
using GeneralUtils
using ..type
# ---------------------------------------------- 100 --------------------------------------------- #
""" Clear agent chat history.

Empties `a.chathistory`, `a.plan[:activeplan]` and `a.plan[:currenttrajectory]` in place.
If the agent type has a `mctstree` property, that is emptied as well.

# Arguments
- `a::agent`
    an agent

# Return
- nothing

# Example
```jldoctest
julia> using YiemAgent
julia> a = YiemAgent.sommelier()
julia> YiemAgent.addNewMessage(a, "user", "hello")
julia> YiemAgent.clearhistory(a)
```
"""
function clearhistory(a::T) where {T<:agent}
    empty!(a.chathistory)

    # Not every agent type carries a search tree (`sommelier` has no `mctstree` field);
    # accessing it unconditionally would throw for such agents.
    if hasproperty(a, :mctstree)
        empty!(a.mctstree)
    end

    empty!(a.plan[:activeplan])
    empty!(a.plan[:currenttrajectory])

    return nothing
end
""" Add new message to agent.

The message is appended to `a.chathistory` as a dict with `:name`, `:text` and
`:timestamp`. When the history already holds more than `maximumMsg` messages,
`summarize` is called on it first; the new message is appended in either case.

# Arguments
- `a::agent`
    an agent
- `name::String`
    message sender role i.e. system, user or assistant
- `text::AbstractString`
    message text

# Keyword Arguments
- `maximumMsg::Integer`
    history length above which `summarize` is called

# Return
- nothing

# Example
```jldoctest
julia> using YiemAgent
julia> a = YiemAgent.sommelier()
julia> YiemAgent.addNewMessage(a, "user", "hello")
```
"""
function addNewMessage(a::T1, name::String, text::T2;
    maximumMsg::Integer=20) where {T1<:agent, T2<:AbstractString}

    # guard against typo (the membership operator was missing from this condition)
    if name ∉ ("system", "user", "assistant")
        error("name is not in agent.availableRole $(@__LINE__)")
    end

    #[] summarize the oldest 10 message
    # NOTE(review): `summarize` is not defined in this module - confirm where it comes from.
    if length(a.chathistory) > maximumMsg
        summarize(a.chathistory)
    end

    # Always record the incoming message; previously it was dropped whenever the history
    # was over the limit.
    d = Dict(:name=> name, :text=> text, :timestamp=> Dates.now())
    push!(a.chathistory, d)

    return nothing
end
""" Convert a single chat message into the Phi-3 instruct format.

# Phi-3 instruct format example
    <|system|>
    You are a helpful AI assistant.<|end|>
    <|user|>
    I am going to Paris, what should I see?<|end|>
    <|assistant|>

# Arguments
- `name::T`
    message owner name e.g. "system", "user" or "assistant"
- `text::T`
    message text

# Return
- `formattedtext::String`
    the role tag on its own line, then the text followed by the end tag and two
    newline characters

# Example
```jldoctest
julia> using YiemAgent
julia> d = Dict(:name=> "system",:text=> "You are a helpful, respectful and honest assistant.",)
julia> formattedtext = YiemAgent.formatLLMtext_phi3instruct(d[:name], d[:text])
```
"""
function formatLLMtext_phi3instruct(name::T, text::T) where {T<:AbstractString}
    roletag = string("<|", name, "|>")
    return string(roletag, "\n", text, "<|end|>", "\n\n")
end
""" Convert a single chat message into the Llama 3 instruct layout used by this module.

# Layout example
    <|begin_of_text|>
    <|start_header_id|>system<|end_header_id|>
    You are a helpful assistant.
    <|eot_id|>
    <|start_header_id|>user<|end_header_id|>
    Get me an icecream.
    <|eot_id|>

# Arguments
- `name::T`
    message owner name e.g. "system", "user" or "assistant"
- `text::T`
    message text

# Return
- `formattedtext::String`
    header line, text line and end-of-turn line, each terminated by a newline. Only a
    "system" message is preceded by the begin-of-text line.

# Example
```jldoctest
julia> using YiemAgent
julia> d = Dict(:name=> "system",:text=> "You are a helpful, respectful and honest assistant.",)
julia> formattedtext = YiemAgent.formatLLMtext_llama3instruct(d[:name], d[:text])
```
"""
function formatLLMtext_llama3instruct(name::T, text::T) where {T<:AbstractString}
    # only the system message opens the whole prompt
    opening = name == "system" ? "<|begin_of_text|>\n" : ""
    header = string("<|start_header_id|>", name, "<|end_header_id|>\n")
    return string(opening, header, text, "\n", "<|eot_id|>\n")
end
""" Convert chat messages in a vector of dictionaries into LLM model instruct format.

# Arguments
- `messages::Vector{Dict{Symbol, T}}`
    chat messages; each must contain `:name` (e.g. "system", "user" or "assistant")
    and `:text`
- `formatname::String`
    format name to be used: "llama3instruct" (default) or "phi3instruct"

# Return
- `formattedtext::String`
    all messages formatted and concatenated in order. For "phi3instruct" an assistant
    tag line is appended so the model does not have to generate it.

# Throws
- `ErrorException` when no template exists for `formatname` (this includes "mistral",
    which is not defined yet)

# Example
```jldoctest
julia> using YiemAgent
julia> chatmessage = [
        Dict(:name=> "system",:text=> "You are a helpful, respectful and honest assistant.",),
        Dict(:name=> "user",:text=> "list me all planets in our solar system.",),
        Dict(:name=> "assistant",:text=> "I'm sorry. I don't know. You tell me.",),
    ]
julia> formattedtext = YiemAgent.formatLLMtext(chatmessage, "llama3instruct")
```
"""
function formatLLMtext(messages::Vector{Dict{Symbol, T}},
    formatname::String="llama3instruct") where {T<:Any}

    f = if formatname == "llama3instruct"
        formatLLMtext_llama3instruct
    elseif formatname == "phi3instruct"
        formatLLMtext_phi3instruct
    else
        # "mistral" ends up here too: it has no template yet, and leaving the formatter
        # as `nothing` would only fail later with an unrelated MethodError.
        error("$formatname template not define yet")
    end

    io = IOBuffer()
    for t in messages
        print(io, f(t[:name], t[:text]))
    end

    # add <|assistant|> so that the model don't generate it and I don't need to clean it up later
    if formatname == "phi3instruct"
        print(io, "<|assistant|>\n")
    end

    return String(take!(io))
end
""" Prompt an LLM repeatedly until `verification` accepts the response.

The prompt is sent with `GeneralUtils.sendReceiveMqttMsg`. When `verification` rejects a
response, its critique is accumulated and the text "Critique: ..." in the original
`prompt` is replaced by the accumulated critique for the next attempt.

NOTE(review): the function is still marked PENDING and stops unconditionally with an
error right after the first response is received; everything after that call is
currently unreachable. There is also no upper bound on the number of attempts.

# Arguments
- `a::agent`
    an agent; `a.config[:externalService][:text2textinstruct]` and `a.id` are read
- `prompt::String`
    prompt text, expected to contain the placeholder "Critique: ..."
- `verification::Function`
    receives the response and returns `(success, critique)`

# Return
- `(success=success, result=result)`

# TODO
[] update docstring
[PENDING] implement the function
"""
function iterativeprompting(a::T, prompt::String, verification::Function) where {T<:agent}
    msgMeta = GeneralUtils.generate_msgMeta(
        a.config[:externalService][:text2textinstruct],
        senderName= "iterativeprompting",
        senderId= a.id,
        receiverName= "text2textinstruct",
    )

    outgoingMsg = Dict(
        :msgMeta=> msgMeta,
        :payload=> Dict(
            :text=> prompt,
        )
    )

    success = nothing
    result = nothing
    critique = ""

    # iteration loop
    while true
        # send prompt to LLM
        response = GeneralUtils.sendReceiveMqttMsg(outgoingMsg)
        error("--> iterativeprompting")

        # check for correctness and get feedback
        success, _critique = verification(response)

        if success
            result = response
            break
        else
            # add critique to prompt
            critique *= _critique * "\n"

            # Strings are immutable, so build the revised prompt from the original
            # template (which still holds the placeholder) and put it into the message
            # that is actually sent on the next attempt.
            outgoingMsg[:payload][:text] =
                replace(prompt, "Critique: ..." => "Critique: $critique")
        end
    end

    return (success=success, result=result)
end
end # module util