# Agent Workflows and Recipes

## Setup and Utils

In [None]:
import asyncio
import os, json
import together
from pydantic import ValidationError
from together import AsyncTogether, Together

client = Together(api_key= "abc")
async_client = AsyncTogether(api_key= "abc")

In [137]:
def run_llm(user_prompt : str, model : str, system_prompt : str = None):

    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    
    messages.append({"role": "user", "content": user_prompt})
    
    response = client.chat.completions.create(
        model=model,
        messages=messages,
        temperature=0.7,
        max_tokens=4000,        
    )

    return response.choices[0].message.content

In [140]:
run_llm("what is the captial of america?", model='meta-llama/Llama-3.3-70B-Instruct-Turbo')

"The capital of the United States of America is Washington, D.C. (short for District of Columbia). It's a federal district that serves as the permanent capital of the country and is not part of any state."

In [None]:
# The function below will call the reference LLMs in parallel
async def run_llm_parallel(user_prompt : str, model : str, system_prompt : str = None):
    """Run parallel LLM calls with a reference model."""
    for sleep_time in [1, 2, 4]:
        try:
            messages = []
            if system_prompt:
                messages.append({"role": "system", "content": system_prompt})
    
            messages.append({"role": "user", "content": user_prompt})

            response = await async_client.chat.completions.create(
                model=model,
                messages=messages,
                temperature=0.7,
                max_tokens=2000,
            )
            break
        except together.error.RateLimitError as e:
            print(e)
            await asyncio.sleep(sleep_time)
    return response.choices[0].message.content

# Generate intermediate reference responses
reference_models = ["meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo", "meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo"]
results = await asyncio.gather(*[run_llm_parallel("what is the captial of the USA?", model) for model in reference_models])

results

['The capital of the United States of America (USA) is Washington, D.C. (short for District of Columbia).',
 'The capital of the United States of America (USA) is Washington, D.C. (short for District of Columbia).']

In [None]:
def JSON_llm(user_prompt : str, schema, system_prompt : str = None):
    
    try:
        messages = []
        if system_prompt:
            messages.append({"role": "system", "content": system_prompt})
    
        messages.append({"role": "user", "content": user_prompt})
        
        extract = client.chat.completions.create(
            messages=messages,
            model="meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo",
            response_format={
                "type": "json_object",
                "schema": schema.model_json_schema(),
            },
        )
        response = json.loads(extract.choices[0].message.content)

    except ValidationError as e:
        error_message = f"Failed to parse JSON: {e}"
        print(error_message)
        
    return response

In [None]:
from pydantic import BaseModel, Field
from typing import List

class Restaurant(BaseModel, frozen=True):
    name: str
    category: str
    description: str

class RestList(BaseModel, frozen=True):
    restaurants : List[Restaurant] = Field(..., default_factory=list)

JSON_llm("Good restaurants in new york, Output only JSON.", RestList)

{'restaurants': [{'name': 'Carbone',
   'category': 'Italian-American',
   'description': 'Upscale retro Italian-American restaurant in Greenwich Village.'},
  {'name': 'Peter Luger Steak House',
   'category': 'Steakhouse',
   'description': 'Classic steakhouse in Brooklyn, serving top-quality steaks since 1887.'},
  {'name': 'Di Fara Pizza',
   'category': 'Pizzeria',
   'description': 'Classic New York-style pizzeria in Brooklyn.'},
  {'name': "Katz's Delicatessen",
   'category': 'Deli',
   'description': 'Classic Jewish deli on the Lower East Side.'},
  {'name': 'Eleven Madison Park',
   'category': 'Fine dining',
   'description': 'Three-Michelin-starred restaurant in the Flatiron District.'},
  {'name': 'Xe Lua',
   'category': 'Vietnamese',
   'description': 'Casual Vietnamese restaurant in Chinatown.'},
  {'name': "Artichoke Basille's Pizza",
   'category': 'Pizzeria',
   'description': 'Thick, crispy pizza slices in multiple locations.'},
  {'name': 'Le Bernardin',
   'catego

----

1. Prompt chaining

`PDF -> Clean text -> Brainstorm and ideation -> Improve and Augment -> Script`

2. Routing

`Given prompt -> LLM strcutured model choice -> Call this model -> Output`

3. Parallelization

`Mixture of Agents code`

4. Orchestrator-workers

`Summarization for product descriptions`

5. Evaluator-optimizer

`Code generation and evaluation in a loop`

----

## Prompt Chaining Recipe
A simple snippet of serial prompt chaining.

In [None]:
def serial_chain_workflow(input_query: str, prompt_chain : List[str]) -> List[str]:
    """Run a serial chain of LLM calls to address the `input_query` 
    using a prompts specified in a list `prompt_chain`.
    """
    response_chain = []
    response = input_query
    for i, prompt in enumerate(prompt_chain):
        print(f"Step {i+1}")
        response = run_llm(f"{prompt}\nInput:\n{response}", model='meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo')
        response_chain.append(response)
        print(f"{response}\n")
    return response_chain

# Toy Example

question = "Weng earns $12 an hour for babysitting. Yesterday, she just did 50 minutes of babysitting. How much did she earn?"

prompt_chain = ["""Given the math problem, ONLY extract any relevant numerical information and how it can be used.""",
                """Given the numberical information extracted, ONLY express the steps you would take to solve the problem.""",
                """Given the steps, express the final answer to the problem."""]

responses = serial_chain_workflow(question, prompt_chain)

final_answer = responses[-1]


Step 1
Relevant numerical information: 
- $12 (hourly wage)
- 50 minutes (time worked, can be converted to hours for calculation: 50 / 60 = 5/6 hour)

Step 2
1. Convert 50 minutes to hours: 50 / 60 = 5/6 hour.
2. Multiply the hourly wage by the time worked in hours: $12 * (5/6).
3. Calculate the result of the multiplication to find the earnings.

Step 3
To find the earnings, we need to perform the multiplication of $12 and 5/6.

First, convert the fraction to a decimal: 5/6 ≈ 0.83

Then, multiply $12 by 0.83: 
$12 * 0.83 ≈ $9.96

So, the earnings are approximately $9.96.



## Routing
A simple snippet of the consitional routing workflow.

In [None]:
def router_workflow(input_query: str, routes : Dict[str, str]) -> str:
    """ Given a `input_qeury` and a dictionary of `routes` containing options and details for each.
    Selects the best model for the task and return the response from the model.
    """
    ROUTER_PROMPT = """Given a user prompt/query: {user_query}, select the best option out of the following routes:
    {routes}. Answer only in JSON format."""

    # Create a schema from the routes dictionary
    class Schema(BaseModel):
        route: Literal[tuple(routes.keys())]
    
        reason: str = Field(
            description="Short one-liner explanation why this route was selected for the task in the prompt/query."
        )

    # Call LLM to select route
    selected_route = JSON_llm(ROUTER_PROMPT.format(user_query=input_query, routes=routes), Schema)
    print(f"Selcted route:{selected_route['route']}\nReason: {selected_route['reason']}\n")

    # Use LLM on selected route. 
    # Could also have different prompts that need to be used for each route.
    response = run_llm(user_prompt= input_query, model = selected_route['route'])
    print(f"Response: {response}\n")
    
    return response

In [187]:
prompt_list = ["Produce python snippet to check to see if a number is prime or not.",
               "Plan and provide a short itenary for a 2 week vacation in Europe.",
               "Write a short story about a dragon and a knight."]

model_routes = {
    "Qwen/Qwen2.5-Coder-32B-Instruct" : "Best model choice for code generation tasks.",
    "Gryphe/MythoMax-L2-13b" : "Best model choice for story-telling, role-playing and fantasy tasks.",
    "Qwen/QwQ-32B-Preview" : "Best model for reasoning, planning and muilti-step tasks",
}

for i, prompt in enumerate(prompt_list):
    print(f"Task {i+1}: {prompt}\n")
    print(20*'==')
    router_workflow(prompt, model_routes)


Task 1: Produce python snippet to check to see if a number is prime or not.

Seelction route:Qwen/Qwen2.5-Coder-32B-Instruct
 Reason: The task requires generating a Python code snippet to check if a number is prime or not, which falls under code generation tasks.

Response: Certainly! Below is a Python function that checks whether a given number is prime or not:

```python
def is_prime(n):
    """Check if a number is prime."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True

# Example usage:
number = 29
if is_prime(number):
    print(f"{number} is a prime number.")
else:
    print(f"{number} is not a prime number.")
```

### Explanation:
1. **Initial Checks**: 
   - Numbers less than or equal to 1 are not prime.
   - Numbers 2 and 3 are prime.
   
2. **Divisibility Check**:
   - If

## Parallel Recipe
A simple snippet of parallel agent workflow.

In [None]:
async def parallel_workflow(prompt : str, proposer_models : List[str], aggregator_model : str, aggregator_prompt: str):
    """Run a parallel chain of LLM calls to address the `input_query` 
    using a list of models specified in `models`.

    Returns output from final aggregator model.
    """

    # Gather intermediate responses from proposer models
    proposed_responses = await asyncio.gather(*[run_llm_parallel(prompt, model) for model in proposer_models])
    
    # Aggregate responses using an aggregator model
    final_output = run_llm(user_prompt=prompt,
                           model=aggregator_model,
                           system_prompt=aggregator_prompt + "\n" + "\n".join(f"{i+1}. {str(element)}" for i, element in enumerate(proposed_responses)
           ))
    
    return final_output, proposed_responses


In [199]:
reference_models = [
    "microsoft/WizardLM-2-8x22B",
    "Qwen/Qwen2.5-72B-Instruct-Turbo",
    "google/gemma-2-27b-it",
    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
]

user_prompt = """Jenna and her mother picked some apples from their apple farm. 
Jenna picked half as many apples as her mom. If her mom got 20 apples, how many apples did they both pick?"""

aggregator_model = "deepseek-ai/DeepSeek-V3"

aggregator_system_prompt = """You have been provided with a set of responses from various open-source models to the latest user query.
Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information
provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the
given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured,
coherent, and adheres to the highest standards of accuracy and reliability.

Responses from models:"""

answer, intermediate_reponses = await parallel_workflow(prompt = user_prompt, 
                                                        proposer_models = reference_models, 
                                                        aggregator_model = aggregator_model, 
                                                        aggregator_prompt = aggregator_system_prompt)

In [201]:
for i, response in enumerate(intermediate_reponses):
    print(f"Intermetidate Response {i+1}:\n\n{response}\n")

Intermetidate Response 1:

 Let's think step by step.To solve the problem, we need to determine the total number of apples picked by Jenna and her mother combined. We are given two pieces of information:

1. Jenna's mother picked 20 apples.
2. Jenna picked half as many apples as her mother.

Let's break down the solution process:

Step 1: Determine the number of apples Jenna's mother picked.
- According to the information provided, Jenna's mother picked 20 apples.

Step 2: Calculate the number of apples Jenna picked.
- Since Jenna picked half as many apples as her mother, we need to find half of the mother's count.
- Half of 20 apples is calculated by dividing 20 by 2, which gives us 10 apples.
- Therefore, Jenna picked 10 apples.

Step 3: Find the total number of apples they both picked.
- To find the total, we add the number of apples picked by Jenna to the number of apples picked by her mother.
- Adding Jenna's 10 apples to her mother's 20 apples gives us a total of 30 apples.

Step

In [202]:
print(f"Final Answer: {answer}\n")

Final Answer: To determine the total number of apples Jenna and her mother picked together, follow these steps:

1. **Determine the number of apples Jenna's mother picked:**
   - Jenna's mother picked **20 apples**.

2. **Calculate the number of apples Jenna picked:**
   - Jenna picked half as many apples as her mother.
   - Half of 20 is \( \frac{1}{2} \times 20 = 10 \) apples.
   - So, Jenna picked **10 apples**.

3. **Find the total number of apples they both picked:**
   - Add the number of apples picked by Jenna and her mother: \( 20 + 10 = 30 \) apples.

**Final Answer:** Jenna and her mother picked a total of **30 apples**.



## Orchestrator Agent Workflow
A simple snippet of the parallel orchestrator-worker agent workflow.

In [204]:
from pydantic import BaseModel, Field
from typing import Literal, List

ORCHESTRATOR_PROMPT = """
Analyze this task and break it down into 2-3 distinct approaches:

Task: {task}

Provide an Analysis:

Explain your understanding of the task and which variations would be valuable.
Focus on how each approach serves different aspects of the task.

Along with the analysis, provide 2-3 approaches to tackle the task, each with a brief description:

Formal style: Write technically and precisely, focusing on detailed specifications
Conversational style: Write in a friendly and engaging way that connects with the reader
Hybrid style: Tell a story that includes technical details, combining emotional elements with specifications

Return only JSON output.
"""

WORKER_PROMPT = """
Generate content based on:
Task: {original_task}
Style: {task_type}
Guidelines: {task_description}

Return only your response:
[Your content here, maintaining the specified style and fully addressing requirements.]
"""

task = """Write a product description for a new eco-friendly water bottle.
The target_audience is environmentally conscious millennials and key product features are: plastic-free, insulated, lifetime warranty
"""

In [216]:
class Task(BaseModel):
    type: Literal["formal", "conversational", "hybrid"]
    description: str

class TaskList(BaseModel):
    analysis: str
    tasks: List[Task]  = Field(..., default_factory=list)

async def orchestrator_workflow(task : str, orchestrator_prompt : str, worker_prompt : str): 
    """Use a orchestrator model to break down a task into sub-tasks and then use worker models to generate and return responses."""

    # Use orchestrator model to break the task up into sub-tasks
    orchestrator_response = JSON_llm(orchestrator_prompt.format(task=task), schema=TaskList)
 
    # Parse orchestrator response
    analysis = orchestrator_response["analysis"]
    tasks= orchestrator_response["tasks"]

    print("\n=== ORCHESTRATOR OUTPUT ===")
    print(f"\nANALYSIS:\n{analysis}")
    print(f"\nTASKS:\n{json.dumps(tasks, indent=2)}")

    worker_model =  ["meta-llama/Llama-3.3-70B-Instruct-Turbo"]*len(tasks)

    # Gather intermediate responses from worker models
    return tasks , await asyncio.gather(*[run_llm_parallel(user_prompt=worker_prompt.format(original_task=task, task_type=task_info['type'], task_description=task_info['description']), model=model) for task_info, model in zip(tasks,worker_model)])

In [217]:
task = """Write a product description for a new eco-friendly water bottle. 
The target_audience is environmentally conscious millennials and key product features are: plastic-free, insulated, lifetime warranty
"""

tasks, worker_resp = await orchestrator_workflow(task, orchestrator_prompt=ORCHESTRATOR_PROMPT, worker_prompt=WORKER_PROMPT)


=== ORCHESTRATOR OUTPUT ===

ANALYSIS:
The task requires writing a product description for an eco-friendly water bottle targeting environmentally conscious millennials. The key features to highlight are the plastic-free material, insulated design, and lifetime warranty. A valuable product description should effectively communicate these features while resonating with the target audience.

TASKS:
[
  {
    "type": "formal",
    "description": "Write a technically precise product description focusing on detailed specifications, such as the materials used, insulation technology, and warranty terms. This approach serves the task by providing a clear understanding of the product's features and benefits."
  },
  {
    "type": "conversational",
    "description": "Write a friendly and engaging product description that connects with the reader on an emotional level. This approach serves the task by building a relationship with the target audience and highlighting the product's eco-friendly as

In [218]:
for task_info, response in zip(tasks, worker_resp):
    print(f"\n=== WORKER RESULT ({task_info['type']}) ===\n{response}\n")


=== WORKER RESULT (formal) ===
Introduction to the HydraGreen Water Bottle

We are pleased to introduce the HydraGreen water bottle, a revolutionary, eco-friendly hydration solution designed specifically for environmentally conscious millennials. This premium product boasts a plastic-free construction, advanced insulation technology, and a comprehensive lifetime warranty, ensuring a superior user experience while minimizing its ecological footprint.

Materials and Construction

The HydraGreen water bottle is crafted from high-quality, BPA-free stainless steel (18/8 food-grade) and features a durable, non-toxic silicone sleeve. The bottle's body is constructed using a proprietary double-walled insulation process, which provides exceptional thermal retention while maintaining a slim, ergonomic design. The lid is manufactured from a sustainable, plant-based polymer, further reducing the product's reliance on petroleum-derived materials.

Insulation Technology

The HydraGreen water bottle

## Loop Optimizer Agent Workflow
A simple snippet of looping generator-evaluator workflow.

In [219]:
task = """
Implement a Stack with:
1. push(x)
2. pop()
3. getMin()
All operations should be O(1).
"""

In [222]:
GENERATOR_PROMPT = """
Your goal is to complete the task based on <user input>. If there are feedback 
from your previous generations, you should reflect on them to improve your solution

Output your answer concisely in the following format: 

Thoughts:
[Your understanding of the task and feedback and how you plan to improve]

Response:
[Your code implementation here]
"""

def generate(task: str, generator_prompt: str, context: str = "") -> tuple[str, str]:
    """Generate and improve a solution based on feedback."""
    full_prompt = f"{generator_prompt}\n{context}\nTask: {task}" if context else f"{generator_prompt}\nTask: {task}"

    response = run_llm(full_prompt, model="Qwen/Qwen2.5-Coder-32B-Instruct")
    
    print("\n=== GENERATION START ===")
    print(f"Output:\n{response}\n")
    print("=== GENERATION END ===\n")
    
    return response

EVALUATOR_PROMPT = """
Evaluate this following code implementation for:
1. code correctness
2. time complexity
3. style and best practices

You should be evaluating only and not attemping to solve the task.

Only output "PASS" if all criteria are met and you have no further suggestions for improvements.

Provide detailed feedback if there are areas that need improvement. You should specify what needs improvement and why.

Only output JSON.
"""

def evaluate(task : str, evaluator_prompt : str, generated_content: str, schema) -> tuple[str, str]:
    """Evaluate if a solution meets requirements."""
    full_prompt = f"{evaluator_prompt}\nOriginal task: {task}\nContent to evaluate: {generated_content}"
    
    response = JSON_llm(full_prompt, schema)
    
    evaluation = response["evaluation"]
    feedback = response["feedback"]

    print("=== EVALUATION START ===")
    print(f"Status: {evaluation}")
    print(f"Feedback: {feedback}")
    print("=== EVALUATION END ===\n")

    return evaluation, feedback



In [223]:
def loop_workflow(task: str, evaluator_prompt: str, generator_prompt: str) -> tuple[str, list[dict]]:
    """Keep generating and evaluating until the evaluator passes the last generated response."""
    # Store previous responses from generator
    memory = []
    
    # Generate initial response
    response = generate(task, generator_prompt)
    memory.append(response)

    #Build a schema for the evaluation
    class Evaluation(BaseModel):
        evaluation: Literal["PASS", "NEEDS_IMPROVEMENT", "FAIL"]
        feedback: str

    # While the generated response is not passing, keep generating and evaluating
    while True:
        evaluation, feedback = evaluate(task, evaluator_prompt, response, Evaluation)
        # Terminating condition
        if evaluation == "PASS":
            return response
        
        # Add current response and feedback to context and generate a new response
        context = "\n".join([
            "Previous attempts:",
            *[f"- {m}" for m in memory],
            f"\nFeedback: {feedback}"
        ])
        
        response = generate(generator_prompt, task, context)
        memory.append(response)

In [224]:
loop_workflow(task, EVALUATOR_PROMPT, GENERATOR_PROMPT)


=== GENERATION START ===
Output:
Thoughts:
To implement a stack with the operations `push(x)`, `pop()`, and `getMin()` all in O(1) time complexity, we can use two stacks. One stack will store the actual stack elements, and the other will store the minimum values. The minimum stack will help us keep track of the minimum element efficiently.

Response:
```python
class MinStack:
    def __init__(self):
        self.stack = []
        self.min_stack = []

    def push(self, x: int) -> None:
        self.stack.append(x)
        if not self.min_stack or x <= self.min_stack[-1]:
            self.min_stack.append(x)

    def pop(self) -> None:
        if self.stack:
            x = self.stack.pop()
            if x == self.min_stack[-1]:
                self.min_stack.pop()

    def top(self) -> int:
        if self.stack:
            return self.stack[-1]
        raise IndexError("Stack is empty")

    def getMin(self) -> int:
        if self.min_stack:
            return self.min_stack[-1]


'Thoughts:\nThe current implementation is mostly correct and handles edge cases by returning `None` for empty stack scenarios. However, we can further improve the code by:\n1. Ensuring consistent method naming (e.g., `getMin` to `get_min` for consistency with `get_top_element`).\n2. Adding type hints and docstrings for better readability and maintainability.\n3. Ensuring that the implementation adheres to best practices by handling empty stack scenarios gracefully and providing clear documentation.\n\nResponse:\n```python\nclass MinStack:\n    def __init__(self):\n        """Initialize the stack and the minimum stack."""\n        self.stack = []\n        self.min_stack = []\n\n    def push(self, x: int) -> None:\n        """Push element x onto the stack.\n\n        Args:\n            x (int): The element to be pushed onto the stack.\n        """\n        self.stack.append(x)\n        if not self.min_stack or x <= self.min_stack[-1]:\n            self.min_stack.append(x)\n\n    def pop(

## 1. Prompt Chaining Workflow

In [52]:
from typing import List, Literal

class DialogueItem(BaseModel):
    """A single dialogue item."""

    speaker: Literal["Host (Jane)", "Guest"]
    text: str


class Dialogue(BaseModel):
    """The dialogue between the host and guest."""

    scratchpad: str
    name_of_guest: str
    dialogue: List[DialogueItem]

In [42]:
SYSTEM_PROMPT = """You are an experienced world-class podcast producer tasked with transforming the provided 
input text into an engaging and informative podcast.

You are to follow a step by step methodical process to generate the final podcast which involves:
1. Reading and extracting relevant information and snippets from the source document.
2. Using the relevant information compiled in step 1, creating an outline document containing brainstormed ideas, summarized topics that should be covered, questions and how to guide the conversation 
3. Using the details from step 1 and 2 you then need to put together a script for the podcast.

"""

CLEAN_EXTRACT_DETAILS = """The first step you need to perform is to extract details from the source document that are informative
and listeners will find useful to understand the source document better.

The input may be unstructured or messy, sourced from PDFs or web pages. 

Your goal is to extract the most interesting and insightful content for a compelling podcast discussion.

Source Document: {source_doc}
"""

OUTLINE_PROMPT = """The second step is to use the extracted information from the source document to write an outline and brainstorm ideas.

The source document and extracted details are provided below:

Extracted Details: {extracted_details}

Source Document: {source_doc}

Steps to follow when generating an outline and brainstorming ideas for the discussion in the podcast:

1. Analyze the Input:
   Carefully examine the extracted details in the text above, identifying key topics, points, and 
   interesting facts or anecdotes that could drive an engaging podcast conversation. 
   Disregard irrelevant information.

2. Brainstorm Ideas:
   Creatively brainstorm ways to present the key points engagingly. 
   
   Consider:
   - Analogies, storytelling techniques, or hypothetical scenarios to make content relatable
   - Ways to make complex topics accessible to a general audience
   - Thought-provoking questions to explore during the podcast
   - Creative approaches to fill any gaps in the information
   - Make sure that all important details extracted above are covered in the outline that you draft
"""

SCRIPT_PROMPT = """The last step is to use the extracted details and the ideas brainstormed in the outline below to craft
a script for the podcast.

Extracted Details: {extracted_details}

Using the outline provided here: {outline}

Steps to follow when generating the script:

 1. **Craft the Dialogue:**
   Develop a natural, conversational flow between the host (Jane) and the guest speaker (the author or an expert on the topic).
   In the `<scratchpad>`, creatively brainstorm ways to present the key points engagingly.
   
   Incorporate:
   - The best ideas from your brainstorming session
   - Clear explanations of complex topics
   - An engaging and lively tone to captivate listeners
   - A balance of information and entertainment

   Rules for the dialogue:
   - The host (Jane) always initiates the conversation and interviews the guest
   - Include thoughtful questions from the host to guide the discussion
   - Incorporate natural speech patterns, including occasional verbal fillers (e.g., "Uhh", "Hmmm", "um," "well," "you know")
   - Allow for natural interruptions and back-and-forth between host and guest - this is very important to make the conversation feel authentic
   - Ensure the guest's responses are substantiated by the input text, avoiding unsupported claims
   - Maintain a PG-rated conversation appropriate for all audiences
   - Avoid any marketing or self-promotional content from the guest
   - The host concludes the conversation

2. **Summarize Key Insights:**
   Naturally weave a summary of key points into the closing part of the dialogue. This should feel like a casual conversation rather than a formal recap, reinforcing the main takeaways before signing off.

3. **Maintain Authenticity:**
   Throughout the script, strive for authenticity in the conversation. Include:
   - Moments of genuine curiosity or surprise from the host
   - Instances where the guest might briefly struggle to articulate a complex idea
   - Light-hearted moments or humor when appropriate
   - Brief personal anecdotes or examples that relate to the topic (within the bounds of the input text)

4. **Consider Pacing and Structure:**
   Ensure the dialogue has a natural ebb and flow:
   - Start with a strong hook to grab the listener's attention
   - Gradually build complexity as the conversation progresses
   - Include brief "breather" moments for listeners to absorb complex information
   - For complicated concepts, reasking similar questions framed from a different perspective is recommended
   - End on a high note, perhaps with a thought-provoking question or a call-to-action for listeners

IMPORTANT RULE: Each line of dialogue should be no more than 300 characters (e.g., can finish within 30 seconds)

Remember: Always reply in valid JSON format, without code blocks. Begin directly with the JSON output.
"""

In [27]:
!wget https://arxiv.org/pdf/2406.04692
!mv 2406.04692 MoA.pdf

--2025-01-07 22:42:44--  https://arxiv.org/pdf/2406.04692
Resolving arxiv.org (arxiv.org)... 151.101.67.42, 151.101.131.42, 151.101.195.42, ...
Connecting to arxiv.org (arxiv.org)|151.101.67.42|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1157463 (1.1M) [application/pdf]
Saving to: ‘2406.04692’


2025-01-07 22:42:45 (25.5 MB/s) - ‘2406.04692’ saved [1157463/1157463]



In [31]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.1.0


In [32]:
#import pathlib
from pathlib import Path
from pypdf import PdfReader

def get_PDF_text(file : str):
    text = ''

    # Read the PDF file and extract text
    try:
        with Path(file).open("rb") as f:
            reader = PdfReader(f)
            text = "\n\n".join([page.extract_text() for page in reader.pages])
    except Exception as e:
        raise f"Error reading the PDF file: {str(e)}"

        # Check if the PDF has more than ~131,072 characters
        # The context lenght limit of the model is 131,072 tokens and thus the text should be less than this limit
    if len(text) > 131072:
        raise "The PDF is too long. Please upload a PDF with fewer than ~131072 characters."

    return text

text = get_PDF_text('./MoA.pdf')
text

'Mixture-of-Agents Enhances Large Language Model\nCapabilities\nJunlin Wang\nDuke University\nTogether AI\njunlin.wang2@duke.edu\nJue Wang\nTogether AI\njue@together.ai\nBen Athiwaratkun\nTogether AI\nben@together.ai\nCe Zhang\nUniversity of Chicago\nTogether AI\ncez@uchicago.edu\nJames Zou\nStanford University\nTogether AI\njamesz@stanford.edu\nAbstract\nRecent advances in large language models (LLMs) demonstrate substantial capa-\nbilities in natural language understanding and generation tasks. With the growing\nnumber of LLMs, how to harness the collective expertise of multiple LLMs is an\nexciting open direction. Toward this goal, we propose a new approach that lever-\nages the collective strengths of multiple LLMs through a Mixture-of-Agents (MoA)\nmethodology. In our approach, we construct a layered MoA architecture wherein\neach layer comprises multiple LLM agents. Each agent takes all the outputs from\nagents in the previous layer as auxiliary information in generating its resp

In [47]:
source_doc = text

extracted_details = run_llm(CLEAN_EXTRACT_DETAILS.format(source_doc=source_doc), 
                            model='meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', 
                            system_prompt=SYSTEM_PROMPT)
print(extracted_details)

**Extracted Details and Snippets**

* **Key Concepts:**
	+ Mixture-of-Agents (MoA) methodology
	+ Large Language Models (LLMs)
	+ Collaborativeness of LLMs
	+ Proposers and Aggregators
* **Research Contributions:**
	+ Novel framework for leveraging multiple LLMs
	+ Finding of collaborativeness among LLMs
	+ State-of-the-art performance on AlpacaEval 2.0, MT-Bench, and FLASK benchmarks
* **Methodology:**
	+ MoA architecture consists of multiple layers with multiple LLM agents
	+ Each layer aggregates outputs from previous layer and generates new responses
	+ Aggregators can be specialized or general-purpose models
* **Experimental Results:**
	+ MoA outperforms GPT-4 Omni on AlpacaEval 2.0 and FLASK benchmarks
	+ MoA-Lite achieves competitive performance with fewer layers and lower cost
	+ Analysis of Spearman correlation between win rate and similarity scores (BLEU, TF-IDF, Levenshtein)
	+ Case studies demonstrate MoA's ability to synthesize responses from multiple models
* **Related Wo

In [48]:
outline = run_llm(OUTLINE_PROMPT.format(extracted_details=extracted_details, source_doc=source_doc),
                    model='meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo',
                    system_prompt=SYSTEM_PROMPT)

print(outline)

**Podcast Outline and Brainstormed Ideas: Mixture-of-Agents Enhances Large Language Model Capabilities**

**I. Introduction (2-3 minutes)**

* Brief overview of Large Language Models (LLMs) and their capabilities
* Introduction of the Mixture-of-Agents (MoA) methodology and its potential to enhance LLMs
* Thesis statement: MoA can significantly improve the performance of LLMs by leveraging their collective strengths

**II. What is MoA and How Does it Work? (10-12 minutes)**

* Explanation of the MoA architecture and its components (proposers, aggregators, and layers)
* Discussion of how MoA differs from other approaches (e.g., Mixture-of-Experts)
* Use analogies or storytelling techniques to make the concept more relatable and accessible

**III. The Collaborativeness of LLMs (10-12 minutes)**

* Explanation of the collaborativeness phenomenon among LLMs
* Discussion of how MoA leverages this phenomenon to improve response quality
* Use examples or case studies to illustrate the benefit

In [53]:
script = JSON_llm(SCRIPT_PROMPT.format(extracted_details=extracted_details, outline=outline),
                    Dialogue,
                    system_prompt=SYSTEM_PROMPT)

script

{'scratchpad': "Let's explore how to present the key points engagingly: Use storytelling to introduce MoA, making it more relatable and accessible to a general audience. Create a hypothetical scenario to illustrate the benefits of collaborativeness among LLMs. Use analogies to explain complex concepts, such as the MoA architecture and its components. Explore the implications of MoA for the future of AI and its potential applications in various fields.",
 'name_of_guest': 'Dr. Rachel Kim',
 'dialogue': [{'speaker': 'Host (Jane)',
   'text': "Welcome to today's podcast, where we're exploring the exciting world of Large Language Models. I'm your host, Jane, and joining me is Dr. Rachel Kim, an expert in AI and the author of a groundbreaking paper on Mixture-of-Agents. Dr. Kim, thanks for being here!"},
  {'speaker': 'Guest',
   'text': "Thanks, Jane! I'm thrilled to share my research with your audience."},
  {'speaker': 'Host (Jane)',
   'text': "So, let's dive right in. What's this Mixtu

1. Clean and extract details `given` source text
2. Generate an outline `given` extracted information and the source text
3. Generate a script `given` the facts from step 1 and outline from step 2
4. Call Text to Speech model to generate the Podcast!

In [None]:
# Write a python function that takes a PDF file as input and returns the podcast script.
def prompt_chain_podcast_workflow(file : str):
    text = get_PDF_text(file)
    source_doc = text
    
    extracted_details = run_llm(CLEAN_EXTRACT_DETAILS.format(source_doc=source_doc), 
                            model='meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo', 
                            system_prompt=SYSTEM_PROMPT)
    
    outline = run_llm(OUTLINE_PROMPT.format(extracted_details=extracted_details, source_doc=source_doc),
                    model='meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo',
                    system_prompt=SYSTEM_PROMPT)
    
    script = JSON_llm(SCRIPT_PROMPT.format(extracted_details=extracted_details, outline=outline),
                    Dialogue,
                    system_prompt=SYSTEM_PROMPT)
    return script

In [None]:
#Generate code for a series of llm calls that will generate a podcast script given a PDF file as input.

#TO DO Replase with Together API Cartesia model usage

import subprocess
import ffmpeg

host_id = "694f9389-aac1-45b6-b726-9d9369183238" # Jane - host
guest_id = "a0e99841-438c-4a64-b679-ae501e7d6091" # Guest

model_id = "sonic-english" # The Sonic Cartesia model for English TTS

output_format = {
    "container": "raw",
    "encoding": "pcm_f32le",
    "sample_rate": 44100,
    }

# Set up a WebSocket connection.
ws = client_cartesia.tts.websocket()

# Open a file to write the raw PCM audio bytes to.
f = open("podcast.pcm", "wb")

# Generate and stream audio.
for line in script.dialogue:
    if line.speaker == "Guest":
        voice_id = guest_id
    else:
        voice_id = host_id

    for output in ws.send(
        model_id=model_id,
        transcript='-' + line.text, # the "-"" is to add a pause between speakers
        voice_id=voice_id,
        stream=True,
        output_format=output_format,
    ):
        buffer = output["audio"]  # buffer contains raw PCM audio bytes
        f.write(buffer)

# Close the connection to release resources
ws.close()
f.close()

# Convert the raw PCM bytes to a WAV file.
ffmpeg.input("podcast.pcm", format="f32le").output("podcast.wav").run()

# Play the file
subprocess.run(["ffplay", "-autoexit", "-nodisp", "podcast.wav"])

----

## 2. Routing Agentic Workflow

`Given prompt -> LLM strcutured model choice -> Call this model -> Output`

ROUTER_SYSTEM_PROMPT = "Choose optimal model"

JSON output from router -> given prompt output -> Model, reason

Given model name call that model and pass it the Model selected



In [67]:
from pydantic import BaseModel, Field
from typing import Literal

class ModelOutput(BaseModel):
    model: Literal["deepseek-ai/DeepSeek-V3", 
                   "Qwen/Qwen2.5-Coder-32B-Instruct", 
                   "Gryphe/MythoMax-L2-13b", 
                   "Qwen/QwQ-32B-Preview",
                   "meta-llama/Llama-3.3-70B-Instruct-Turbo"]
    reason: str = Field(
        description="Reason why this model was selected for the task specified in the prompt/query."
    )

In [77]:
ROUTER_SYSTEM_PROMPT = """Given a user prompt/query, select the best model from the available options to solve the task and provide a reason for your choice.
Each model has different capabilities - select the best model for the task provided:
- deepseek-ai/DeepSeek-V3: Good generic model to default to incase no better alternative is selected
- Qwen/Qwen2.5-Coder-32B-Instruct: Best for code generation tasks
- Gryphe/MythoMax-L2-13b: Best model for story-telling and role-play and fantasy tasks
- Qwen/QwQ-32B-Preview: Best model for reasoning, math and muiltistep tasks
- meta-llama/Llama-3.3-70B-Instruct-Turbo: Best model for general enterprise usecases and tasks"""

ROUTER_PROMPT = "Given a user prompt/query: {user_query}, select the best model from the available options to solve the task and provide a reason for your choice. Answer only in JSON format."

prompt = "Produce python code snippet to check to see if a number is prime or not."

selected_model = JSON_llm(ROUTER_PROMPT.format(user_query=prompt),
                            ModelOutput,
                            system_prompt=ROUTER_SYSTEM_PROMPT)

selected_model

{'model': 'Qwen/Qwen2.5-Coder-32B-Instruct',
 'reason': 'The task requires code generation, specifically a Python code snippet to check if a number is prime or not, which aligns with the capabilities of the Qwen/Qwen2.5-Coder-32B-Instruct model, making it the best option for this task.'}

In [78]:
response = run_llm(user_prompt= prompt, 
                   model = selected_model['model']
)

response

'Certainly! Below is a Python code snippet to check if a number is prime:\n\n```python\ndef is_prime(n):\n    """Check if a number is prime."""\n    if n <= 1:\n        return False\n    if n <= 3:\n        return True\n    if n % 2 == 0 or n % 3 == 0:\n        return False\n    i = 5\n    while i * i <= n:\n        if n % i == 0 or n % (i + 2) == 0:\n            return False\n        i += 6\n    return True\n\n# Example usage:\nnumber = 29\nif is_prime(number):\n    print(f"{number} is a prime number.")\nelse:\n    print(f"{number} is not a prime number.")\n```\n\n### Explanation:\n1. **Initial Checks**:\n   - Numbers less than or equal to 1 are not prime.\n   - Numbers 2 and 3 are prime.\n   - If the number is divisible by 2 or 3, it is not prime.\n\n2. **Loop through potential factors**:\n   - We start checking from 5 and increment by 6 (i.e., check 5, 11, 17, ...).\n   - For each number `i`, we check if `n` is divisible by `i` or `i + 2`.\n   - This works because all primes greater

In [79]:
# Write a function that will call the router and then the output llm model in sequence to generate a response to a user prompt.
def run_router_workflow(user_prompt : str):
    
    selected_model = JSON_llm(ROUTER_PROMPT.format(user_query=user_prompt),
                            ModelOutput,
                            system_prompt=ROUTER_SYSTEM_PROMPT)
    
    response = run_llm(user_prompt= user_prompt, 
                   model = selected_model['model']
    )
    return selected_model['model'], selected_model['reason'], response

In [80]:
model, reason, response = run_router_workflow(prompt)

print(f"Query: {prompt}")
print(20*'==')
print(f"Selected Model: {model} \n Reason: {reason}")
print(20*'==')
print(f"Response: {response}")

Query: Produce python code snippet to check to see if a number is prime or not.
Selected Model: Qwen/Qwen2.5-Coder-32B-Instruct 
 Reason: The task requires generating Python code to solve a specific problem, which aligns with Qwen2.5-Coder-32B-Instruct's capability as the best model for code generation tasks.
Response: Certainly! Below is a Python code snippet that checks if a number is prime:

```python
def is_prime(n):
    """Check if a number is prime."""
    if n <= 1:
        return False
    if n <= 3:
        return True
    if n % 2 == 0 or n % 3 == 0:
        return False
    i = 5
    while i * i <= n:
        if n % i == 0 or n % (i + 2) == 0:
            return False
        i += 6
    return True

# Example usage:
number = 29
if is_prime(number):
    print(f"{number} is a prime number.")
else:
    print(f"{number} is not a prime number.")
```

### Explanation:
1. **Initial Checks**:
   - Numbers less than or equal to 1 are not prime.
   - Numbers 2 and 3 are prime.
   - An

## 3. Parallelization

`Mixture of Agents code`

In [82]:
reference_models = [
    "microsoft/WizardLM-2-8x22B",
    "Qwen/Qwen2.5-72B-Instruct-Turbo",
    "google/gemma-2-27b-it",
    "meta-llama/Llama-3.3-70B-Instruct-Turbo",
]

user_prompt = """Tim wants to invest some money in a bank which compounds quarterly
with an annual interest rate of $7\%$. To the nearest dollar, how much money should he
invest if he wants a total of $\$60,\!000$ at the end of $5$ years?"""

# Generate intermediate reference responses
results = await asyncio.gather(*[run_llm_parallel(user_prompt=user_prompt, model=model) for model in reference_models])

  user_prompt = """Tim wants to invest some money in a bank which compounds quarterly


In [83]:
len(results)

4

In [86]:
aggregator_model = "deepseek-ai/DeepSeek-V3"

aggregator_system_prompt = """You have been provided with a set of responses from various open-source models to the latest user query.
Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information
provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the
given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured,
coherent, and adheres to the highest standards of accuracy and reliability.

Responses from models:"""



In [89]:
print(aggregator_system_prompt + "\n" + "\n".join(f"{i+1}. {str(element)}" for i, element in enumerate(results)))

You have been provided with a set of responses from various open-source models to the latest user query.
Your task is to synthesize these responses into a single, high-quality response. It is crucial to critically evaluate the information
provided in these responses, recognizing that some of it may be biased or incorrect. Your response should not simply replicate the
given answers but should offer a refined, accurate, and comprehensive reply to the instruction. Ensure your response is well-structured,
coherent, and adheres to the highest standards of accuracy and reliability.

Responses from models:
1.  Let's think step by step.To determine how much money Tim should invest to have $60,000 at the end of 5 years, with a quarterly compounding interest rate of 7%, we can use the formula for compound interest:

\[ A = P \left(1 + \frac{r}{n}\right)^{nt} \]

where:
- \( A \) is the amount of money accumulated after n years, including interest.
- \( P \) is the principal amount (the initial a

In [90]:
final_output = run_llm(user_prompt=user_prompt,
                       model=aggregator_model,
                       system_prompt=aggregator_system_prompt + "\n" + "\n".join(f"{i+1}. {str(element)}" for i, element in enumerate(results)
           ))

print(final_output)

To determine how much Tim should invest to have $60,000 at the end of 5 years with an annual interest rate of 7% compounded quarterly, we use the compound interest formula:

\[
A = P \left(1 + \frac{r}{n}\right)^{nt}
\]

Where:
- \(A\) is the future amount ($60,000),
- \(P\) is the principal (the amount to invest),
- \(r\) is the annual interest rate (0.07),
- \(n\) is the number of compounding periods per year (4),
- \(t\) is the time in years (5).

We rearrange the formula to solve for \(P\):

\[
P = \frac{A}{\left(1 + \frac{r}{n}\right)^{nt}}
\]

Substituting the given values:

\[
P = \frac{60,000}{\left(1 + \frac{0.07}{4}\right)^{4 \cdot 5}}
\]

Simplify the equation step by step:

1. Calculate the quarterly interest rate:
\[
\frac{0.07}{4} = 0.0175
\]

2. Calculate the total number of compounding periods:
\[
4 \times 5 = 20
\]

3. Calculate the growth factor:
\[
\left(1 + 0.0175\right)^{20} \approx 1.414778
\]

4. Solve for \(P\):
\[
P = \frac{60,000}{1.414778} \approx 42,401.31
\

---

## 4. Orchestrator-workers

`Summarization for product descriptions`

In [98]:
from pydantic import BaseModel, Field
from typing import Literal, List

class Task(BaseModel):
    type: Literal["formal", "conversational", "hybrid"]
    description: str

class TaskList(BaseModel):
    analysis: str
    tasks: List[Task]  = Field(..., default_factory=list)

ORCHESTRATOR_PROMPT = """
Analyze this task and break it down into 2-3 distinct approaches:

Task: {task}

Provide an Analysis:

Explain your understanding of the task and which variations would be valuable.
Focus on how each approach serves different aspects of the task.


Along with the analysis, provide 2-3 approaches to tackle the task, each with a brief description:

Formal style: Write technically and precisely, focusing on detailed specifications
Conversational style: Write in a friendly and engaging way that connects with the reader
Hybrid style: Tell a story that includes technical details, combining emotional elements with specifications

Return only JSON output.
"""

WORKER_PROMPT = """
Generate content based on:
Task: {original_task}
Style: {task_type}
Guidelines: {task_description}

Return your response in this format:

<response>
Your content here, maintaining the specified style and fully addressing requirements.
</response>
"""

In [97]:
TaskList.model_json_schema()

{'$defs': {'Task': {'properties': {'type': {'enum': ['formal',
      'conversational',
      'hybrid'],
     'title': 'Type',
     'type': 'string'},
    'description': {'title': 'Description', 'type': 'string'}},
   'required': ['type', 'description'],
   'title': 'Task',
   'type': 'object'}},
 'properties': {'analysis': {'title': 'Analysis', 'type': 'string'},
  'tasks': {'items': {'$ref': '#/$defs/Task'},
   'title': 'Tasks',
   'type': 'array'}},
 'required': ['analysis'],
 'title': 'TaskList',
 'type': 'object'}

In [None]:
task = """Write a product description for a new eco-friendly water bottle. 
The target_audience is environmentally conscious millennials and key product features are: plastic-free, insulated, lifetime warranty
"""

orchestrator_response = JSON_llm(ORCHESTRATOR_PROMPT.format(task=task),
                                 schema=TaskList
                                 )

{'analysis': "The task requires writing a product description for an eco-friendly water bottle targeting environmentally conscious millennials. The key features of the product are its plastic-free composition, insulation, and lifetime warranty. To effectively cater to this audience, it's essential to consider different approaches that highlight the product's unique selling points while resonating with the target audience's values and preferences.",
 'tasks': [{'type': 'formal',
   'description': "This approach focuses on providing detailed technical specifications, emphasizing the product's eco-friendly materials, insulation properties, and warranty. It's ideal for readers seeking in-depth information about the product's features and benefits."},
  {'type': 'conversational',
   'description': "This approach takes a friendly and engaging tone, connecting with the reader on an emotional level. It highlights how the product aligns with the target audience's values, such as reducing plasti

In [121]:
# Parse orchestrator response
analysis = orchestrator_response["analysis"]
tasks= orchestrator_response["tasks"]
        
print("\n=== ORCHESTRATOR OUTPUT ===")
print(f"\nANALYSIS:\n{analysis}")
print(f"\nTASKS:\n{json.dumps(tasks, indent=2)}")


=== ORCHESTRATOR OUTPUT ===

ANALYSIS:
The task requires writing a product description for an eco-friendly water bottle targeting environmentally conscious millennials. The key features of the product are its plastic-free composition, insulation, and lifetime warranty. To effectively cater to this audience, it's essential to consider different approaches that highlight the product's unique selling points while resonating with the target audience's values and preferences.

TASKS:
[
  {
    "type": "formal",
    "description": "This approach focuses on providing detailed technical specifications, emphasizing the product's eco-friendly materials, insulation properties, and warranty. It's ideal for readers seeking in-depth information about the product's features and benefits."
  },
  {
    "type": "conversational",
    "description": "This approach takes a friendly and engaging tone, connecting with the reader on an emotional level. It highlights how the product aligns with the target au

In [115]:
# Step 2: Process each task
task = """Write a product description for a new eco-friendly water bottle. 
The target_audience is environmentally conscious millennials and key product features are: plastic-free, insulated, lifetime warranty
"""

reference_models = ["meta-llama/Llama-3.3-70B-Instruct-Turbo"]*len(tasks)

worker_response = await asyncio.gather(*[run_llm_parallel(user_prompt=WORKER_PROMPT.format(original_task=task, task_type=task_info['type'], task_description=task_info['description']), model=model) \
                                         for task_info, model in zip(tasks,reference_models)])

In [116]:
worker_response

["<response>\nIntroducing the EcoHydrate water bottle, a revolutionary, eco-friendly hydration solution designed specifically for environmentally conscious individuals. Constructed from high-quality, BPA-free stainless steel (18/8 food-grade), this bottle is completely plastic-free, aligning with the values of reducing plastic waste and promoting sustainability.\n\nThe EcoHydrate water bottle boasts an impressive insulation capability, thanks to its double-walled vacuum insulation technology. This advanced design ensures that beverages maintain their temperature for hours, whether hot or cold, making it an ideal companion for daily commutes, outdoor adventures, or fitness activities.\n\nWith a generous capacity of 27 ounces (800 ml), this water bottle is spacious enough to meet your hydration needs throughout the day, while its compact dimensions (10.5 inches tall, 3.5 inches wide) make it easily portable and convenient to carry.\n\nThe EcoHydrate water bottle has undergone rigorous te

In [117]:
# print wokrer resulst sjson dumps nicely
worker_results = []

for task_info, response in zip(tasks, worker_response):
    worker_results.append({
                "type": task_info["type"],
                "description": task_info["description"],
                "result": response
            })
    
    print(f"\n=== WORKER RESULT ({task_info['type']}) ===\n{response}\n")


=== WORKER RESULT (formal) ===
<response>
Introducing the EcoHydrate water bottle, a revolutionary, eco-friendly hydration solution designed specifically for environmentally conscious individuals. Constructed from high-quality, BPA-free stainless steel (18/8 food-grade), this bottle is completely plastic-free, aligning with the values of reducing plastic waste and promoting sustainability.

The EcoHydrate water bottle boasts an impressive insulation capability, thanks to its double-walled vacuum insulation technology. This advanced design ensures that beverages maintain their temperature for hours, whether hot or cold, making it an ideal companion for daily commutes, outdoor adventures, or fitness activities.

With a generous capacity of 27 ounces (800 ml), this water bottle is spacious enough to meet your hydration needs throughout the day, while its compact dimensions (10.5 inches tall, 3.5 inches wide) make it easily portable and convenient to carry.

The EcoHydrate water bottle ha

## 5. Evaluator-optimizer

`Code generation and evaluation in a loop`

In [130]:
class Evaluation(BaseModel):
    evaluation: Literal["PASS", "NEEDS_IMPROVEMENT", "FAIL"]
    feedback: str

evaluator_prompt = """
Evaluate this following code implementation for:
1. code correctness
2. time complexity
3. style and best practices

You should be evaluating only and not attemping to solve the task.

Only output "PASS" if all criteria are met and you have no further suggestions for improvements.

Provide detailed feedback if there are areas that need improvement. You should specify what needs improvement and why.

Only output JSON.
"""

generator_prompt = """
Your goal is to complete the task based on <user input>. If there are feedback 
from your previous generations, you should reflect on them to improve your solution

Output your answer concisely in the following format: 

Thoughts:
[Your understanding of the task and feedback and how you plan to improve]

Response:
[Your code implementation here]
"""

task = """
Implement a Stack with:
1. push(x)
2. pop()
3. getMin()
All operations should be O(1).
"""

In [131]:
def generate(prompt: str, task: str, context: str = "") -> tuple[str, str]:
    """Generate and improve a solution based on feedback."""
    full_prompt = f"{prompt}\n{context}\nTask: {task}" if context else f"{prompt}\nTask: {task}"
    response = run_llm(full_prompt, model="Qwen/Qwen2.5-Coder-32B-Instruct")
    
    print("\n=== GENERATION START ===")
    print(f"Output:\n{response}\n")
    print("=== GENERATION END ===\n")
    
    return response

def evaluate(prompt: str, content: str, task: str, schema) -> tuple[str, str]:
    """Evaluate if a solution meets requirements."""
    full_prompt = f"{prompt}\nOriginal task: {task}\nContent to evaluate: {content}"
    response = JSON_llm(full_prompt, schema)
    evaluation = response["evaluation"]
    feedback = response["feedback"]

    print("=== EVALUATION START ===")
    print(f"Status: {evaluation}")
    print(f"Feedback: {feedback}")
    print("=== EVALUATION END ===\n")

    return evaluation, feedback



In [132]:
def loop(task: str, evaluator_prompt: str, generator_prompt: str) -> tuple[str, list[dict]]:
    """Keep generating and evaluating until requirements are met."""
    memory = []
    
    response = generate(generator_prompt, task)
    memory.append(response)

    while True:
        evaluation, feedback = evaluate(evaluator_prompt, response, task, Evaluation)
        if evaluation == "PASS":
            return response
            
        context = "\n".join([
            "Previous attempts:",
            *[f"- {m}" for m in memory],
            f"\nFeedback: {feedback}"
        ])
        
        response = generate(generator_prompt, task, context)
        memory.append(response)

In [133]:
loop(task, evaluator_prompt, generator_prompt)


=== GENERATION START ===
Output:
Thoughts:
To implement a stack with all operations (push, pop, getMin) in O(1) time complexity, we need to use an auxiliary data structure to keep track of the minimum values. A common approach is to use a second stack that stores the minimum values alongside the main stack. This way, every time we push a new element, we also push the current minimum onto the auxiliary stack. When we pop an element, we pop from both stacks. The top of the auxiliary stack will always be the minimum element of the main stack.

Response:
```python
class MinStack:
    def __init__(self):
        self.stack = []
        self.min_stack = []

    def push(self, x: int) -> None:
        self.stack.append(x)
        if not self.min_stack or x <= self.min_stack[-1]:
            self.min_stack.append(x)

    def pop(self) -> None:
        if self.stack:
            if self.stack[-1] == self.min_stack[-1]:
                self.min_stack.pop()
            self.stack.pop()

    def 

'Thoughts:\nBased on the feedback, I will improve the code by adding docstrings, type hints, and more descriptive variable names. Additionally, I will raise exceptions instead of returning `None` for the `getMin` method when the stack is empty.\n\nResponse:\n```python\nclass MinStack:\n    def __init__(self):\n        self.stack = []\n        self.min_stack = []\n\n    def push(self, value: int) -> None:\n        """Push a new element onto the stack."""\n        self.stack.append(value)\n        if not self.min_stack or value <= self.min_stack[-1]:\n            self.min_stack.append(value)\n\n    def pop(self) -> None:\n        """Remove the top element from the stack."""\n        if self.stack:\n            if self.stack[-1] == self.min_stack[-1]:\n                self.min_stack.pop()\n            self.stack.pop()\n        else:\n            raise IndexError("pop from empty stack")\n\n    def getMin(self) -> int:\n        """Return the minimum element in the stack."""\n        if self