In [6]:
import dspy
import asyncio
import time

# Configure DSPy with an OpenAI gpt-4o-mini language model.
# NOTE(review): cache=False presumably disables response caching so that the
# timings measured below reflect live model calls — confirm against dspy.LM docs.
dspy.configure(lm=dspy.LM("openai/gpt-4o-mini", cache=False))
# Predictor built from the signature string "question->short_answer": it is
# called with a `question` keyword and its result exposes `.short_answer`.
predict = dspy.Predict("question->short_answer")

async def get_prediction(question):
    """Send one question to the module-level `predict` program and time the call.

    Args:
        question: Value passed as the `question` keyword to `predict.acall`.

    Returns:
        An `(output, elapsed)` tuple: whatever `predict.acall` returned, and
        the elapsed time in seconds (float) between issuing the call and
        receiving its result.
    """
    # perf_counter() is a monotonic, high-resolution clock intended for
    # measuring intervals; time.time() follows the system wall clock and can
    # jump (NTP sync, manual adjustment), which would corrupt the measurement.
    start_time = time.perf_counter()
    output = await predict.acall(question=question)
    elapsed = time.perf_counter() - start_time
    return output, elapsed

# List of different questions to ask in parallel.
# Both run_parallel_predictions() and run_sequential_predictions() read this
# module-level name when they are called, so it must be bound before they run.
questions = [
    "why did a chicken cross the kitchen?",
    "what is the meaning of life?",
    "how does quantum computing work?",
    "what's the best way to learn programming?",
    "what are the ingredients of chocolate chip pancakes?"
]

# Run predictions in parallel
async def run_parallel_predictions():
    """Run `get_prediction` for every entry of the global `questions` concurrently.

    Returns:
        A `(results, total_time)` tuple. `results` is the list produced by
        `asyncio.gather`, one `get_prediction` return value per question, in
        the same order as `questions`. `total_time` is the elapsed time in
        seconds (float) for the whole batch.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    # Creating the coroutines does not start them; gather() schedules them all
    # and waits until every one has finished.
    tasks = [get_prediction(q) for q in questions]
    results = await asyncio.gather(*tasks)
    total_time = time.perf_counter() - start_time
    return results, total_time

# Use await directly in Jupyter: the notebook accepts a top-level `await` in a
# cell (a plain Python script would need asyncio.run() instead).
results, total_time = await run_parallel_predictions()

# Print results
# Each entry of `results` is an (output, elapsed_seconds) pair; zip() lines the
# pairs up with the questions they were produced for.
print(f"Total time for all {len(questions)} questions: {total_time:.2f} seconds\n")
for i, ((output, time_taken), question) in enumerate(zip(results, questions)):
    print(f"Question {i+1}: {question}")
    print(f"Answer: {output.short_answer}")
    print(f"Time: {time_taken:.2f} seconds\n")

# For comparison, run the same questions sequentially
async def run_sequential_predictions():
    """Run `get_prediction` for every entry of the global `questions`, one at a time.

    Each call is awaited to completion before the next one starts, which makes
    this the baseline for the concurrent version.

    Returns:
        A `(results, total_time)` tuple. `results` is a list with one
        `get_prediction` return value per question, in the order of
        `questions`. `total_time` is the elapsed time in seconds (float) for
        the whole loop.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    # `await` inside the comprehension still runs strictly one call after another.
    results = [await get_prediction(q) for q in questions]
    total_time = time.perf_counter() - start_time
    return results, total_time

print("\n--- SEQUENTIAL EXECUTION FOR COMPARISON ---\n")
# Per-question results of the sequential run are kept in `seq_results` but only
# the total time is printed.
seq_results, seq_total_time = await run_sequential_predictions()
print(f"Total sequential time: {seq_total_time:.2f} seconds")
# Speedup = sequential total / parallel total; `total_time` is the value bound
# by the parallel run earlier in this cell.
print(f"Speedup from parallelization: {seq_total_time/total_time:.2f}x")

Total time for all 5 questions: 1.50 seconds

Question 1: why did a chicken cross the kitchen?
Answer: To get to the other side of the dining room.
Time: 0.87 seconds

Question 2: what is the meaning of life?
Answer: The meaning of life is a philosophical question that varies for each individual, often encompassing personal fulfillment, relationships, and the pursuit of happiness or purpose.
Time: 1.07 seconds

Question 3: how does quantum computing work?
Answer: Quantum computing works by utilizing the principles of quantum mechanics, specifically superposition and entanglement. Unlike classical bits that represent either a 0 or a 1, quantum bits (qubits) can exist in multiple states simultaneously. This allows quantum computers to perform complex calculations at much higher speeds than classical computers by processing a vast amount of possibilities at once.
Time: 1.50 seconds

Question 4: what's the best way to learn programming?
Answer: The best way to learn programming is to start

In [7]:
# Rebinds the module-level `questions` list; the run_*_predictions functions
# look this name up at call time, so they use these prompts from here on.
questions = [
    "why did a chicken cross the kitchen?",
    "what is the meaning of life?",
    "how does quantum computing work?",
    "what's the best way to learn programming?",
    "explain the theory of relativity"
]

# Run predictions in parallel
async def run_parallel_predictions():
    """Run `get_prediction` for every entry of the global `questions` concurrently.

    NOTE: this re-declares a name that an earlier cell already defines; the
    definition here shadows the earlier one once this cell has run.

    Returns:
        A `(results, total_time)` tuple. `results` is the list produced by
        `asyncio.gather`, one `get_prediction` return value per question, in
        the same order as `questions`. `total_time` is the elapsed time in
        seconds (float) for the whole batch.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    # gather() schedules every coroutine and waits for all of them to finish.
    tasks = [get_prediction(q) for q in questions]
    results = await asyncio.gather(*tasks)
    total_time = time.perf_counter() - start_time
    return results, total_time

# Use await directly in Jupyter: the notebook accepts a top-level `await` in a
# cell (a plain Python script would need asyncio.run() instead).
results, total_time = await run_parallel_predictions()

# Print results
# Each entry of `results` is an (output, elapsed_seconds) pair; zip() lines the
# pairs up with the questions they were produced for.
print(f"Total time for all {len(questions)} questions: {total_time:.2f} seconds\n")
for i, ((output, time_taken), question) in enumerate(zip(results, questions)):
    print(f"Question {i+1}: {question}")
    print(f"Answer: {output.short_answer}")
    print(f"Time: {time_taken:.2f} seconds\n")

# For comparison, run the same questions sequentially
async def run_sequential_predictions():
    """Run `get_prediction` for every entry of the global `questions`, one at a time.

    NOTE: this re-declares a name that an earlier cell already defines; the
    definition here shadows the earlier one once this cell has run.

    Returns:
        A `(results, total_time)` tuple. `results` is a list with one
        `get_prediction` return value per question, in the order of
        `questions`. `total_time` is the elapsed time in seconds (float) for
        the whole loop.
    """
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way a time.time() difference can.
    start_time = time.perf_counter()
    # `await` inside the comprehension still runs strictly one call after another.
    results = [await get_prediction(q) for q in questions]
    total_time = time.perf_counter() - start_time
    return results, total_time

print("\n--- SEQUENTIAL EXECUTION FOR COMPARISON ---\n")
# Per-question results of the sequential run are kept in `seq_results` but only
# the total time is printed.
seq_results, seq_total_time = await run_sequential_predictions()
print(f"Total sequential time: {seq_total_time:.2f} seconds")
# Speedup = sequential total / parallel total; `total_time` is the value bound
# by the parallel run earlier in this cell.
print(f"Speedup from parallelization: {seq_total_time/total_time:.2f}x")

Total time for all 5 questions: 3.51 seconds

Question 1: why did a chicken cross the kitchen?
Answer: To get to the other side of the meal prep!
Time: 0.95 seconds

Question 2: what is the meaning of life?
Answer: The meaning of life is a philosophical question that varies for each individual, often encompassing personal fulfillment, relationships, and the pursuit of happiness or purpose.
Time: 1.07 seconds

Question 3: how does quantum computing work?
Answer: Quantum computing works by utilizing the principles of quantum mechanics, specifically superposition and entanglement, to process information. Unlike classical bits that represent either a 0 or a 1, quantum bits (qubits) can exist in multiple states simultaneously. This allows quantum computers to perform complex calculations at much higher speeds than classical computers by exploring many possibilities at once.
Time: 1.84 seconds

Question 4: what's the best way to learn programming?
Answer: The best way to learn programming is