In [None]:
# Initialization code that runs before all other cells
import subprocess

# Install the notebook's dependencies with uv. check=True makes a failed
# install raise subprocess.CalledProcessError in this cell, instead of being
# ignored and resurfacing later as an ImportError in an unrelated cell.
subprocess.run(["uv", "pip", "install", "dspy"], check=True)
subprocess.run(["uv", "pip", "install", "funnydspy"], check=True)
subprocess.run(["uv", "pip", "install", "git+https://github.com/hendrycks/math.git"], check=True)

Audited 1 package in 6ms
Audited 1 package in 5ms


Resolved 1 package in 1.06s
Audited 1 package in 0.05ms


In [None]:
import marimo as mo

In [None]:
import funnydspy as fd
import dspy
from dataclasses import dataclass
from typing import List, NamedTuple


In [None]:
# Configure the language model. The model name is read from OPENAI_API_MODEL
# (a missing variable raises KeyError here); the endpoint and API key are
# expected in the environment variables OPENAI_API_BASE and OPENAI_API_KEY.
import os

dspy.configure(lm=dspy.LM("openai/" + os.environ["OPENAI_API_MODEL"]))

# funnydspy declaration rather than ordinary Python: `answer` is not a local
# variable of this function. Presumably fd.ChainOfThought inspects the
# parameter names, the return annotation and the returned name to build the
# DSPy signature instead of running the body — verify against the funnydspy docs
# before restructuring this function.
@fd.ChainOfThought
def rag(query: str, context: str) -> str:
    return answer

# Plain call: the result comes back as an ordinary Python object
answer = rag(
    "What is the capital of France?",
    "France is a country in Europe.",
)
# → "The capital of France is Paris."

# Same inputs with _prediction=True: returns the DSPy Prediction (for optimization)
pred1 = rag("What is the capital of France?", "France is a country in Europe.", _prediction=True)
# → dspy.Prediction(reasoning="...", answer="The capital of France is Paris.")

In [None]:
# Display the value returned by the plain rag(...) call
answer

In [None]:
# Display the result of the rag(...) call made with _prediction=True
pred1

In [None]:
# funnydspy declaration with two outputs, described by the fields of the
# Stats NamedTuple (`mean` and `above`).
# NOTE(review): the inline `#` comments on the Stats fields may be read by
# funnydspy as field descriptions — confirm before editing or removing them.
@fd.ChainOfThought
def analyze(numbers: list[float], threshold: float) -> tuple[float, list[float]]:
    """Analyze numbers and return statistics."""

    class Stats(NamedTuple):
        mean: float  # The average of the numbers
        above: list[float]  # Numbers above the threshold

    return Stats

# Plain call: the two outputs unpack directly into Python variables
mean_val, above_vals = analyze(
    [1, 5, 3, 8, 2],
    4.0,
)
# → (4.0, [5.0, 8.0])

# Same inputs with _prediction=True: returns the DSPy Prediction (for optimization)
pred2 = analyze(
    [1, 5, 3, 8, 2],
    4.0,
    _prediction=True,
)
# → dspy.Prediction(reasoning="...", mean=4.0, above=[5.0, 8.0])

In [None]:
# Display the result of the analyze(...) call made with _prediction=True
pred2

In [None]:
# funnydspy declaration with three outputs (summary, word_count, key_points).
# NOTE(review): the strings assigned below look like output-field descriptions
# rather than real values (word_count is annotated as int but assigned a str);
# presumably funnydspy lifts them into the DSPy signature — confirm before
# changing the variable names or the strings.
@fd.ChainOfThought
def summarize_text(text: str) -> tuple[str, int, List[str]]:
    """Summarize text and extract key information."""
    summary = "A concise summary of the text"
    word_count = "Total number of words"
    key_points = "List of main points"
    return summary, word_count, key_points

# Run the summarizer on a two-paragraph sample and unpack its three outputs
summary, count, points = summarize_text(
    """Modules help you describe AI behavior as code, not strings.
To build reliable AI systems, you must iterate fast. But maintaining prompts makes that hard: it forces you to tinker with strings or data every time you change your LM, metrics, or pipeline. Having built over a dozen best-in-class compound LM systems since 2020, we learned this the hard way—and so built DSPy to decouple AI system design from messy incidental choices about specific LMs or prompting strategies."""
)

In [None]:
# Print the first of the three values unpacked from the summarize_text(...) call
print(summary)

The text explains that maintaining prompts in AI systems is inefficient and introduces DSPy, a tool designed to separate AI system design from specific LM or prompting choices, enabling faster iteration and more reliable systems.


In [None]:
# Load the "algebra" subset of the MATH dataset shipped with dspy.datasets;
# later cells use its train/dev splits and its metric.
from dspy.datasets import MATH

dataset = MATH(subset="algebra")

In [None]:
# funnydspy Predict declaration used as the program to optimize. The saved
# program state printed near the end of this notebook shows the docstring
# stored as the signature instructions and the string assigned to `answer`
# stored as the description of the Answer field, so both are part of the
# program's signature, not ordinary documentation — edit them deliberately.
@fd.Predict
def analyze_data(question: str) -> str:
    """Work on the math problem and give an answer"""
    answer = "the answer to the math problem"
    return answer


In [None]:
# Peek at the first training example of the algebra subset
dataset.train[0]

In [None]:
# Preview the training examples at indices 10, 11 and 12
dataset.train[10:13]

In [None]:
# Optimize the DSPy module behind analyze_data (exposed as `.module`) with SIMBA,
# scoring with the dataset's own metric on the training slice [10:14]
optimizer = dspy.SIMBA(
    metric=dataset.metric,
    bsize=4,
    num_candidates=4,
    max_steps=1,
    max_demos=4,
)
compiled_analyze = optimizer.compile(analyze_data.module, trainset=dataset.train[10:14])

2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Starting batch 1 of 1.
2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Sampling program trajectories on 4 examples x 4 samples.


  0%|                                                                                                                                                  | 0/16 [00:00<?, ?it/s]Processed 1 / 16 examples:   0%|                                                                                                                       | 0/16 [00:00<?, ?it/s]Processed 2 / 16 examples:   6%|██████▉                                                                                                       | 1/16 [00:00<00:00, 236.47it/s]Processed 3 / 16 examples:  12%|█████████████▊                                                                                                | 2/16 [00:00<00:00, 337.39it/s]Processed 4 / 16 examples:  19%|████████████████████▋                                                                                         | 3/16 [00:00<00:00, 500.02it/s]

Processed 5 / 16 examples:  25%|███████████████████████████▌                                                                                  | 4/16 [00:00<00:00, 409.26it/s]Processed 6 / 16 examples:  31%|██████████████████████████████████▍                                                                           | 5/16 [00:00<00:00, 466.65it/s]Processed 7 / 16 examples:  38%|█████████████████████████████████████████▎                                                                    | 6/16 [00:00<00:00, 535.58it/s]Processed 8 / 16 examples:  44%|████████████████████████████████████████████████▏                                                             | 7/16 [00:00<00:00, 582.75it/s]Processed 9 / 16 examples:  50%|███████████████████████████████████████████████████████                                                       | 8/16 [00:00<00:00, 640.73it/s]Processed 10 / 16 examples:  56%|█████████████████████████████████████████████████████████████▎                             

2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Batch 1: Baseline mini-batch score: 0.5

2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #1, with max score 1.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Batch 1: Invoking strategy: append_a_rule


2025/06/29 21:38:24 INFO dspy.teleprompt.simba_utils: Advice for self: When receiving a math problem involving absolute values and inequalities like '$|ax+b|=c$ and $x<d$', you should: 1) Solve both equations $ax+b=c$ and $ax+b=-c$, 2) Check which solution satisfies the inequality $x<d$, and 3) Always return the correct solution that meets all conditions. Make sure to always produce an output, even if you need to show your reasoning steps to arrive at the answer.
2025/06/29 21:38:24 INFO dspy.teleprompt.simba: 

2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #2, with max score 1.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Batch 1: Invoking strategy: append_a_demo_
2025/06/29 21:38:24 INFO dspy.teleprompt.simba_utils: Added 1 demos (one each) across all predictors.
2025/06/29 21:38:24 INFO dspy.teleprompt.simba: 

2025/06/29 21:38:24 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #3, with max score 0.

2025/06/29 21:38:37 INFO dspy.teleprompt.simba_utils: Advice for self: When given a question about the sum of the reciprocals of the roots of an equation, first rewrite the equation in the standard quadratic form \(ax^2 + bx + c = 0\). Then, use Vieta's formulas to find the sum \(r + s = -b/a\) and product \(rs = c/a\) of the roots. Finally, compute the sum of the reciprocals as \((r + s)/(rs)\). Avoid skipping steps or misapplying algebraic transformations, as this can lead to incorrect answers.
2025/06/29 21:38:37 INFO dspy.teleprompt.simba: 

2025/06/29 21:38:37 INFO dspy.teleprompt.simba: Batch 1: Processing bucket #4, with max score 0.0, max-to-min gap 0.0, and max-to-avg gap 0.0.
2025/06/29 21:38:37 INFO dspy.teleprompt.simba: Batch 1: Invoking strategy: append_a_demo_
2025/06/29 21:38:37 INFO dspy.teleprompt.simba_utils: Added 1 demos (one each) across all predictors.
2025/06/29 21:38:37 INFO dspy.teleprompt.simba: 

2025/06/29 21:38:37 INFO dspy.teleprompt.simba: Batch 1: Evalu

  0%|                                                                                                                                                  | 0/16 [00:00<?, ?it/s]

Processed 1 / 16 examples:   0%|                                                                                                                       | 0/16 [00:02<?, ?it/s]Processed 1 / 16 examples:   6%|██████▉                                                                                                        | 1/16 [00:02<00:42,  2.83s/it]

Processed 2 / 16 examples:   6%|██████▉                                                                                                        | 1/16 [00:03<00:42,  2.83s/it]Processed 2 / 16 examples:  12%|█████████████▉                                                                                                 | 2/16 [00:03<00:18,  1.30s/it]

Processed 3 / 16 examples:  12%|█████████████▉                                                                                                 | 2/16 [00:03<00:18,  1.30s/it]

Processed 4 / 16 examples:  19%|████████████████████▊                                                                                          | 3/16 [00:03<00:16,  1.30s/it]Processed 4 / 16 examples:  25%|███████████████████████████▊                                                                                   | 4/16 [00:03<00:07,  1.57it/s]

Processed 5 / 16 examples:  25%|███████████████████████████▊                                                                                   | 4/16 [00:04<00:07,  1.57it/s]Processed 5 / 16 examples:  31%|██████████████████████████████████▋                                                                            | 5/16 [00:04<00:07,  1.57it/s]

Processed 6 / 16 examples:  31%|██████████████████████████████████▋                                                                            | 5/16 [00:04<00:07,  1.57it/s]

Processed 7 / 16 examples:  38%|█████████████████████████████████████████▋                                                                     | 6/16 [00:04<00:06,  1.57it/s]Processed 7 / 16 examples:  44%|████████████████████████████████████████████████▌                                                              | 7/16 [00:04<00:03,  2.37it/s]

Processed 8 / 16 examples:  44%|████████████████████████████████████████████████▌                                                              | 7/16 [00:12<00:03,  2.37it/s]Processed 8 / 16 examples:  50%|███████████████████████████████████████████████████████▌                                                       | 8/16 [00:12<00:17,  2.21s/it]

Processed 9 / 16 examples:  50%|███████████████████████████████████████████████████████▌                                                       | 8/16 [00:12<00:17,  2.21s/it]Processed 9 / 16 examples:  56%|██████████████████████████████████████████████████████████████▍                                                | 9/16 [00:12<00:12,  1.81s/it]

Processed 10 / 16 examples:  56%|█████████████████████████████████████████████████████████████▉                                                | 9/16 [00:12<00:12,  1.81s/it]Processed 10 / 16 examples:  62%|████████████████████████████████████████████████████████████████████▏                                        | 10/16 [00:12<00:08,  1.37s/it]

Processed 11 / 16 examples:  62%|████████████████████████████████████████████████████████████████████▏                                        | 10/16 [00:16<00:08,  1.37s/it]Processed 11 / 16 examples:  69%|██████████████████████████████████████████████████████████████████████████▉                                  | 11/16 [00:16<00:10,  2.11s/it]

Processed 12 / 16 examples:  69%|██████████████████████████████████████████████████████████████████████████▉                                  | 11/16 [00:18<00:10,  2.11s/it]Processed 12 / 16 examples:  75%|█████████████████████████████████████████████████████████████████████████████████▊                           | 12/16 [00:18<00:07,  1.96s/it]

Processed 13 / 16 examples:  75%|█████████████████████████████████████████████████████████████████████████████████▊                           | 12/16 [00:19<00:07,  1.96s/it]Processed 13 / 16 examples:  81%|████████████████████████████████████████████████████████████████████████████████████████▌                    | 13/16 [00:19<00:05,  1.76s/it]

Processed 14 / 16 examples:  81%|████████████████████████████████████████████████████████████████████████████████████████▌                    | 13/16 [00:20<00:05,  1.76s/it]Processed 14 / 16 examples:  88%|███████████████████████████████████████████████████████████████████████████████████████████████▍             | 14/16 [00:20<00:02,  1.29s/it]

Processed 15 / 16 examples:  88%|███████████████████████████████████████████████████████████████████████████████████████████████▍             | 14/16 [00:21<00:02,  1.29s/it]Processed 15 / 16 examples:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 15/16 [00:21<00:01,  1.32s/it]

Processed 16 / 16 examples:  94%|██████████████████████████████████████████████████████████████████████████████████████████████████████▏      | 15/16 [00:27<00:01,  1.32s/it]Processed 16 / 16 examples: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:27<00:00,  2.86s/it]Processed 16 / 16 examples: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:27<00:00,  1.75s/it]


2025/06/29 21:39:05 INFO dspy.teleprompt.simba: Scores after 1 batches: [0.25, 0.75, 0.5, 0.0], Best: 0.75

2025/06/29 21:39:05 INFO dspy.teleprompt.simba: VALIDATION: Evaluating 2 programs on the full trainset.


  0%|                                                                                                                                                   | 0/8 [00:00<?, ?it/s]Processed 1 / 8 examples:   0%|                                                                                                                         | 0/8 [00:00<?, ?it/s]Processed 2 / 8 examples:  12%|█████████████▉                                                                                                 | 1/8 [00:00<00:00, 6584.46it/s]Processed 3 / 8 examples:  25%|███████████████████████████▊                                                                                   | 2/8 [00:00<00:00, 7358.43it/s]Processed 4 / 8 examples:  38%|█████████████████████████████████████████▋                                                                     | 3/8 [00:00<00:00, 7984.08it/s]Processed 5 / 8 examples:  50%|███████████████████████████████████████████████████████▌                                     

Processed 8 / 8 examples:  88%|█████████████████████████████████████████████████████████████████████████████████████████████████▏             | 7/8 [00:00<00:00, 6087.52it/s]Processed 8 / 8 examples: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████| 8/8 [00:00<00:00, 5776.28it/s]


2025/06/29 21:39:05 INFO dspy.teleprompt.simba: Final trainset scores: [0.5, 0.75], Best: 0.75 (at index 1)





In [None]:
# Wrap the optimized DSPy module back into a Pythonic interface, so it can be
# called with a plain question string just like analyze_data
# (the comparison loop later in the notebook calls both the same way)
analyze_optimized = fd.funnier(compiled_analyze)

In [None]:
# Display the module returned by optimizer.compile(...)
compiled_analyze

In [None]:
# Side-by-side comparison on training examples 20-23: reference answer,
# the unoptimized program's answer, then the optimized program's answer
for q in dataset.train[20:24]:
    print(
        f"Ground truth: {q['answer']}\n"
        f"Unoptimized answer:\n{analyze_data(q['question'])}\n"
        f"Optimized answer:\n{analyze_optimized(q['question'])}\n"
        "--------------\n"
    )

Ground truth: [0,1)
Unoptimized answer:
To determine the domain of the function \( q(x) = \frac{\sqrt{x}}{\sqrt{1-x^2}} \), we need to ensure that both the numerator and the denominator are defined and that the denominator is not zero.

1. **Numerator \(\sqrt{x}\):** The expression under the square root must be non-negative:
   \[
   x \geq 0
   \]

2. **Denominator \(\sqrt{1-x^2}\):** The expression under the square root must be positive (since division by zero is not allowed):
   \[
   1 - x^2 > 0
   \]
   This inequality can be rewritten as:
   \[
   x^2 < 1
   \]
   Which implies:
   \[
   -1 < x < 1
   \]

Combining these two conditions, we have:
\[
x \geq 0 \quad \text{and} \quad -1 < x < 1
\]
This simplifies to:
\[
0 \leq x < 1
\]

Therefore, the domain of \( q(x) \) is the interval \([0, 1)\).

\[
\boxed{[0, 1)}
\]
Optimized answer:
(0, 1)
--------------

Ground truth: 2015
Unoptimized answer:
To solve the problem, we first analyze the given condition:

\[ T(b+1) - T(b) = T(x) 

In [None]:
# Make sure the output/ directory exists before saving: on a fresh checkout it
# is missing and writing the .json state file would fail with FileNotFoundError.
# exist_ok=True makes this a no-op when the directory is already there.
os.makedirs("output", exist_ok=True)
compiled_analyze.save("output/analyze_optimized.json")

In [None]:
# Read the saved program state back from output/analyze_optimized.json
# and print the parsed dictionary for inspection
import json

with open("output/analyze_optimized.json", "r") as file:
    data = json.loads(file.read())
print(data)

{'traces': [], 'train': [], 'demos': [{'augmented': True, 'question': 'What is the smallest two-digit positive integer such that the product of its two digits is one-half the integer?', 'answer': '36'}], 'signature': {'instructions': 'Work on the math problem and give an answer', 'fields': [{'prefix': 'Question:', 'description': None}, {'prefix': 'Answer:', 'description': 'the answer to the math problem'}]}, 'lm': None, 'metadata': {'dependency_versions': {'python': '3.12', 'dspy': '2.6.27', 'cloudpickle': '3.1'}}}


In [None]:
# Score the optimized program on four examples from the dev split,
# using the dataset's own metric
devset = dataset.dev[20:24]
evaluate = dspy.Evaluate(
    devset=devset,
    metric=dataset.metric,
    num_threads=4,
    display_progress=True,
    display_table=0,
    max_errors=999,
)
evaluate(compiled_analyze)

  0%|                                                                                                                                                   | 0/4 [00:00<?, ?it/s]

Average Metric: 0.00 / 1 (0.0%):   0%|                                                                                                                  | 0/4 [00:01<?, ?it/s]Average Metric: 0.00 / 1 (0.0%):  25%|██████████████████████████▌                                                                               | 1/4 [00:01<00:03,  1.09s/it]

Average Metric: 1.00 / 2 (50.0%):  25%|██████████████████████████▎                                                                              | 1/4 [00:01<00:03,  1.09s/it]Average Metric: 1.00 / 2 (50.0%):  50%|████████████████████████████████████████████████████▌                                                    | 2/4 [00:01<00:01,  1.51it/s]

Average Metric: 2.00 / 3 (66.7%):  50%|████████████████████████████████████████████████████▌                                                    | 2/4 [00:01<00:01,  1.51it/s]Average Metric: 2.00 / 3 (66.7%):  75%|██████████████████████████████████████████████████████████████████████████████▊                          | 3/4 [00:01<00:00,  2.11it/s]

Average Metric: 2.00 / 4 (50.0%):  75%|██████████████████████████████████████████████████████████████████████████████▊                          | 3/4 [00:02<00:00,  2.11it/s]Average Metric: 2.00 / 4 (50.0%): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  2.45it/s]Average Metric: 2.00 / 4 (50.0%): 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 4/4 [00:02<00:00,  1.99it/s]


2025/06/29 21:41:58 INFO dspy.evaluate.evaluate: Average Metric: 2 / 4 (50.0%)
