In [1]:
import os
from textwrap import dedent
from typing import Literal

import dspy
import ujson
from dspy.datasets import DataLoader

In [4]:
# Model identifier and endpoint of the language model server on this machine.
OLLAMA_MODEL = "ollama_chat/llama3.2"
OLLAMA_API_BASE = "http://localhost:11434"

# NOTE(review): the api_key value looks like a placeholder for a local server
# rather than a real secret; confirm before pointing this at a hosted endpoint.
lm = dspy.LM(
    OLLAMA_MODEL,
    api_base=OLLAMA_API_BASE,
    api_key="ollama",
)

# Make this model the default for every DSPy module created below.
dspy.configure(lm=lm)

In [5]:
class FEL0(dspy.Signature):
    """Extract layer1 or layer2 from the input text."""

    # NOTE(review): DSPy is understood to use the class docstring above and the
    # ``desc`` strings below as prompt text, so treat them as runtime strings
    # rather than as documentation when editing.

    # Input: the free-text query. The literal starts with a line continuation so
    # that every text line carries the same indentation; dedent() can then strip
    # the margin from all lines instead of leaving lines 2-3 indented.
    text: str = dspy.InputField(
        desc=dedent(
            """\
            The query text about features in one layer or two layers.
            A layer is a collection of features.
            A feature has a shape (point, linestring, polygon) and attributes.
            """
        ).strip(),
    )
    # Output: name of the first (always present) layer.
    layer1: str = dspy.OutputField(
        desc="The name of the first layer.",
    )
    # Output: name of the optional second layer, or the literal 'none'.
    layer2: str = dspy.OutputField(
        desc="The name of the second layer if applicable or 'none'.",
    )

In [6]:
# Predictor built from the FEL0 signature; this is the program handed to the
# optimizer at the end of the notebook.
matcher = dspy.Predict(FEL0)

In [7]:
def metric_fel0(
    data: "dspy.Example",
    pred: "dspy.Prediction",
    trace=None,
) -> bool:
    """Score one prediction by exact match on both extracted layer names.

    Args:
        data: Gold example exposing ``layer1`` and ``layer2`` attributes.
        pred: Program output exposing ``layer1`` and ``layer2`` attributes.
        trace: Accepted so the function fits the usual DSPy metric signature;
            it is not used.

    Returns:
        True only when both ``layer1`` and ``layer2`` agree. String values are
        compared after trimming surrounding whitespace; the comparison remains
        case-sensitive.
    """

    def _clean(value):
        # Only strings are trimmed; any other value is compared unchanged.
        return value.strip() if isinstance(value, str) else value

    # all() short-circuits, so layer2 is only read when layer1 already matched.
    return all(
        _clean(getattr(data, field)) == _clean(getattr(pred, field))
        for field in ("layer1", "layer2")
    )

In [8]:
# Location of the labelled records. Set the NORTHSEA_JSON environment variable
# to point at another copy of the file; when it is unset or empty the original
# path is used, so existing runs behave as before.
NORTHSEA_JSON = os.environ.get("NORTHSEA_JSON") or "/Users/mraad/data/NorthSea0.json"

# Parse the whole file into memory; `docs` is iterated later to build examples.
with open(NORTHSEA_JSON, mode="r", encoding="utf-8") as fp:
    docs = ujson.load(fp)

In [9]:
# dl = DataLoader()

In [10]:
# dl.from_json("/Users/mraad/data/NorthSea0.json", input_keys=("text",))

In [11]:
# dspy.Example(dict(foo="bar")).with_inputs("foo")

In [12]:
def yield_example():
    """Yield one ``dspy.Example`` per record in the module-level ``docs``.

    Each record is read through its ``"text"`` key and the ``"layer1"`` /
    ``"layer2"`` keys of its ``"meta"`` mapping. A falsy ``layer2`` is replaced
    by the string ``"none"``. Only ``text`` is marked as an input field.
    """
    for record in docs:
        fields = {
            "text": record["text"],
            "layer1": record["meta"]["layer1"],
            # Falsy values (missing second layer) become the 'none' sentinel.
            "layer2": record["meta"]["layer2"] or "none",
        }
        yield dspy.Example(fields).with_inputs("text")


# Materialise the generator so the examples can be sliced and reused.
trainset = list(yield_example())

In [None]:
# Show the first five examples as a quick check of the conversion.
trainset[:5]

In [None]:
# Optimizer settings: score with metric_fel0, with max_steps and max_demos
# both set to 10.
simba_settings = dict(
    metric=metric_fel0,
    max_steps=10,
    max_demos=10,
)
compiler = dspy.SIMBA(**simba_settings)

# Optimise the predictor against the full example list.
compiled = compiler.compile(matcher, trainset=trainset)