## 4.3 The Traditional NLP Approach

### dspy module

In [1]:
import dspy
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Build the LM client (Grok-3-mini via the 'xai/' provider prefix) with a low
# temperature, then register it as dspy's configured LM so later dspy
# predictors and LM-based metrics in this notebook pick it up.
lm = dspy.LM('xai/grok-3-mini', max_tokens=6000, temperature=0.1, top_p=0.9)
dspy.configure(lm=lm)

In [2]:
class RAG(dspy.Module):
    """Extractive question-answering baseline ("traditional NLP" approach).

    A context is assembled by a pluggable search factory and handed to a
    DistilBERT SQuAD question-answering pipeline, which extracts an answer
    span from that context.

    Args:
        make_search: callable ``(topic, literal, pragmatic) -> search`` where
            ``search(question)`` returns an iterable of passage strings.
    """

    def __init__(self, make_search):
        # Run dspy.Module's own initialisation, as CooperativeRAG does.
        super().__init__()
        self.make_search = make_search
        self.respond = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

    def forward(self, topic, question, literal, pragmatic):
        """Answer ``question`` from the passages produced by the search factory.

        Returns:
            dspy.Prediction with a single ``response`` field holding the
            extracted answer span, or an empty string when there is no
            context to extract from.
        """
        search = self.make_search(topic, literal, pragmatic)
        context = " ".join(search(question))

        # The question-answering pipeline rejects an empty context (it raises
        # ValueError in current transformers releases), so an example with no
        # passages yields an empty answer instead of an exception.
        if not context.strip():
            return dspy.Prediction(response="")

        output = self.respond(context=context, question=question)
        return dspy.Prediction(response=output['answer'])

### Test set as a list of `dspy.Example`

In [3]:
import json
import os
def get_conversations_set(filename, dataset_dir="../PragmatiCQA/data"):
    """Load a PragmatiCQA JSONL split and build one example per conversation.

    Only the first question/answer turn of each conversation is used.

    Args:
        filename: name of the JSONL file inside ``dataset_dir``.
        dataset_dir: directory that contains the dataset files.

    Returns:
        list[dspy.Example] with fields ``topic``, ``question``, ``response``,
        ``literal`` and ``pragmatic``; every field except ``response`` is
        marked as an input.
    """
    conversations = []
    # Explicit UTF-8 (as read_html_files does) so parsing does not depend on
    # the platform's default encoding.
    with open(os.path.join(dataset_dir, filename), 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # JSON documents and would make json.loads raise.
            if not line.strip():
                continue
            conversations.append(json.loads(line))

    examples = []
    for d in conversations:
        first_turn = d['qas'][0]
        answer_meta = first_turn['a_meta']
        example = dspy.Example(
            topic=d['community'],
            question=first_turn['q'],
            response=first_turn['a'],
            literal=[text['text'] for text in answer_meta['literal_obj']],
            pragmatic=[text['text'] for text in answer_meta['pragmatic_obj']],
        ).with_inputs('question', 'topic', 'literal', 'pragmatic')
        examples.append(example)
    return examples

# First-turn examples built from test.jsonl in the default dataset directory;
# reused by the three baseline evaluations below.
pcqa_test = get_conversations_set("test.jsonl")

### Evaluation of a model based on example set

In [4]:
from dspy.evaluate import SemanticF1, Evaluate
import pickle
import os
def evaluate(model, example_set, save_path):
    """Score ``model`` on ``example_set`` with SemanticF1, caching the result.

    If ``save_path`` already exists, the pickled result stored there is
    returned and nothing is re-evaluated. Otherwise the evaluation is run
    and its result is pickled to ``save_path`` for later runs.

    Args:
        model: the dspy module (program) to evaluate.
        example_set: list of dspy.Example used as the dev set.
        save_path: path of the pickle file used as the on-disk cache.

    Returns:
        Whatever ``dspy.evaluate.Evaluate`` returned for this model, either
        freshly computed or loaded from the cache.
    """
    # Check the cache first so a cache hit does not build the metric/evaluator.
    if os.path.exists(save_path):
        # pickle.load needs an open binary file object, not a path string.
        # SECURITY: unpickling can execute arbitrary code; only load cache
        # files produced by this notebook.
        with open(save_path, "rb") as f:
            return pickle.load(f)

    metric = SemanticF1(decompositional=True)
    evaluator = Evaluate(devset=example_set, metric=metric, num_threads=1, display_progress=True, display_table=True, provide_feedback=True)

    results = evaluator(model)
    with open(save_path, "wb") as f:
        pickle.dump(results, f)

    return results

### Getting-Context Methods

In [5]:
# Traverse a directory and read html files - extract text from the html files
from bs4 import BeautifulSoup
import os
def read_html_files(dir_name, directory="../PragmatiCQA-sources"):
    """Collect the plain text of every ``.html`` file in one source folder.

    Args:
        dir_name: sub-folder of ``directory`` to scan (not recursive).
        directory: root folder holding one sub-folder per topic.

    Returns:
        list[str]: one extracted-text string per HTML file, in the order
        ``os.listdir`` reports the files.
    """
    source_dir = os.path.join(directory, dir_name)
    html_names = [name for name in os.listdir(source_dir) if name.endswith(".html")]

    extracted = []
    for name in html_names:
        with open(os.path.join(source_dir, name), 'r', encoding='utf-8') as handle:
            # Parse while the file is still open, then keep only its text.
            extracted.append(BeautifulSoup(handle, 'html.parser').get_text())
    return extracted

# Sentence-embedding model (run on CPU) wrapped as a dspy embedder.
model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1", device="cpu")
embedder = dspy.Embedder(model.encode)

# Retrievers already built in this session, keyed by (topic, k). Without this,
# every question re-reads all HTML files of its topic and re-embeds the whole
# corpus before searching.
_search_cache = {}

def make_search(topic=None, literal=None, pragmatic=None, k=5):
    """Return a search function over the HTML sources of ``topic``.

    Args:
        topic: name of the source sub-folder passed to ``read_html_files``.
        literal: unused; accepted so all search factories share one signature.
        pragmatic: unused; accepted so all search factories share one signature.
        k: number of passages the retriever returns per query.

    Returns:
        A callable ``q -> passages`` giving the top-``k`` passages for ``q``.
    """
    cache_key = (topic, k)
    search = _search_cache.get(cache_key)
    if search is None:
        corpus = read_html_files(topic)
        # NOTE(review): per the dspy Embeddings documentation this threshold is
        # a corpus-size cutoff (number of documents) for switching from brute-
        # force search to an index, not a character limit - confirm against
        # the installed dspy version.
        brute_force_threshold = 10000
        search = dspy.retrievers.Embeddings(embedder=embedder, corpus=corpus, k=k, brute_force_threshold=brute_force_threshold)
        _search_cache[cache_key] = search
    return lambda q: search(q).passages


def get_literal(topic, literal, pragmatic):
    """Search factory that ignores the query and always yields ``literal``.

    Takes the same positional arguments as ``make_search`` so it can be
    passed to ``RAG`` in its place; ``topic`` and ``pragmatic`` are unused.
    """
    def _literal_spans(q):
        # The query is irrelevant: the supplied literal spans are the context.
        return literal

    return _literal_spans

def get_pragmatic(topic, literal, pragmatic):
    """Search factory that ignores the query and always yields ``pragmatic``.

    Takes the same positional arguments as ``make_search`` so it can be
    passed to ``RAG`` in its place; ``topic`` and ``literal`` are unused.
    """
    def _pragmatic_spans(q):
        # The query is irrelevant: the supplied pragmatic spans are the context.
        return pragmatic

    return _pragmatic_spans

### Retrieved Answer

In [9]:
# Baseline 1: the QA model answers from passages retrieved out of the topic's
# HTML sources. evaluate() uses the given .pkl path as its on-disk result cache.
evaluate(RAG(make_search), pcqa_test, "eval_results_distilbert.pkl")

Device set to use cpu


Average Metric: 22.34 / 213 (10.5%): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [2:09:47<00:00, 36.56s/it]

2025/08/24 23:21:18 INFO dspy.evaluate.evaluate: Average Metric: 22.341969801856195 / 213 (10.5%)





Unnamed: 0,topic,question,example_response,literal,pragmatic,pred_response,SemanticF1
0,The Legend of Zelda,What year did the Legend of Zelda come out?,The Legend of Zelda came out as early as 1986 for the Famicom in J...,"['FDS release February 21, 1986\n', 'The Legend of Zelda is the fi...","['It came out as early as 1986 for the Famicom in Japan, and was l...",1986,✔️ [0.500]
1,The Legend of Zelda,What console is The Legend of Zelda designed for?,The Legend of Zelda was originally released in 1986 for the Famico...,"[' It came out as early as 1986 for the Famicom in Japan, and was ...","['The Legend of Zelda was commercially successful, selling a milli...",Famicom\n Disk System,✔️ [0.286]
2,The Legend of Zelda,when did the legend of zelda last until?,The Legend of Zelda is the first installment in the Zelda franchis...,['The Legend of Zelda is the first installment in the Zelda franch...,"[Nintendo Switch Online, April 23, 2019]","June 19, 2011",
3,The Legend of Zelda,When was the Legend of Zelda released?,"The Legend of Zelda was released on August 22nd, 1987, for the Nin...","[August 22, 1987]","['It came out as early as 1986 for the Famicom in Japan, and was l...",one hundred years before the events of\n \n Breath of the Wild,
4,The Legend of Zelda,What kind of game is The Legend of Zelda?,"The Legend of Zelda is one that includes roleplaying, action, adve...",[The Legend of Zelda is the first installment of the Zelda series....,"[one that includes roleplaying, action, adventure, and puzzle/logi...",multiplayer,
...,...,...,...,...,...,...,...
208,Baseball,How many times have the new york yankees been to the world series?,"48 times, which is a record actually. Indeed, They have won the wo...",[ 48],"[winning 27 World Series titles, Their 27 titles makes them the mo...",twelve,
209,Baseball,Are the new york yankees based in new york?,"Yes, the New York Yankees now play in New York City but were found...","[The New York Yankees are a Major League Baseball team, based in t...","[The club was founded in Baltimore , Maryland in 1901, and moved t...",New York Yankees\n \n\n\n\n\n One East 161st Street\n ...,✔️ [0.500]
210,Baseball,What sports league does the new York Yankees play in?,The Yankees play in Major League Baseball in the American League a...,[Major League Baseball],"[American League, They are also the only team represented in the B...",Major League Baseball,✔️ [0.500]
211,Baseball,who is the all time leader inh rbi's for the yankees?,Lou Gehrig is the all time leader in runs batted in for the Yankee...,[Lou Gehrig],"['Gehrig finished his career with a .340 AVG, 1,995 RBI', ""During ...",Hank Aaron,


10.49

### Literal Answer

In [14]:
# Baseline 2: the QA model answers from the example's own `literal` spans
# instead of retrieved passages (get_literal ignores the query).
evaluate(RAG(get_literal), pcqa_test, "eval_results_literal.pkl")

Device set to use cpu


Average Metric: 86.80 / 213 (40.8%): 100%|████████████████████████████████████████████| 213/213 [34:46<00:00,  9.80s/it]

2025/08/25 00:01:53 INFO dspy.evaluate.evaluate: Average Metric: 86.79994056425019 / 213 (40.8%)





Unnamed: 0,topic,question,example_response,literal,pragmatic,pred_response,SemanticF1
0,The Legend of Zelda,What year did the Legend of Zelda come out?,The Legend of Zelda came out as early as 1986 for the Famicom in J...,"['FDS release February 21, 1986\n', 'The Legend of Zelda is the fi...","['It came out as early as 1986 for the Famicom in Japan, and was l...",1986,✔️ [0.500]
1,The Legend of Zelda,What console is The Legend of Zelda designed for?,The Legend of Zelda was originally released in 1986 for the Famico...,"[' It came out as early as 1986 for the Famicom in Japan, and was ...","['The Legend of Zelda was commercially successful, selling a milli...",Famicom,✔️ [0.250]
2,The Legend of Zelda,when did the legend of zelda last until?,The Legend of Zelda is the first installment in the Zelda franchis...,['The Legend of Zelda is the first installment in the Zelda franch...,"[Nintendo Switch Online, April 23, 2019]",first installment in the Zelda franchise,✔️ [0.333]
3,The Legend of Zelda,When was the Legend of Zelda released?,"The Legend of Zelda was released on August 22nd, 1987, for the Nin...","[August 22, 1987]","['It came out as early as 1986 for the Famicom in Japan, and was l...","August 22, 1987",✔️ [0.667]
4,The Legend of Zelda,What kind of game is The Legend of Zelda?,"The Legend of Zelda is one that includes roleplaying, action, adve...",[The Legend of Zelda is the first installment of the Zelda series....,"[one that includes roleplaying, action, adventure, and puzzle/logi...",Zelda,✔️ [0.286]
...,...,...,...,...,...,...,...
208,Baseball,How many times have the new york yankees been to the world series?,"48 times, which is a record actually. Indeed, They have won the wo...",[ 48],"[winning 27 World Series titles, Their 27 titles makes them the mo...",48,✔️ [0.400]
209,Baseball,Are the new york yankees based in new york?,"Yes, the New York Yankees now play in New York City but were found...","[The New York Yankees are a Major League Baseball team, based in t...","[The club was founded in Baltimore , Maryland in 1901, and moved t...",The New York Yankees,✔️ [0.500]
210,Baseball,What sports league does the new York Yankees play in?,The Yankees play in Major League Baseball in the American League a...,[Major League Baseball],"[American League, They are also the only team represented in the B...",Major League Baseball,✔️ [0.500]
211,Baseball,who is the all time leader inh rbi's for the yankees?,Lou Gehrig is the all time leader in runs batted in for the Yankee...,[Lou Gehrig],"['Gehrig finished his career with a .340 AVG, 1,995 RBI', ""During ...",Lou Gehrig,✔️ [0.286]


40.75

### Pragmatic Answer

In [15]:
# Baseline 3: the QA model answers from the example's own `pragmatic` spans
# instead of retrieved passages (get_pragmatic ignores the query).
evaluate(RAG(get_pragmatic), pcqa_test, "eval_results_pragmatic.pkl")

Device set to use cpu


Average Metric: 74.43 / 213 (34.9%): 100%|████████████████████████████████████████████| 213/213 [31:15<00:00,  8.81s/it]

2025/08/25 00:33:10 INFO dspy.evaluate.evaluate: Average Metric: 74.42974948540159 / 213 (34.9%)





Unnamed: 0,topic,question,example_response,literal,pragmatic,pred_response,SemanticF1
0,The Legend of Zelda,What year did the Legend of Zelda come out?,The Legend of Zelda came out as early as 1986 for the Famicom in J...,"['FDS release February 21, 1986\n', 'The Legend of Zelda is the fi...","['It came out as early as 1986 for the Famicom in Japan, and was l...",1986,✔️ [0.500]
1,The Legend of Zelda,What console is The Legend of Zelda designed for?,The Legend of Zelda was originally released in 1986 for the Famico...,"[' It came out as early as 1986 for the Famicom in Japan, and was ...","['The Legend of Zelda was commercially successful, selling a milli...",Nintendo Entertainment System,✔️ [0.333]
2,The Legend of Zelda,when did the legend of zelda last until?,The Legend of Zelda is the first installment in the Zelda franchis...,['The Legend of Zelda is the first installment in the Zelda franch...,"[Nintendo Switch Online, April 23, 2019]","April 23, 2019",✔️ [0.400]
3,The Legend of Zelda,When was the Legend of Zelda released?,"The Legend of Zelda was released on August 22nd, 1987, for the Nin...","[August 22, 1987]","['It came out as early as 1986 for the Famicom in Japan, and was l...",1987,✔️ [0.667]
4,The Legend of Zelda,What kind of game is The Legend of Zelda?,"The Legend of Zelda is one that includes roleplaying, action, adve...",[The Legend of Zelda is the first installment of the Zelda series....,"[one that includes roleplaying, action, adventure, and puzzle/logi...",roleplaying,✔️ [0.286]
...,...,...,...,...,...,...,...
208,Baseball,How many times have the new york yankees been to the world series?,"48 times, which is a record actually. Indeed, They have won the wo...",[ 48],"[winning 27 World Series titles, Their 27 titles makes them the mo...",27,✔️ [0.400]
209,Baseball,Are the new york yankees based in new york?,"Yes, the New York Yankees now play in New York City but were found...","[The New York Yankees are a Major League Baseball team, based in t...","[The club was founded in Baltimore , Maryland in 1901, and moved t...",New York in 1903.,✔️ [0.400]
210,Baseball,What sports league does the new York Yankees play in?,The Yankees play in Major League Baseball in the American League a...,[Major League Baseball],"[American League, They are also the only team represented in the B...",American League,✔️ [0.500]
211,Baseball,who is the all time leader inh rbi's for the yankees?,Lou Gehrig is the all time leader in runs batted in for the Yankee...,[Lou Gehrig],"['Gehrig finished his career with a .340 AVG, 1,995 RBI', ""During ...",Gehrig,✔️ [0.286]


34.94

## 4.4 Multi-Step LLM

### Signatures

In [6]:
import dspy

# --- Intermediates ---
# Signature for the goal-summary step: dialogue history in, short goal summary out.
# The docstring and `desc` strings below are prompt text that dspy passes to the
# LM, so editing them changes model behaviour.
class SummarizeGoal(dspy.Signature):
    """Summarize the student's long-term goal or interest from the dialogue history to guide cooperative answering."""
    # NOTE(review): desc describes a list of (question, answer) strings, but
    # CooperativeRAG.forward passes the single string built by _format_history
    # - confirm which form is intended.
    history = dspy.InputField(desc="list of (question, answer) strings from prior turns")
    goal_summary = dspy.OutputField(desc="concise 1-3 sentence summary of the student's goal, preferences, constraints")

# Signature for the need-inference step: history, current question and the
# initially retrieved context in; a short description of the underlying need out.
# The docstring and `desc` strings are prompt text; editing them changes model behaviour.
class InferNeed(dspy.Signature):
    """Infer the pragmatic, cooperative need underlying the current question, using history and retrieved context."""
    history = dspy.InputField()
    question = dspy.InputField()
    # CooperativeRAG.forward fills this with the initial retrieval result.
    context = dspy.InputField()
    need = dspy.OutputField(desc="brief description of what the student is really trying to accomplish right now")

# Signature for the query-generation step: history, question and inferred need
# in; one follow-up search query out. CooperativeRAG uses that query for its
# second retrieval pass.
# The docstring and `desc` strings are prompt text; editing them changes model behaviour.
class GenerateCoopQuery(dspy.Signature):
    """Generate a cooperative follow-up query that would retrieve missing or clarifying info from the corpus."""
    history = dspy.InputField()
    question = dspy.InputField()
    # Output of the InferNeed step.
    need = dspy.InputField()
    coop_query = dspy.OutputField(desc="a single, highly targeted search query or question to re-query the corpus")

# --- Final answering step with CoT ---
# Signature for the final answering step. It receives every intermediate
# product (goal summary, inferred need, both retrieval results) together with
# the history and question, and produces the final answer.
# The docstring and `desc` strings are prompt text; editing them changes model behaviour.
class CooperativeAnswer(dspy.Signature):
    """Produce a cooperative, step-by-step answer grounded in all context, surfacing assumptions and next steps."""
    history = dspy.InputField()
    question = dspy.InputField()
    # Output of the SummarizeGoal step.
    goal_summary = dspy.InputField()
    # Output of the InferNeed step.
    need = dspy.InputField()
    context = dspy.InputField(desc="initial retrieved context")
    extra_context = dspy.InputField(desc="additional context retrieved via the cooperative query")
    # CooperativeRAG exposes this field as `response` on its returned Prediction.
    answer = dspy.OutputField(desc="final cooperative answer with citations if helpful")


### Module

In [7]:
class CooperativeRAG(dspy.Module):
    """Multi-step RAG pipeline that reasons about the asker's need before answering.

    Steps, in order: retrieve passages for the question, summarise the goal
    from the history, infer the current need, generate a follow-up query,
    retrieve again with that query, and produce the final answer with a
    chain-of-thought predictor.

    Args:
        make_search: factory called as ``make_search(topic=..., k=...)`` that
            returns a ``query -> passages`` callable.
        k_initial: number of passages for the first retrieval.
        k_extra: number of passages for the follow-up retrieval.
    """

    def __init__(self, make_search, k_initial=6, k_extra=4):
        super().__init__()
        self.make_search = make_search
        self.k_initial = k_initial
        self.k_extra = k_extra

        # Intermediate steps are plain predictors; only the final answer
        # uses chain-of-thought.
        self.summarize_goal = dspy.Predict(SummarizeGoal)
        self.infer_need = dspy.Predict(InferNeed)
        self.generate_coop_query = dspy.Predict(GenerateCoopQuery)
        self.answerer = dspy.ChainOfThought(CooperativeAnswer)

    def _format_history(self, history_pairs):
        """Render prior turns (dicts with "question"/"answer" keys) as numbered text."""
        rendered = []
        for turn_no, turn in enumerate(history_pairs, 1):
            asked = turn.get("question", "").strip()
            answered = turn.get("answer", "").strip()
            rendered.append(f"Turn {turn_no} - Q: {asked}\nTurn {turn_no} - A: {answered}")
        if not rendered:
            return "No prior turns."
        return "\n".join(rendered)

    def _retrieve(self, topic, query, k):
        """Fetch ``k`` passages for ``query`` and join them with blank lines."""
        passages = self.make_search(topic=topic, k=k)(query)
        return "\n\n".join(passages)

    def _retrieve_initial(self, topic, question):
        """First retrieval pass, driven by the original question."""
        return self._retrieve(topic, question, self.k_initial)

    def _retrieve_extra(self, topic, coop_query):
        """Second retrieval pass; yields "" when there is no usable query."""
        if coop_query and coop_query.strip() != "":
            return self._retrieve(topic, coop_query, self.k_extra)
        return ""

    def forward(self, topic, question, history):
        """Run the full pipeline for one question.

        Returns:
            dspy.Prediction with ``goal_summary``, ``need``, ``coop_query``
            and ``response`` (the final answer text).
        """
        history_text = self._format_history(history)
        initial_context = self._retrieve_initial(topic, question)

        # Intermediate reasoning products, each feeding the next step.
        goal_summary = self.summarize_goal(history=history_text).goal_summary
        inferred_need = self.infer_need(
            history=history_text,
            question=question,
            context=initial_context,
        ).need
        followup_query = self.generate_coop_query(
            history=history_text,
            question=question,
            need=inferred_need,
        ).coop_query

        # Second retrieval pass, driven by the generated query.
        extra_context = self._retrieve_extra(topic, followup_query)

        final = self.answerer(
            history=history_text,
            question=question,
            goal_summary=goal_summary,
            need=inferred_need,
            context=initial_context,
            extra_context=extra_context,
        )

        return dspy.Prediction(
            goal_summary=goal_summary,
            need=inferred_need,
            coop_query=followup_query,
            response=final.answer,
        )

### 4.4.1 First questions

In [None]:

def get_first_qs(filename, dataset_dir="../PragmatiCQA/data"):
    """Load a PragmatiCQA JSONL split as first-turn examples with empty history.

    Args:
        filename: name of the JSONL file inside ``dataset_dir``.
        dataset_dir: directory that contains the dataset files.

    Returns:
        list[dspy.Example] with fields ``topic``, ``question``, ``response``
        and ``history`` (always an empty list); every field except
        ``response`` is marked as an input.
    """
    conversations = []
    # Explicit UTF-8 (as read_html_files does) so parsing does not depend on
    # the platform's default encoding.
    with open(os.path.join(dataset_dir, filename), 'r', encoding='utf-8') as f:
        for line in f:
            # Blank lines (e.g. a trailing newline at end of file) are not
            # JSON documents and would make json.loads raise.
            if not line.strip():
                continue
            conversations.append(json.loads(line))

    examples = []
    for d in conversations:
        first_turn = d['qas'][0]
        example = dspy.Example(
            topic=d['community'],
            question=first_turn['q'],
            response=first_turn['a'],
            history=[],  # first question of the conversation: no prior turns
        ).with_inputs('question', 'topic', 'history')
        examples.append(example)
    return examples


# First question of every conversation in test.jsonl, each with an empty history.
first_qs = get_first_qs("test.jsonl")

: 

In [None]:
# Evaluate the multi-step CooperativeRAG pipeline on the first questions only;
# evaluate() uses the given .pkl path as its on-disk result cache.
evaluate(CooperativeRAG(make_search), first_qs, "eval_results_coop_1Q.pkl")

  0%|          | 0/213 [00:00<?, ?it/s]



Average Metric: 0.57 / 1 (57.1%):   0%|          | 1/213 [02:20<8:15:26, 140.22s/it]



Average Metric: 1.12 / 2 (55.8%):   1%|          | 2/213 [04:22<7:35:42, 129.59s/it]



Average Metric: 2.48 / 7 (35.4%):   3%|▎         | 7/213 [12:17<4:54:52, 85.89s/it] 



Average Metric: 3.41 / 9 (37.9%):   4%|▍         | 9/213 [16:39<5:54:39, 104.31s/it]



Average Metric: 5.26 / 13 (40.5%):   6%|▌         | 13/213 [23:01<5:14:29, 94.35s/it] 



Average Metric: 5.81 / 14 (41.5%):   7%|▋         | 14/213 [25:20<5:57:02, 107.65s/it]



Average Metric: 6.21 / 15 (41.4%):   7%|▋         | 15/213 [28:35<7:22:19, 134.04s/it]



Average Metric: 7.00 / 17 (41.2%):   8%|▊         | 17/213 [32:04<6:20:04, 116.35s/it]



Average Metric: 7.89 / 19 (41.5%):   9%|▉         | 19/213 [34:26<5:06:19, 94.74s/it] 



Average Metric: 8.40 / 20 (42.0%):   9%|▉         | 20/213 [37:28<6:28:57, 120.92s/it]



Average Metric: 10.65 / 25 (42.6%):  12%|█▏        | 25/213 [43:45<3:58:29, 76.11s/it]



Average Metric: 11.42 / 27 (42.3%):  13%|█▎        | 27/213 [46:30<3:59:24, 77.23s/it]



Average Metric: 11.75 / 28 (42.0%):  13%|█▎        | 28/213 [48:27<4:35:35, 89.38s/it]



Average Metric: 12.25 / 29 (42.2%):  14%|█▎        | 29/213 [50:24<4:59:18, 97.60s/it]



Average Metric: 14.31 / 34 (42.1%):  16%|█▌        | 34/213 [56:51<3:41:54, 74.38s/it] 



Average Metric: 15.06 / 35 (43.0%):  16%|█▋        | 35/213 [58:36<4:08:27, 83.75s/it]



Average Metric: 15.69 / 37 (42.4%):  17%|█▋        | 37/213 [1:01:20<3:57:07, 80.84s/it]



Average Metric: 16.75 / 39 (43.0%):  18%|█▊        | 39/213 [1:04:03<3:50:00, 79.32s/it]



Average Metric: 16.75 / 40 (41.9%):  19%|█▉        | 40/213 [1:05:54<4:15:22, 88.57s/it]



Average Metric: 16.75 / 41 (40.9%):  19%|█▉        | 41/213 [1:07:40<4:29:39, 94.07s/it]



Average Metric: 18.13 / 44 (41.2%):  21%|██        | 44/213 [1:11:17<3:28:57, 74.19s/it] 



Average Metric: 19.59 / 47 (41.7%):  22%|██▏       | 47/213 [1:15:32<3:23:23, 73.51s/it]



Average Metric: 20.41 / 50 (40.8%):  23%|██▎       | 50/213 [1:19:28<3:14:05, 71.45s/it]



Average Metric: 20.93 / 52 (40.3%):  24%|██▍       | 52/213 [1:22:05<3:14:13, 72.38s/it]



Average Metric: 21.24 / 53 (40.1%):  25%|██▍       | 53/213 [1:23:44<3:34:47, 80.55s/it]



Average Metric: 22.10 / 57 (38.8%):  27%|██▋       | 57/213 [1:27:31<2:32:20, 58.59s/it]



Average Metric: 22.38 / 58 (38.6%):  27%|██▋       | 58/213 [1:29:15<3:06:41, 72.27s/it]



Average Metric: 22.87 / 60 (38.1%):  28%|██▊       | 60/213 [1:32:10<3:18:40, 77.91s/it]



Average Metric: 24.63 / 64 (38.5%):  30%|███       | 64/213 [1:37:22<2:48:28, 67.84s/it] 



Average Metric: 26.17 / 68 (38.5%):  32%|███▏      | 68/213 [1:41:30<2:21:25, 58.52s/it]



Average Metric: 26.99 / 70 (38.6%):  33%|███▎      | 70/213 [1:44:52<3:00:33, 75.76s/it]