## 4.3 The Traditional NLP Approach

### DSPy module

In [1]:
import dspy
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Create the grok-3-mini language-model client and register it with DSPy via dspy.configure.
# The QA model in RAG is a local DistilBERT pipeline; this LM is presumably what the
# SemanticF1 metric used in evaluate() relies on — TODO confirm.
lm = dspy.LM('xai/grok-3-mini', max_tokens=6000, temperature=0.1, top_p=0.9)
dspy.configure(lm=lm)

# Traverse a directory and read html files - extract text from the html files
import os
from bs4 import BeautifulSoup
def read_html_files(dir_name, directory="../PragmatiCQA-sources"):
    """Collect the plain text of every HTML page stored for one topic.

    Args:
        dir_name: Name of the topic sub-folder inside ``directory``.
        directory: Root folder that holds one sub-folder per topic.

    Returns:
        A list of strings, one per ``*.html`` file found directly inside
        ``directory/dir_name``, each holding the text BeautifulSoup extracts
        from that file. Files are visited in ``os.listdir`` order.
    """
    topic_dir = os.path.join(directory, dir_name)
    page_texts = []
    for entry in os.listdir(topic_dir):
        # Anything that is not an HTML page is ignored.
        if not entry.endswith(".html"):
            continue
        html_path = os.path.join(topic_dir, entry)
        with open(html_path, 'r', encoding='utf-8') as handle:
            parsed_page = BeautifulSoup(handle, 'html.parser')
            page_texts.append(parsed_page.get_text())
    return page_texts

# Load the sentence-embedding model on the CPU and wrap its encode function in a DSPy
# embedder; the make_search retrievers pass this embedder to dspy.retrievers.Embeddings.
model = SentenceTransformer("sentence-transformers/static-retrieval-mrl-en-v1", device="cpu")
embedder = dspy.Embedder(model.encode)

# Build a retriever for a specific topic: read the HTML files in the corresponding folder and index them
def make_search(topic):
    """Build an embedding-based retriever over one topic's HTML pages.

    Args:
        topic: Name of the topic folder handed to ``read_html_files``.

    Returns:
        A ``dspy.retrievers.Embeddings`` instance over the topic's page texts,
        configured to return 5 results per query.

    Note:
        A later cell defines another ``make_search`` that takes
        ``(topic, literal, pragmatic)``; once that cell has run it replaces
        this definition.
    """
    documents = read_html_files(topic)
    # NOTE(review): brute_force_threshold is presumably a corpus-size cutoff
    # (number of documents), not a character limit — confirm against the DSPy docs.
    brute_force_limit = 10000
    results_per_query = 5  # number of documents to retrieve per search query
    return dspy.retrievers.Embeddings(
        embedder=embedder,
        corpus=documents,
        k=results_per_query,
        brute_force_threshold=brute_force_limit,
    )

# # Make a RAG module with a given retriever.
# class RAG(dspy.Module):
#     def __init__(self):
#         self.respond = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

#     def forward(self, topic, question, literal, pragmatic):
#         search = make_search(topic)
#         context = " ".join(search(question).passages)
#         output = self.respond(context=context, question=question)
#         return dspy.Prediction(response=output['answer'])

In [2]:
class RAG(dspy.Module):
    """Extractive question answering over a context chosen by a pluggable search factory.

    The context is obtained from ``make_search`` and the answer is extracted
    from it by a Hugging Face ``question-answering`` pipeline running
    ``distilbert-base-cased-distilled-squad``.
    """

    def __init__(self, make_search):
        """Store the search factory and load the question-answering pipeline.

        Args:
            make_search: Callable invoked as ``make_search(topic, literal, pragmatic)``
                that returns a function mapping a question to an iterable of
                context strings.
        """
        # Run dspy.Module's own initialisation before adding attributes, as
        # DSPy module subclasses are expected to do.
        super().__init__()
        self.make_search = make_search
        self.respond = pipeline("question-answering", model='distilbert-base-cased-distilled-squad')

    def forward(self, topic, question, literal, pragmatic):
        """Answer ``question`` from the context produced for this example.

        Args:
            topic: Topic name forwarded to the search factory.
            question: The question to answer; also used as the search query.
            literal: Forwarded to the search factory unchanged.
            pragmatic: Forwarded to the search factory unchanged.

        Returns:
            A ``dspy.Prediction`` whose ``response`` is the pipeline's ``answer`` field.
        """
        search = self.make_search(topic, literal, pragmatic)
        # The context strings are concatenated into one passage for the QA model.
        context = " ".join(search(question))
        output = self.respond(context=context, question=question)
        return dspy.Prediction(response=output['answer'])

In [3]:
import json
def get_conversations_set(filename, dataset_dir="../PragmatiCQA/data"):
    """Load a JSON-lines conversation file as a list of DSPy examples.

    Each non-blank line of the file is parsed as one JSON conversation, and
    one example is built from the first entry of its ``qas`` list.

    Args:
        filename: Name of the ``.jsonl`` file inside ``dataset_dir``.
        dataset_dir: Folder that contains the dataset files.

    Returns:
        A list of ``dspy.Example`` objects with fields ``topic``, ``question``,
        ``response``, ``literal`` and ``pragmatic``; ``question``, ``topic``,
        ``literal`` and ``pragmatic`` are marked as inputs.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError, IndexError: If a conversation lacks the expected fields.
    """
    examples = []
    # Read as UTF-8 explicitly so the result does not depend on the platform's
    # default text encoding.
    with open(os.path.join(dataset_dir, filename), 'r', encoding='utf-8') as f:
        for line in f:
            # Tolerate blank lines (e.g. a trailing newline at end of file),
            # which json.loads would otherwise reject.
            if not line.strip():
                continue
            conversation = json.loads(line)
            first_turn = conversation['qas'][0]
            answer_meta = first_turn['a_meta']
            example = dspy.Example(
                topic=conversation['community'],
                question=first_turn['q'],
                response=first_turn['a'],
                literal=[span['text'] for span in answer_meta['literal_obj']],
                pragmatic=[span['text'] for span in answer_meta['pragmatic_obj']],
            )
            examples.append(example.with_inputs('question', 'topic', 'literal', 'pragmatic'))
    return examples

# Load the test split as DSPy examples (one per conversation, built from its first question/answer entry).
pcqa_test = get_conversations_set("test.jsonl")

In [4]:
# from dspy.evaluate import SemanticF1, Evaluate
# metric = SemanticF1(decompositional=True)

# model = RAG()
# print(pcqa_test[0].inputs())
# pred = model(**pcqa_test[0].inputs())
# print(pred)

# metric(pcqa_test[0], pred)


In [5]:
from dspy.evaluate import SemanticF1, Evaluate
import pickle
def evaluate(model, example_set, save_path):
    """Score ``model`` on ``example_set`` with SemanticF1, caching the result on disk.

    If ``save_path`` already exists, the pickled result stored there is
    returned and nothing is evaluated. Otherwise the model is evaluated
    single-threaded and the result is pickled to ``save_path``.

    Args:
        model: The DSPy program to evaluate.
        example_set: Examples passed to ``Evaluate`` as the dev set.
        save_path: Path of the pickle file used as the result cache.

    Returns:
        The object returned by the evaluator, or the previously pickled one.
    """
    if os.path.exists(save_path):
        # pickle.load needs an open binary file object, not a path string.
        # NOTE: unpickling can execute arbitrary code — only load cache files
        # that this notebook wrote itself.
        with open(save_path, "rb") as f:
            return pickle.load(f)

    # The metric and evaluator are only built when an evaluation actually runs.
    metric = SemanticF1(decompositional=True)
    evaluator = Evaluate(devset=example_set, metric=metric, num_threads=1, display_progress=True, display_table=True, provide_feedback=True)
    results = evaluator(model)
    with open(save_path, "wb") as f:
        pickle.dump(results, f)

    return results

In [13]:
def make_search(topic, literal, pragmatic):
    """Return a search function backed by embedding retrieval over a topic's pages.

    Args:
        topic: Name of the topic folder handed to ``read_html_files``.
        literal: Unused; accepted so the signature matches the other context factories.
        pragmatic: Unused; accepted so the signature matches the other context factories.

    Returns:
        A function that takes a query and returns the ``passages`` of the
        retriever's result for that query.
    """
    documents = read_html_files(topic)
    # NOTE(review): brute_force_threshold is presumably a corpus-size cutoff
    # (number of documents), not a character limit — confirm against the DSPy docs.
    brute_force_limit = 10000
    results_per_query = 5  # number of documents to retrieve per search query
    retriever = dspy.retrievers.Embeddings(
        embedder=embedder,
        corpus=documents,
        k=results_per_query,
        brute_force_threshold=brute_force_limit,
    )

    def retrieve_passages(q):
        return retriever(q).passages

    return retrieve_passages


def get_literal(topic, literal, pragmatic):
    """Return a search function that ignores its query and yields ``literal``.

    ``topic`` and ``pragmatic`` are accepted only so the signature matches
    the other context factories.
    """
    def literal_context(q):
        return literal

    return literal_context

def get_pragmatic(topic, literal, pragmatic):
    """Return a search function that ignores its query and yields ``pragmatic``.

    ``topic`` and ``literal`` are accepted only so the signature matches
    the other context factories.
    """
    def pragmatic_context(q):
        return pragmatic

    return pragmatic_context

Retrieved

In [9]:
# Evaluate the DistilBERT QA pipeline with passages from embedding retrieval as context;
# evaluate() stores the results under eval_results_distilbert.pkl.
evaluate(RAG(make_search), pcqa_test, "eval_results_distilbert.pkl")

Device set to use cpu


Average Metric: 22.34 / 213 (10.5%): 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 213/213 [2:09:47<00:00, 36.56s/it]

2025/08/24 23:21:18 INFO dspy.evaluate.evaluate: Average Metric: 22.341969801856195 / 213 (10.5%)





Unnamed: 0,topic,question,example_response,literal,pragmatic,pred_response,SemanticF1
0,The Legend of Zelda,What year did the Legend of Zelda come out?,The Legend of Zelda came out as early as 1986 for the Famicom in J...,"['FDS release February 21, 1986\n', 'The Legend of Zelda is the fi...","['It came out as early as 1986 for the Famicom in Japan, and was l...",1986,✔️ [0.500]
1,The Legend of Zelda,What console is The Legend of Zelda designed for?,The Legend of Zelda was originally released in 1986 for the Famico...,"[' It came out as early as 1986 for the Famicom in Japan, and was ...","['The Legend of Zelda was commercially successful, selling a milli...",Famicom\n Disk System,✔️ [0.286]
2,The Legend of Zelda,when did the legend of zelda last until?,The Legend of Zelda is the first installment in the Zelda franchis...,['The Legend of Zelda is the first installment in the Zelda franch...,"[Nintendo Switch Online, April 23, 2019]","June 19, 2011",
3,The Legend of Zelda,When was the Legend of Zelda released?,"The Legend of Zelda was released on August 22nd, 1987, for the Nin...","[August 22, 1987]","['It came out as early as 1986 for the Famicom in Japan, and was l...",one hundred years before the events of\n \n Breath of the Wild,
4,The Legend of Zelda,What kind of game is The Legend of Zelda?,"The Legend of Zelda is one that includes roleplaying, action, adve...",[The Legend of Zelda is the first installment of the Zelda series....,"[one that includes roleplaying, action, adventure, and puzzle/logi...",multiplayer,
...,...,...,...,...,...,...,...
208,Baseball,How many times have the new york yankees been to the world series?,"48 times, which is a record actually. Indeed, They have won the wo...",[ 48],"[winning 27 World Series titles, Their 27 titles makes them the mo...",twelve,
209,Baseball,Are the new york yankees based in new york?,"Yes, the New York Yankees now play in New York City but were found...","[The New York Yankees are a Major League Baseball team, based in t...","[The club was founded in Baltimore , Maryland in 1901, and moved t...",New York Yankees\n \n\n\n\n\n One East 161st Street\n ...,✔️ [0.500]
210,Baseball,What sports league does the new York Yankees play in?,The Yankees play in Major League Baseball in the American League a...,[Major League Baseball],"[American League, They are also the only team represented in the B...",Major League Baseball,✔️ [0.500]
211,Baseball,who is the all time leader inh rbi's for the yankees?,Lou Gehrig is the all time leader in runs batted in for the Yankee...,[Lou Gehrig],"['Gehrig finished his career with a .340 AVG, 1,995 RBI', ""During ...",Hank Aaron,


10.49

In [14]:
# Same QA pipeline, but the context is each example's `literal` texts instead of retrieved passages.
evaluate(RAG(get_literal), pcqa_test, "eval_results_literal.pkl")

Device set to use cpu


Average Metric: 86.80 / 213 (40.8%): 100%|████████████████████████████████████████████| 213/213 [34:46<00:00,  9.80s/it]

2025/08/25 00:01:53 INFO dspy.evaluate.evaluate: Average Metric: 86.79994056425019 / 213 (40.8%)





Unnamed: 0,topic,question,example_response,literal,pragmatic,pred_response,SemanticF1
0,The Legend of Zelda,What year did the Legend of Zelda come out?,The Legend of Zelda came out as early as 1986 for the Famicom in J...,"['FDS release February 21, 1986\n', 'The Legend of Zelda is the fi...","['It came out as early as 1986 for the Famicom in Japan, and was l...",1986,✔️ [0.500]
1,The Legend of Zelda,What console is The Legend of Zelda designed for?,The Legend of Zelda was originally released in 1986 for the Famico...,"[' It came out as early as 1986 for the Famicom in Japan, and was ...","['The Legend of Zelda was commercially successful, selling a milli...",Famicom,✔️ [0.250]
2,The Legend of Zelda,when did the legend of zelda last until?,The Legend of Zelda is the first installment in the Zelda franchis...,['The Legend of Zelda is the first installment in the Zelda franch...,"[Nintendo Switch Online, April 23, 2019]",first installment in the Zelda franchise,✔️ [0.333]
3,The Legend of Zelda,When was the Legend of Zelda released?,"The Legend of Zelda was released on August 22nd, 1987, for the Nin...","[August 22, 1987]","['It came out as early as 1986 for the Famicom in Japan, and was l...","August 22, 1987",✔️ [0.667]
4,The Legend of Zelda,What kind of game is The Legend of Zelda?,"The Legend of Zelda is one that includes roleplaying, action, adve...",[The Legend of Zelda is the first installment of the Zelda series....,"[one that includes roleplaying, action, adventure, and puzzle/logi...",Zelda,✔️ [0.286]
...,...,...,...,...,...,...,...
208,Baseball,How many times have the new york yankees been to the world series?,"48 times, which is a record actually. Indeed, They have won the wo...",[ 48],"[winning 27 World Series titles, Their 27 titles makes them the mo...",48,✔️ [0.400]
209,Baseball,Are the new york yankees based in new york?,"Yes, the New York Yankees now play in New York City but were found...","[The New York Yankees are a Major League Baseball team, based in t...","[The club was founded in Baltimore , Maryland in 1901, and moved t...",The New York Yankees,✔️ [0.500]
210,Baseball,What sports league does the new York Yankees play in?,The Yankees play in Major League Baseball in the American League a...,[Major League Baseball],"[American League, They are also the only team represented in the B...",Major League Baseball,✔️ [0.500]
211,Baseball,who is the all time leader inh rbi's for the yankees?,Lou Gehrig is the all time leader in runs batted in for the Yankee...,[Lou Gehrig],"['Gehrig finished his career with a .340 AVG, 1,995 RBI', ""During ...",Lou Gehrig,✔️ [0.286]


40.75

In [15]:
# Same QA pipeline, but the context is each example's `pragmatic` texts instead of retrieved passages.
evaluate(RAG(get_pragmatic), pcqa_test, "eval_results_pragmatic.pkl")

Device set to use cpu


Average Metric: 74.43 / 213 (34.9%): 100%|████████████████████████████████████████████| 213/213 [31:15<00:00,  8.81s/it]

2025/08/25 00:33:10 INFO dspy.evaluate.evaluate: Average Metric: 74.42974948540159 / 213 (34.9%)





Unnamed: 0,topic,question,example_response,literal,pragmatic,pred_response,SemanticF1
0,The Legend of Zelda,What year did the Legend of Zelda come out?,The Legend of Zelda came out as early as 1986 for the Famicom in J...,"['FDS release February 21, 1986\n', 'The Legend of Zelda is the fi...","['It came out as early as 1986 for the Famicom in Japan, and was l...",1986,✔️ [0.500]
1,The Legend of Zelda,What console is The Legend of Zelda designed for?,The Legend of Zelda was originally released in 1986 for the Famico...,"[' It came out as early as 1986 for the Famicom in Japan, and was ...","['The Legend of Zelda was commercially successful, selling a milli...",Nintendo Entertainment System,✔️ [0.333]
2,The Legend of Zelda,when did the legend of zelda last until?,The Legend of Zelda is the first installment in the Zelda franchis...,['The Legend of Zelda is the first installment in the Zelda franch...,"[Nintendo Switch Online, April 23, 2019]","April 23, 2019",✔️ [0.400]
3,The Legend of Zelda,When was the Legend of Zelda released?,"The Legend of Zelda was released on August 22nd, 1987, for the Nin...","[August 22, 1987]","['It came out as early as 1986 for the Famicom in Japan, and was l...",1987,✔️ [0.667]
4,The Legend of Zelda,What kind of game is The Legend of Zelda?,"The Legend of Zelda is one that includes roleplaying, action, adve...",[The Legend of Zelda is the first installment of the Zelda series....,"[one that includes roleplaying, action, adventure, and puzzle/logi...",roleplaying,✔️ [0.286]
...,...,...,...,...,...,...,...
208,Baseball,How many times have the new york yankees been to the world series?,"48 times, which is a record actually. Indeed, They have won the wo...",[ 48],"[winning 27 World Series titles, Their 27 titles makes them the mo...",27,✔️ [0.400]
209,Baseball,Are the new york yankees based in new york?,"Yes, the New York Yankees now play in New York City but were found...","[The New York Yankees are a Major League Baseball team, based in t...","[The club was founded in Baltimore , Maryland in 1901, and moved t...",New York in 1903.,✔️ [0.400]
210,Baseball,What sports league does the new York Yankees play in?,The Yankees play in Major League Baseball in the American League a...,[Major League Baseball],"[American League, They are also the only team represented in the B...",American League,✔️ [0.500]
211,Baseball,who is the all time leader inh rbi's for the yankees?,Lou Gehrig is the all time leader in runs batted in for the Yankee...,[Lou Gehrig],"['Gehrig finished his career with a .340 AVG, 1,995 RBI', ""During ...",Gehrig,✔️ [0.286]


34.94