# Main imports and code

In [1]:
from urllib import request
import pandas as pd
import logging
import torch
from collections import Counter
from ast import literal_eval

In [2]:
# prepare logger
logging.basicConfig(level=logging.INFO)

transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# check gpu
cuda_available = torch.cuda.is_available()

print('Cuda available? ',cuda_available)

Cuda available?  True


# Fetch Don't Patronize Me! data manager module

In [3]:
# helper function to save predictions to an output file
def labels2file(p, outf_path):
	with open(outf_path,'w') as outf:
		for pi in p:
			outf.write(','.join([str(k) for k in pi])+'\n')

In [4]:
from dont_patronize_me import DontPatronizeMe

In [5]:
dpm = DontPatronizeMe('.', '.')

In [6]:
dpm.load_task1()
dpm.load_task2(return_one_hot=True)

Map of label to numerical label:
{'Unbalanced_power_relations': 0, 'Shallow_solution': 1, 'Presupposition': 2, 'Authority_voice': 3, 'Metaphors': 4, 'Compassion': 5, 'The_poorer_the_merrier': 6}


# Load paragraph IDs

In [7]:
trids = pd.read_csv('data/train_semeval_parids-labels.csv')
teids = pd.read_csv('data/dev_semeval_parids-labels.csv')

In [8]:
trids.par_id = trids.par_id.astype(str)
teids.par_id = teids.par_id.astype(str)

In [9]:
data=dpm.train_task1_df

In [10]:
data

Unnamed: 0,par_id,art_id,keyword,country,text,label,orig_label
0,1,@@24942188,hopeless,ph,"We 're living in times of absolute insanity , ...",0,0
1,2,@@21968160,migrant,gh,"In Libya today , there are countless number of...",0,0
2,3,@@16584954,immigrant,ie,"""White House press secretary Sean Spicer said ...",0,0
3,4,@@7811231,disabled,nz,Council customers only signs would be displaye...,0,0
4,5,@@1494111,refugee,ca,""""""" Just like we received migrants fleeing El ...",0,0
...,...,...,...,...,...,...,...
10464,10465,@@14297363,women,lk,"""Sri Lankan norms and culture inhibit women fr...",0,1
10465,10466,@@70091353,vulnerable,ph,He added that the AFP will continue to bank on...,0,0
10466,10467,@@20282330,in-need,ng,""""""" She has one huge platform , and informatio...",1,3
10467,10468,@@16753236,hopeless,in,""""""" Anja Ringgren Loven I ca n't find a word t...",1,4




# Rebuild training set (Task 1)

In [11]:
rows = [] # will contain par_id, label and text
for idx in range(len(trids)):
  parid = trids.par_id[idx]
  #print(parid)
  # select row from original dataset to retrieve `text` and binary label
  keyword = data.loc[data.par_id == parid].keyword.values[0]
  text = data.loc[data.par_id == parid].text.values[0]
  label = data.loc[data.par_id == parid].label.values[0]
  rows.append({
      'par_id':parid,
      'community':keyword,
      'text':text,
      'label':label
  })


In [12]:
import random

In [13]:
trdf1 = pd.DataFrame(rows)

In [14]:
trdf1

Unnamed: 0,par_id,community,text,label
0,4341,poor-families,"The scheme saw an estimated 150,000 children f...",1
1,4136,homeless,Durban 's homeless communities reconciliation ...,1
2,10352,poor-families,The next immediate problem that cropped up was...,1
3,8279,vulnerable,Far more important than the implications for t...,1
4,1164,poor-families,To strengthen child-sensitive social protectio...,1
...,...,...,...,...
8370,8380,refugee,Rescue teams search for survivors on the rubbl...,0
8371,8381,hopeless,The launch of ' Happy Birthday ' took place la...,0
8372,8382,homeless,"The unrest has left at least 20,000 people dea...",0
8373,8383,hopeless,You have to see it from my perspective . I may...,0


# Rebuild test set (Task 1)

In [15]:
rows = [] # will contain par_id, label and text
for idx in range(len(teids)):
  parid = teids.par_id[idx]
  #print(parid)
  # select row from original dataset
  keyword = data.loc[data.par_id == parid].keyword.values[0]
  text = data.loc[data.par_id == parid].text.values[0]
  label = data.loc[data.par_id == parid].label.values[0]
  rows.append({
      'par_id':parid,
      'community':keyword,
      'text':text,
      'label':label
  })


In [16]:
len(rows)

2094

In [17]:
tedf1 = pd.DataFrame(rows)

In [18]:
tedf1 = tedf1.sample(frac=1, random_state=42).reset_index(drop=True)

# RoBERTa Baseline for Task 1

In [19]:
# downsample negative instances
pcldf = trdf1[trdf1.label==1]
npos = len(pcldf)

training_set1 = pd.concat([pcldf,trdf1[trdf1.label==0][:npos*2]])

In [20]:
training_set1

Unnamed: 0,par_id,community,text,label
0,4341,poor-families,"The scheme saw an estimated 150,000 children f...",1
1,4136,homeless,Durban 's homeless communities reconciliation ...,1
2,10352,poor-families,The next immediate problem that cropped up was...,1
3,8279,vulnerable,Far more important than the implications for t...,1
4,1164,poor-families,To strengthen child-sensitive social protectio...,1
...,...,...,...,...
2377,1775,refugee,Last but not the least element of culpability ...,0
2378,1776,refugee,"Then , taking the art of counter-intuitive non...",0
2379,1777,refugee,Kagunga village was reported to lack necessary...,0
2380,1778,vulnerable,"""After her parents high-profile divorce after ...",0


In [22]:

from baseline import RobertaClassifier

task1_model = RobertaClassifier(
    num_labels=2,
    num_train_epochs=1,
    use_cuda=cuda_available
)

task1_model.train_model(training_set1[['text', 'label']])
 
preds_task1, _ = task1_model.predict(tedf1.text.tolist())


  from .autonotebook import tqdm as notebook_tqdm
INFO:datasets:TensorFlow version 2.20.0 available.
2026-02-11 11:29:42.967824: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-11 11:30:12.463287: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 2382/2382 [00:00<00:00, 6294.60 examples/s]
You're using

Step,Training Loss
149,0.536


Map: 100%|██████████| 2094/2094 [00:00<00:00, 20869.46 examples/s]


In [23]:
Counter(preds_task1)

Counter({np.int64(0): 1671, np.int64(1): 423})

In [24]:
labels2file([[k] for k in preds_task1], 'task1.txt')

In [25]:
# Calculate F1 score for Task 1
from sklearn.metrics import f1_score, classification_report, confusion_matrix

# Get true labels from tedf1
true_labels_task1 = tedf1['label'].tolist()

# Calculate F1 score
f1_binary = f1_score(true_labels_task1, preds_task1, average='binary')
f1_macro = f1_score(true_labels_task1, preds_task1, average='macro')
f1_weighted = f1_score(true_labels_task1, preds_task1, average='weighted')

print("=" * 50)
print("Task 1 - Binary Classification Results")
print("=" * 50)
print(f"F1 Score (Binary):    {f1_binary:.4f}")
print(f"F1 Score (Macro):     {f1_macro:.4f}")
print(f"F1 Score (Weighted):  {f1_weighted:.4f}")
print("=" * 50)
print("\nClassification Report:")
print(classification_report(true_labels_task1, preds_task1, 
                            target_names=['Non-PCL (0)', 'PCL (1)']))
print("\nConfusion Matrix:")
print(confusion_matrix(true_labels_task1, preds_task1))
print("[[TN FP]")
print(" [FN TP]]")

Task 1 - Binary Classification Results
F1 Score (Binary):    0.4437
F1 Score (Macro):     0.6734
F1 Score (Weighted):  0.8593

Classification Report:
              precision    recall  f1-score   support

 Non-PCL (0)       0.96      0.85      0.90      1895
     PCL (1)       0.33      0.69      0.44       199

    accuracy                           0.83      2094
   macro avg       0.64      0.77      0.67      2094
weighted avg       0.90      0.83      0.86      2094


Confusion Matrix:
[[1610  285]
 [  61  138]]
[[TN FP]
 [FN TP]]


# Rebuild training set (Task 2)

In [26]:
rows2 = [] # will contain par_id, label and text
for idx in range(len(trids)):
  parid = trids.par_id[idx]
  label = trids.label[idx]
  # select row from original dataset to retrieve the `text` value
  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]
  rows2.append({
      'par_id':parid,
      'text':text,
      'label':label
  })


In [27]:
trdf2 = pd.DataFrame(rows2)

In [28]:
trdf2

Unnamed: 0,par_id,text,label
0,4341,"The scheme saw an estimated 150,000 children f...","[1, 0, 0, 1, 0, 0, 0]"
1,4136,Durban 's homeless communities reconciliation ...,"[0, 1, 0, 0, 0, 0, 0]"
2,10352,The next immediate problem that cropped up was...,"[1, 0, 0, 0, 0, 1, 0]"
3,8279,Far more important than the implications for t...,"[0, 0, 0, 1, 0, 0, 0]"
4,1164,To strengthen child-sensitive social protectio...,"[1, 0, 0, 1, 1, 1, 0]"
...,...,...,...
8370,8380,Rescue teams search for survivors on the rubbl...,"[0, 0, 0, 0, 0, 0, 0]"
8371,8381,The launch of ' Happy Birthday ' took place la...,"[0, 0, 0, 0, 0, 0, 0]"
8372,8382,"The unrest has left at least 20,000 people dea...","[0, 0, 0, 0, 0, 0, 0]"
8373,8383,You have to see it from my perspective . I may...,"[0, 0, 0, 0, 0, 0, 0]"


In [29]:
trdf2.label = trdf2.label.apply(literal_eval)

# Rebuild test set (Task 2)

In [30]:
rows2 = [] # will contain par_id, label and text
for idx in range(len(teids)):
  parid = teids.par_id[idx]
  label = teids.label[idx]
  #print(parid)
  # select row from original dataset to access the `text` value
  text = dpm.train_task1_df.loc[dpm.train_task1_df.par_id == parid].text.values[0]
  rows2.append({
      'par_id':parid,
      'text':text,
      'label':label
  })


In [31]:
tedf2 = pd.DataFrame(rows2)

In [32]:
tedf2

Unnamed: 0,par_id,text,label
0,4046,We also know that they can benefit by receivin...,"[1, 0, 0, 1, 0, 0, 0]"
1,1279,Pope Francis washed and kissed the feet of Mus...,"[0, 1, 0, 0, 0, 0, 0]"
2,8330,Many refugees do n't want to be resettled anyw...,"[0, 0, 1, 0, 0, 0, 0]"
3,4063,"""Budding chefs , like """" Fred """" , """" Winston ...","[1, 0, 0, 1, 1, 1, 0]"
4,4089,"""In a 90-degree view of his constituency , one...","[1, 0, 0, 0, 0, 0, 0]"
...,...,...,...
2089,10462,"The sad spectacle , which occurred on Saturday...","[0, 0, 0, 0, 0, 0, 0]"
2090,10463,""""""" The Pakistani police came to our house and...","[0, 0, 0, 0, 0, 0, 0]"
2091,10464,"""When Marie O'Donoghue went looking for a spec...","[0, 0, 0, 0, 0, 0, 0]"
2092,10465,"""Sri Lankan norms and culture inhibit women fr...","[0, 0, 0, 0, 0, 0, 0]"


In [33]:
tedf2.label = tedf2.label.apply(literal_eval)

# RoBERTa baseline for Task 2

In [34]:
all_negs = trdf2[trdf2.label.apply(lambda x:sum(x) == 0)]
all_pos = trdf2[trdf2.label.apply(lambda x:sum(x) > 0)]

training_set2 = pd.concat([all_pos,all_negs[:round(len(all_pos)*0.5)]])

In [35]:
training_set2

Unnamed: 0,par_id,text,label
0,4341,"The scheme saw an estimated 150,000 children f...","[1, 0, 0, 1, 0, 0, 0]"
1,4136,Durban 's homeless communities reconciliation ...,"[0, 1, 0, 0, 0, 0, 0]"
2,10352,The next immediate problem that cropped up was...,"[1, 0, 0, 0, 0, 1, 0]"
3,8279,Far more important than the implications for t...,"[0, 0, 0, 1, 0, 0, 0]"
4,1164,To strengthen child-sensitive social protectio...,"[1, 0, 0, 1, 1, 1, 0]"
...,...,...,...
1186,434,""""""" I was absolutely useless at school , hopel...","[0, 0, 0, 0, 0, 0, 0]"
1187,435,I also noticed the change in socio-economic le...,"[0, 0, 0, 0, 0, 0, 0]"
1188,436,"Can Donald Trump win ? It 's possible , but ce...","[0, 0, 0, 0, 0, 0, 0]"
1189,437,He added that any introduction of new law must...,"[0, 0, 0, 0, 0, 0, 0]"


In [36]:
type(training_set2[['text', 'label']])

pandas.core.frame.DataFrame

In [41]:
# FIX: Convert labels from lists to numpy arrays of floats
import numpy as np
training_set2['label'] = training_set2['label'].apply(lambda x: np.array(x, dtype=np.float32))
tedf2['label'] = tedf2['label'].apply(lambda x: np.array(x, dtype=np.float32))

In [42]:
from baseline import RobertaMultiLabelClassifier

multi_model = RobertaMultiLabelClassifier(
    num_labels=7,
    num_train_epochs=3
)

multi_model.train_model(training_set2[['text', 'label']])

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.dense.bias', 'classifier.out_proj.weight', 'classifier.out_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Map: 100%|██████████| 1191/1191 [00:00<00:00, 16229.00 examples/s]
  lambda x: {"labels": torch.tensor(x["labels"], dtype=torch.float32)}
Map: 100%|██████████| 1191/1191 [00:00<00:00, 3422.78 examples/s]
You're using a RobertaTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
75,0.4329
150,0.34
225,0.2871


In [43]:
preds_task2, probs_task2 = multi_model.predict(tedf2.text.tolist())

Map: 100%|██████████| 2094/2094 [00:00<00:00, 10975.75 examples/s]


In [44]:
labels2file(preds_task2, 'task2.txt')

In [48]:
# Calculate F1 score for Task 2 - Multi-label Classification
from sklearn.metrics import f1_score, classification_report, hamming_loss

# Category names from the dataset
category_names = [
    'Unbalanced_power_relations',
    'Shallow_solution', 
    'Presupposition',
    'Authority_voice',
    'Metaphors',
    'Compassion',
    'The_poorer_the_merrier'
]

# Get true labels from tedf2 - convert back to array format
true_labels_task2 = np.array([label for label in tedf2['label'].values])

print("=" * 70)
print("Task 2 - Multi-Label Classification Results")
print("=" * 70)

# Calculate per-category F1 scores
f1_per_category = f1_score(true_labels_task2, preds_task2, average=None)

print("\nF1 Score per Category:")
print("-" * 70)
for idx, (category, f1) in enumerate(zip(category_names, f1_per_category)):
    print(f"{idx}. {category:30s} F1: {f1:.4f}")

# Calculate overall metrics
f1_micro = f1_score(true_labels_task2, preds_task2, average='micro')
f1_macro = f1_score(true_labels_task2, preds_task2, average='macro')
f1_weighted = f1_score(true_labels_task2, preds_task2, average='weighted')
f1_samples = f1_score(true_labels_task2, preds_task2, average='samples')
hamming = hamming_loss(true_labels_task2, preds_task2)

print("\n" + "=" * 70)
print("Overall Metrics:")
print("-" * 70)
print(f"F1 Score (Micro):     {f1_micro:.4f}")
print(f"F1 Score (Macro):     {f1_macro:.4f}")
print(f"F1 Score (Weighted):  {f1_weighted:.4f}")
print(f"F1 Score (Samples):   {f1_samples:.4f}")
print(f"Hamming Loss:         {hamming:.4f}")
print("=" * 70)

# Detailed classification report
print("\nDetailed Classification Report:")
print(classification_report(true_labels_task2, preds_task2, 
                           target_names=category_names, 
                           zero_division=0))

Task 2 - Multi-Label Classification Results

F1 Score per Category:
----------------------------------------------------------------------
0. Unbalanced_power_relations     F1: 0.3978
1. Shallow_solution               F1: 0.2619
2. Presupposition                 F1: 0.3363
3. Authority_voice                F1: 0.0984
4. Metaphors                      F1: 0.0000
5. Compassion                     F1: 0.3864
6. The_poorer_the_merrier         F1: 0.0000

Overall Metrics:
----------------------------------------------------------------------
F1 Score (Micro):     0.3415
F1 Score (Macro):     0.2115
F1 Score (Weighted):  0.2941
F1 Score (Samples):   0.0551
Hamming Loss:         0.0553

Detailed Classification Report:
                            precision    recall  f1-score   support

Unbalanced_power_relations       0.27      0.77      0.40       142
          Shallow_solution       0.23      0.31      0.26        36
            Presupposition       0.37      0.31      0.34        62
      

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


## Prepare submission

In [45]:
!cat task1.txt | head -n 10

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
0
1
0
0
0
1
0
0
0
0


In [46]:
!cat task2.txt | head -n 10

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
0,0,0,0,0,0,0
1,1,0,0,0,0,0
0,0,0,0,0,0,0
1,0,0,0,0,1,0
0,0,0,0,0,0,0
0,0,0,0,0,1,0
1,0,0,0,0,0,0
1,1,0,0,0,0,0
0,0,0,0,0,1,0
1,0,1,0,0,1,0


In [47]:
!zip submission.zip task1.txt task2.txt

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
/bin/bash: line 1: zip: command not found
