## Load the DataSet

In [1]:
%pip install pandas


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
%pip install scikit-learn



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [3]:
import json
import pandas as pd

# Load the train dataset
with open('cocolofa/train.json', 'r') as f:
    train_data = json.load(f)

# Extract relevant fields
comments = []
fallacies = []
for article in train_data:
    for comment in article['comments']:
        comments.append(comment['comment'])
        fallacies.append(comment['fallacy'])

# Create a DataFrame
df = pd.DataFrame({'comment': comments, 'fallacy': fallacies})
print(df.head())


                                             comment              fallacy
0  Lack of transparency in government isn't unexp...                 none
1  While the issues discussed here should be addr...  appeal to authority
2  The excuse that Brazilian municipalities do no...                 none
3  This is what's to be expected of developing an...                 none
4  Sad to say, I have to agree with you. Rulers c...  appeal to tradition


In [4]:
new_data = [
    {"comment": "Katherine is a bad choice for mayor because she didn’t grow up in this town.", "fallacy": "ad hominem"},
    {"comment": "You can’t trust John’s opinion on climate change because he’s not a scientist.", "fallacy": "ad hominem"},
    {"comment": "Don’t listen to her advice on education reform; she dropped out of college.", "fallacy": "ad hominem"},
    {"comment": "The CEO’s proposal is invalid because he only cares about making money.", "fallacy": "ad hominem"},
    {"comment": "His argument on healthcare policy is irrelevant because he’s overweight.", "fallacy": "ad hominem"},
    {"comment": "Why would we take financial advice from someone who went bankrupt?", "fallacy": "ad hominem"},
    {"comment": "Her stance on environmental issues is biased because she owns a gas station.", "fallacy": "ad hominem"},
    {"comment": "Of course, he would say that—he’s been paid by the opposition.", "fallacy": "ad hominem"},
    {"comment": "You shouldn't listen to her critique on art; she’s never painted anything herself.", "fallacy": "ad hominem"},
    {"comment": "His thoughts on improving traffic systems are worthless since he doesn’t even drive.", "fallacy": "ad hominem"}
]

# Append to DataFrame
new_df = pd.DataFrame(new_data)
df = pd.concat([df, new_df], ignore_index=True)

# Save updated dataset
df.to_json('cocolofa/updated_train.json', orient='records', lines=True)


In [5]:
#Check Class Distribution
print(df['fallacy'].value_counts())



fallacy
none                        2202
slippery slope               431
appeal to worse problems     421
appeal to nature             412
appeal to tradition          401
false dilemma                391
appeal to majority           383
hasty generalization         379
appeal to authority          350
ad hominem                    10
Name: count, dtype: int64


## Preprocess Data


In [6]:
%pip install imbalanced-learn

from imblearn.over_sampling import RandomOverSampler
import pandas as pd

# Perform oversampling to balance classes
ros = RandomOverSampler(random_state=42)
X_resampled, y_resampled = ros.fit_resample(df[['comment']], df['fallacy'])

# Create a new balanced DataFrame
balanced_df = pd.DataFrame({'comment': X_resampled['comment'], 'fallacy': y_resampled})

# Check class distribution after oversampling
print(balanced_df['fallacy'].value_counts())



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
fallacy
none                        2202
appeal to authority         2202
appeal to tradition         2202
appeal to worse problems    2202
hasty generalization        2202
slippery slope              2202
false dilemma               2202
appeal to majority          2202
appeal to nature            2202
ad hominem                  2202
Name: count, dtype: int64


In [7]:
# Remove "none" fallacies
df = df[df['fallacy'] != 'none']

# Reset index
df.reset_index(drop=True, inplace=True)


In [8]:
#Split Data
from sklearn.model_selection import train_test_split

# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df['comment'], df['fallacy'], test_size=0.2, random_state=42
)


In [9]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
train_labels_encoded = label_encoder.fit_transform(train_labels)
val_labels_encoded = label_encoder.transform(val_labels)

print("Classes:", label_encoder.classes_)


Classes: ['ad hominem' 'appeal to authority' 'appeal to majority'
 'appeal to nature' 'appeal to tradition' 'appeal to worse problems'
 'false dilemma' 'hasty generalization' 'slippery slope']


In [10]:
from transformers import BertTokenizer

# Load BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

# Tokenize data
train_encodings = tokenizer(list(train_texts), truncation=True, padding=True, max_length=128)
val_encodings = tokenizer(list(val_texts), truncation=True, padding=True, max_length=128)


  from .autonotebook import tqdm as notebook_tqdm


In [11]:
%pip install torch


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Create Dataset Class

In [12]:
import torch
from torch.utils.data import Dataset

class LogicalFallacyDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

# Prepare datasets
train_dataset = LogicalFallacyDataset(train_encodings, train_labels_encoded)
val_dataset = LogicalFallacyDataset(val_encodings, val_labels_encoded)


## Load Pre-Trained Model and Define Training Arguments

In [13]:
ew

NameError: name 'ew' is not defined

## Evaluate and Predict

In [None]:
# Evaluate
trainer.evaluate()

# Predict
def predict_fallacy(sentence):
    tokens = tokenizer(sentence, return_tensors="pt", truncation=True, padding=True, max_length=128)
    outputs = model(**tokens)
    prediction = torch.argmax(outputs.logits, dim=1).item()
    return label_encoder.inverse_transform([prediction])[0]

#show the accuracy
print("Accuracy:", (trainer.evaluate()['eval_loss']))


# Example prediction
test_sentence = """
If we legalize marijuana, it might seem harmless at first. But over time, society could start accepting more dangerous drugs. 
Think about how alcohol was once banned, and now it's widely available despite its negative effects. Legalizing marijuana 
could pave the way for other drugs like cocaine or heroin to be considered acceptable, leading to widespread addiction and 
eventually the collapse of public health systems. We need to think carefully before taking such a risky step.
"""

print(test_sentence)
print("Predicted Fallacy:", predict_fallacy(test_sentence))
print("=====================================")



test_sentence = "If we legalize marijuana, next thing you know, people will want to legalize harder drugs like cocaine or heroin, and society will spiral into chaos."
print("If we legalize marijuana, next thing you know, people will want to legalize harder drugs like cocaine or heroin, and society will spiral into chaos.")
print("Predicted Fallacy:", predict_fallacy(test_sentence))
print("=====================================")








NameError: name 'trainer' is not defined

In [None]:

# Example prediction
test_sentence = "Recycling programs might seem beneficial, but they divert resources and focus from more pressing issues like global poverty and famine. While we’re busy sorting waste, millions of people are starving every day. Surely, as a society, our priorities should be on saving lives rather than debating about bins."


print(test_sentence)
print("Predicted Fallacy:", predict_fallacy(test_sentence))
print("Correct Fallacy is Appeal to worse ")

print("=====================================")


Recycling programs might seem beneficial, but they divert resources and focus from more pressing issues like global poverty and famine. While we’re busy sorting waste, millions of people are starving every day. Surely, as a society, our priorities should be on saving lives rather than debating about bins.
Predicted Fallacy: appeal to worse problems
Correct Fallacy is Appeal to worse 


In [None]:
# Slippery Slope
test_sentence = "If we ban cars, we’ll soon ban planes, and eventually, we’ll all be walking everywhere!"
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: slippery slope
print("Expected Fallacy:", "slippery slope")
print("=====================================")

# Appeal to Worse Problems
test_sentence = "Why worry about plastic pollution when we have much bigger problems like world hunger?"
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: appeal to worse problems
print("Expected Fallacy:", "appeal to worse problems")
print("=====================================")

# Appeal to Nature
test_sentence = "Eating organic food is better because it’s natural and free from artificial chemicals."
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: appeal to nature
print("Expected Fallacy:", "appeal to nature")
print("=====================================")

# Appeal to Tradition
test_sentence = "We should continue this practice because it has been done this way for centuries."
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: appeal to tradition
print("Expected Fallacy:", "appeal to tradition")
print("Expected Fallacy:", "appeal to tradition")
print("=====================================")

# False Dilemma
test_sentence = "You’re either with us, or you’re against us—there’s no middle ground."
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: false dilemma
print("Expected Fallacy:", "false dilemma")
print("=====================================")

# Appeal to Majority
test_sentence = "Most people believe this is true, so it must be right."
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: appeal to majority
print("Expected Fallacy:", "appeal to majority")
print("=====================================")

# Hasty Generalization
test_sentence = "My neighbor doesn’t recycle, so no one in this town cares about the environment."
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: hasty generalization
print("Expected Fallacy:", "hasty generalization")
print("=====================================")

# Appeal to Authority
test_sentence = "This diet must be effective because a famous doctor endorses it."
print("Predicted Fallacy:", predict_fallacy(test_sentence))  # Expected: appeal to authority
print("Expected Fallacy:", "appeal to authority")


Predicted Fallacy: slippery slope
Expected Fallacy: slippery slope
Predicted Fallacy: appeal to worse problems
Expected Fallacy: appeal to worse problems
Predicted Fallacy: appeal to nature
Expected Fallacy: appeal to nature
Predicted Fallacy: appeal to tradition
Expected Fallacy: appeal to tradition
Expected Fallacy: appeal to tradition
Predicted Fallacy: false dilemma
Expected Fallacy: false dilemma
Predicted Fallacy: appeal to majority
Expected Fallacy: appeal to majority
Predicted Fallacy: hasty generalization
Expected Fallacy: hasty generalization
Predicted Fallacy: appeal to authority
Expected Fallacy: appeal to authority


## Save the Label Encoder

In [None]:
import joblib

# Save label encoder
joblib.dump(label_encoder, "label_encoder.pkl")

# Save model
model.save_pretrained("./saved_model")
tokenizer.save_pretrained("./saved_model")


('./saved_model/tokenizer_config.json',
 './saved_model/special_tokens_map.json',
 './saved_model/vocab.txt',
 './saved_model/added_tokens.json')