In [None]:
import pandas as pd
import torch
from datasets import Dataset
from sklearn.preprocessing import LabelEncoder
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    pipeline
)
from sklearn.model_selection import train_test_split


In [None]:
# Load dataset (JSON Lines: one record per line; the "prompt" and "label" columns are used below)
file_path = "classifier_dataset_cleaned.jsonl"  # Update path if needed
df = pd.read_json(file_path, lines=True)

# Encode the string labels as integer class ids (0 .. n_classes - 1)
label_encoder = LabelEncoder()
df["label"] = label_encoder.fit_transform(df["label"])

# Save label mappings for later use. A class id is the position of the class
# in `classes_`, so enumerate() yields the same mapping as transform() while
# producing plain Python ints (clean to print and JSON-serializable).
label_mapping = {name: idx for idx, name in enumerate(label_encoder.classes_)}
print("Label Mapping:", label_mapping)

# Split into train and validation sets. Stratify on the label so every class
# keeps its proportion in the 10% validation split; stratification needs at
# least two samples per class, so fall back to a plain random split otherwise.
min_class_count = df["label"].value_counts().min()
stratify_labels = df["label"] if min_class_count >= 2 else None
train_texts, val_texts, train_labels, val_labels = train_test_split(
    df["prompt"],
    df["label"],
    test_size=0.1,
    random_state=42,
    stratify=stratify_labels,
)

# Hand plain Python lists to `datasets` so the shuffled pandas index left over
# from the split plays no role in building the Arrow tables.
train_dataset = Dataset.from_dict({"text": train_texts.tolist(), "label": train_labels.tolist()})
val_dataset = Dataset.from_dict({"text": val_texts.tolist(), "label": val_labels.tolist()})

model_name = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" # Change to another model if needed

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Tokenization below pads to a fixed length, which requires a pad token. Some
# causal-LM checkpoints ship without one; reuse EOS in that case so swapping
# `model_name` does not break padding.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def tokenize_function(examples):
    """Tokenize the "text" field of a batch with the module-level tokenizer.

    Every sequence is truncated and padded to exactly 256 tokens so all
    examples share one fixed length.

    Args:
        examples: Mapping with a "text" entry holding the raw string(s).

    Returns:
        Whatever the tokenizer call returns for that text.
    """
    encode_options = {"padding": "max_length", "truncation": True, "max_length": 256}
    return tokenizer(examples["text"], **encode_options)

# Tokenize both splits in batched mode, rebinding each name to its tokenized dataset
train_dataset, val_dataset = (
    split.map(tokenize_function, batched=True)
    for split in (train_dataset, val_dataset)
)

# Number of distinct classes; sizes the classification head when the model is loaded
num_labels = len(label_encoder.classes_)



Label Mapping: {'ad hominem': 0, 'appeal to authority': 1, 'appeal to emotion': 2, 'appeal to tradition': 3, 'circular reasoning': 4, 'deductive fallacy': 5, 'equivocation': 6, 'fallacy of extension': 7, 'false causality': 8, 'false dilemma': 9, 'intentional fallacy': 10, 'none': 11, 'slippery slope': 12}


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/3.07k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Map:   0%|          | 0/17288 [00:00<?, ? examples/s]

Map:   0%|          | 0/1921 [00:00<?, ? examples/s]

In [None]:
# Map integer class ids <-> fallacy names and store them in the model config,
# so the saved checkpoint and the inference pipeline report real label names
# instead of generic "LABEL_<n>" strings.
id2label = {idx: str(name) for idx, name in enumerate(label_encoder.classes_)}
label2id = {name: idx for idx, name in id2label.items()}

model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=num_labels,
    id2label=id2label,
    label2id=label2id,
)

# Sequence-classification heads on decoder-only models use config.pad_token_id
# to find the last real token of each sequence, and reject batches larger than
# one when it is unset. Mirror the tokenizer's pad token into the config.
if model.config.pad_token_id is None:
    model.config.pad_token_id = tokenizer.pad_token_id


def compute_metrics(eval_pred):
    """Compute validation accuracy from the Trainer's evaluation output.

    Args:
        eval_pred: Object exposing `predictions` (logits, or a tuple whose
            first element is the logits) and `label_ids` (true class ids).

    Returns:
        dict with a single "accuracy" entry; the Trainer reports it as
        "eval_accuracy", which `metric_for_best_model="accuracy"` relies on.
    """
    logits = eval_pred.predictions
    if isinstance(logits, tuple):
        logits = logits[0]
    predicted_ids = logits.argmax(axis=-1)
    return {"accuracy": float((predicted_ids == eval_pred.label_ids).mean())}


# Newer transformers releases rename `evaluation_strategy` to `eval_strategy`;
# use whichever keyword the installed version actually defines.
eval_strategy_key = (
    "eval_strategy"
    if "eval_strategy" in TrainingArguments.__dataclass_fields__
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir="./fallacy_detector",
    save_strategy="epoch",  # must match the evaluation strategy for load_best_model_at_end
    learning_rate=1e-6,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",  # produced by compute_metrics above
    bf16=True,
    gradient_checkpointing=True,
    max_grad_norm=1.0,
    optim="adamw_torch",  # "adamw_bf16" is not a valid optimizer name
    save_total_limit=2,
    report_to="none",
    # `cuda_graphs` was removed: TrainingArguments has no such field and
    # passing it raises a TypeError.
    **{eval_strategy_key: "epoch"},
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

trainer.train()

# Persist the fine-tuned weights (the best checkpoint, since
# load_best_model_at_end=True) together with the tokenizer.
model.save_pretrained("./fallacy_classifier")
tokenizer.save_pretrained("./fallacy_classifier")

print("Model fine-tuned and saved successfully!")

# Reload from disk for inference; this also checks the saved artifact is usable.
classifier = pipeline("text-classification", model="./fallacy_classifier", tokenizer=tokenizer)

test_text = "If we don't ban video games, crime rates will skyrocket!"
prediction = classifier(test_text)

# The saved config carries id2label, so the pipeline already returns the
# fallacy name. The string is not an encoded class id, so it must not be
# passed through label_encoder.inverse_transform.
predicted_label = [prediction[0]["label"]]

print(f"Predicted Fallacy: {predicted_label[0]}")
