In [10]:
import pandas as pd
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from torch.nn import CrossEntropyLoss
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
import numpy as np


In [6]:
# 1 ->209
# one -> 80
# zero ->5733
# 0 -> 632
def replace_token_ids(target_encoding, replacement_map):
    input_ids = target_encoding['input_ids']
    for old_id, new_id in replacement_map.items():
        input_ids[input_ids == old_id] = new_id  # Replace old ID with new ID
    target_encoding['input_ids'] = input_ids
    return target_encoding
replacement_map = {5733: 632, 80: 209}
label_map = {0: "No", 1: "Yes"}


In [7]:
class ToxicDataset(Dataset):
    def __init__(self, csv_file, tokenizer, max_length=128):
        self.data = pd.read_csv(csv_file)
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)
        
    def __getitem__(self, index):
        row = self.data.iloc[index]
        input_text = row['text']
        label = row['label']
        explanation = row['explanation']
        
        # Prompt the model with instructions
        input_prompt = f"Is the below statement toxic (Yes/No)? Provide a brief explanation (20 words max). Statement: {input_text}"
        # target_text = f"{label} Explanation: {explanation}"
        target_text = f"{label_map[label]} Explanation: {explanation}"
        
        input_encoding = self.tokenizer(
            # input_text,
            input_prompt,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        target_encoding = self.tokenizer(
            target_text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )
        # target_encoding = replace_token_ids(target_encoding, replacement_map)
        
        return {
            "input_ids": input_encoding["input_ids"].squeeze(),
            "attention_mask": input_encoding["attention_mask"].squeeze(),
            "labels": target_encoding["input_ids"].squeeze(),
            "label_ids": label,
        }
            

In [8]:
# Initialize tokenizer and model
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-base")
# Add custom special tokens
special_tokens = {"bos_token": "<s>", "eos_token": "</s>"}
tokenizer.add_special_tokens(special_tokens)

# # Update the model to handle new tokens
model.resize_token_embeddings(len(tokenizer))


tokenizer_config.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.40k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Embedding(32101, 768)

In [9]:
label = 1
explanation ="This is jkdbshja lsiuawkqear and prejudice against Asians.  It's untrue and inflammatory."
target_text = f"<s>{label_map[label]} Explanation: {explanation}</s>"
target_encoding = tokenizer(
            target_text,
            max_length=128,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
        )


print(target_encoding)

# Replace tokens
# updated_target_encoding = replace_token_ids(target_encoding, replacement_map)

# print(updated_target_encoding)


{'input_ids': tensor([[32100,  2163,  1881,  3767,   257,    10,   100,    19,     3,   354,
           157,    26,   115,     7,   107,  1191,     3,    40,     7,    23,
            76,     9,   210,   157,  1824,  2741,    11, 25244,   581,  6578,
             7,     5,    94,    31,     7,    73,  2666,    15,    11,     3,
         15329,     5,     1,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,  



In [10]:

#Yes 2163
#No 465
yes_token_id = tokenizer("Yes")
no_token_id = tokenizer("No")
print(yes_token_id)
print(no_token_id)
print(tokenizer.decode(4168))
print(tokenizer.decode(19739))
print(tokenizer.decode(2163))

{'input_ids': [2163, 1], 'attention_mask': [1, 1]}
{'input_ids': [465, 1], 'attention_mask': [1, 1]}
No
Yes
Yes


In [11]:
tokenizer("Yes")

{'input_ids': [2163, 1], 'attention_mask': [1, 1]}

In [12]:
# Load dataset and dataloader
dataset = ToxicDataset("/kaggle/input/geminiresponses1/response_train_filtered.csv", tokenizer)
dataloader = DataLoader(dataset, batch_size=16, shuffle=True)


In [13]:
# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

num_epochs = 8
loss_fn = CrossEntropyLoss()

In [None]:
for epoch in range(num_epochs):
    model.train()
    epoch_loss = 0
    for batch in dataloader:
        optimizer.zero_grad()
        
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        labels = batch["labels"].to(device)
        label_ids = batch["label_ids"].to(device)
        
        # Forward pass
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )
        

        logits = outputs.logits
        explanation_loss = outputs.loss
        label_logits = logits[:, 1, :]  

        label_token_ids = torch.tensor([465, 2163]).to(device)  


        label_logits = label_logits[:, label_token_ids]  # Shape: [batch_size, 2]


        label_loss = loss_fn(label_logits, label_ids)
        
        # Combine losses
        loss = explanation_loss + label_loss
        loss.backward()
        optimizer.step()
        
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss / len(dataloader)}")

In [None]:
# Save the fine-tuned model
model.save_pretrained("fine_tuned_flan_t5_8")
tokenizer.save_pretrained("fine_tuned_flan_t5_8")

In [None]:
# Load the fine-tuned model
# from transformers import T5Tokenizer, T5ForConditionalGeneration

# tokenizer = T5Tokenizer.from_pretrained("fine_tuned_flan_t5")
# model = T5ForConditionalGeneration.from_pretrained("fine_tuned_flan_t5")
model.eval()
test_df = pd.read_csv("/kaggle/input/geminiresponses1/response_train_filtered.csv")

for i in range(10):
    input_text = test_df.iloc[i]["text"]
    input_prompt = f"Is the below statement toxic (Yes/No)? Provide a brief explanation (20 words max). Statement: {input_text}"
    input_encoding = tokenizer(input_prompt, return_tensors="pt", truncation=True, padding="max_length", max_length=128)
    input_ids = input_encoding["input_ids"].to(device)
    
    with torch.no_grad():
        outputs = model.generate(input_ids).to(device)
        decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Input:", input_text)
    print("Output:", decoded)

    if "Yes" in decoded.split("Output:")[-1]: 
            print("TOXIC")
            print()
    elif "No" in decoded.split("Output:")[-1]:  
        print("NOT TOXIC")
        print()

In [None]:
# from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
# def calculate_accuracy(model, tokenizer, data_df, max_length=128):
#     model.eval()
#     correct_count = 0
#     total_count = 0
#     all_preds = []
#     all_labels = []

#     for i in range(len(data_df)):
#         input_text = data_df.iloc[i]["text"]
#         true_label = data_df.iloc[i]["label"]  

#         # Create the input prompt
#         input_prompt = f"Is the below statement toxic (Yes/No)? Provide a brief explanation (20 words max). Statement: {input_text}"
#         input_encoding = tokenizer(
#             input_prompt,
#             return_tensors="pt",
#             truncation=True,
#             padding="max_length",
#             max_length=max_length
#         )
#         input_ids = input_encoding["input_ids"].to(device)

#         with torch.no_grad():
#             outputs = model.generate(input_ids, max_length=max_length)
#             decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

      
#         predicted_label = -1  
#         if "Yes" in decoded.split("Output:")[-1]:  
#             predicted_label = 1
#         elif "No" in decoded.split("Output:")[-1]:  
#             predicted_label = 0

#         # if predicted_label == true_label:
#         #     correct_count += 1
#         all_preds.append(predicted_label)
#         all_labels.append(true_label)

#     # Calculate accuracy
#     # accuracy = correct_count / len(data_df)
#     accuracy = accuracy_score(all_labels, all_preds)
#     f1 = f1_score(all_labels, all_preds)
#     auc = roc_auc_score(all_labels, all_preds)
#     return accuracy, f1, auc




In [None]:
# # Calculate test accuracy
# test_accuracy, test_f1, test_auc = calculate_accuracy(
#     model, tokenizer, pd.read_csv("/kaggle/input/geminiresponses1/response_test_filtered.csv")
# )


In [None]:
# print(f"Test Accuracy: {test_accuracy:.2%}")
# print(f"Test F1: {test_f1 : .2}")
# print(f"Test AUC: {test_auc : .2}")

In [None]:
# Save the fine-tuned model
model.save_pretrained("fine_tuned_flan_t5_8")
tokenizer.save_pretrained("fine_tuned_flan_t5_8")

In [11]:
def calculate_accuracy_1(model, tokenizer, data_df, max_length=128):
    model.eval()
    correct_count = 0
    total_count = 0
    all_preds = []
    all_labels = []

    for i in range(len(data_df)):
        input_text = data_df.iloc[i]["text"]
        true_label = data_df.iloc[i]["toxicity_human"]  

        # Create the input prompt
        input_prompt = f"Is the below statement toxic (Yes/No)? Provide a brief explanation (20 words max). Statement: {input_text}"
        input_encoding = tokenizer(
            input_prompt,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=max_length
        )
        input_ids = input_encoding["input_ids"].to(device)

        with torch.no_grad():
            outputs = model.generate(input_ids, max_length=max_length)
            decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

      
        predicted_label = -1  
        if "Yes" in decoded.split("Output:")[-1]:  
            predicted_label = 1
        elif "No" in decoded.split("Output:")[-1]:  
            predicted_label = 0

        # if predicted_label == true_label:
        #     correct_count += 1
        all_preds.append(predicted_label)
        all_labels.append(true_label)

    # Calculate accuracy
    # accuracy = correct_count / len(data_df)
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)
    return accuracy, f1, auc, all_labels, all_preds

In [None]:
# Calculate test accuracy
test_accuracy, test_f1, test_auc = calculate_accuracy_1(
    model, tokenizer, pd.read_csv("/kaggle/input/toxichuman/test_response.csv")
)


In [None]:
print(f"Test Accuracy: {test_accuracy:.2%}")
print(f"Test F1: {test_f1 : .2}")
print(f"Test AUC: {test_auc : .2}")

In [None]:
# from transformers import T5Tokenizer, T5ForConditionalGeneration
# import torch


# # Path to the fine-tuned model directory
# model_dir = "/kaggle/input/finalmodel/pytorch/default/1"

# # Load the tokenizer
# tokenizer = T5Tokenizer.from_pretrained(model_dir)

# # Load the model
# model = T5ForConditionalGeneration.from_pretrained(model_dir)

# # Move the model to the device (e.g., GPU if available)
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model.to(device)

# print("Model and tokenizer successfully loaded.")


Model and tokenizer successfully loaded.


In [13]:
# Calculate test accuracy
test_accuracy, test_f1, test_auc, true_labels, pred_labels = calculate_accuracy_1(
    model, tokenizer, pd.read_csv("/kaggle/input/toxichuman/test_response.csv")
)


In [14]:
print(f"Test Accuracy: {test_accuracy:.2%}")
print(f"Test F1: {test_f1 : .2}")
print(f"Test AUC: {test_auc : .2}")

Test Accuracy: 79.91%
Test F1:  0.77
Test AUC:  0.8


In [15]:
import numpy as np

def calculate_percentages(true_labels, pred_labels):
    # Convert to numpy arrays for easy indexing
    true_labels = np.array(true_labels)
    pred_labels = np.array(pred_labels)

    # Calculate TP, TN, FP, FN
    TP = np.sum((true_labels == 1) & (pred_labels == 1))
    TN = np.sum((true_labels == 0) & (pred_labels == 0))
    FP = np.sum((true_labels == 0) & (pred_labels == 1))
    FN = np.sum((true_labels == 1) & (pred_labels == 0))

    # Total samples
    total_samples = len(true_labels)

    # Calculate percentages
    TP_percentage = (TP / total_samples) * 100
    TN_percentage = (TN / total_samples) * 100
    FP_percentage = (FP / total_samples) * 100
    FN_percentage = (FN / total_samples) * 100

    return TP_percentage, TN_percentage, FP_percentage, FN_percentage

TP_percentage, TN_percentage, FP_percentage, FN_percentage = calculate_percentages(true_labels, pred_labels)

print(f"True Positive Percentage: {TP_percentage:.2f}%")
print(f"True Negative Percentage: {TN_percentage:.2f}%")
print(f"False Positive Percentage: {FP_percentage:.2f}%")
print(f"False Negative Percentage: {FN_percentage:.2f}%")

True Positive Percentage: 33.22%
True Negative Percentage: 46.69%
False Positive Percentage: 12.69%
False Negative Percentage: 7.40%


In [19]:
def calculate_accuracy_2(model, tokenizer, data_df, max_length=128):
    model.eval()
    correct_count = 0
    total_count = 0
    all_preds = []
    all_labels = []

    for i in tqdm(range(len(data_df))):
        input_text = data_df.iloc[i]["text"]
        true_label = data_df.iloc[i]["toxic"]  

        # Create the input prompt
        input_prompt = f"Is the below statement toxic (Yes/No)? Provide a brief explanation (20 words max). Statement: {input_text}"
        input_encoding = tokenizer(
            input_prompt,
            return_tensors="pt",
            truncation=True,
            padding="max_length",
            max_length=max_length
        )
        input_ids = input_encoding["input_ids"].to(device)

        with torch.no_grad():
            outputs = model.generate(input_ids, max_length=max_length)
            decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)

      
        predicted_label = -1  
        if "Yes" in decoded.split("Output:")[-1]:  
            predicted_label = 1
        elif "No" in decoded.split("Output:")[-1]:  
            predicted_label = 0

        # if predicted_label == true_label:
        #     correct_count += 1
        all_preds.append(predicted_label)
        all_labels.append(true_label)

    # Calculate accuracy
    # accuracy = correct_count / len(data_df)
    accuracy = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds)
    auc = roc_auc_score(all_labels, all_preds)
    return accuracy, f1, auc, all_labels, all_preds

In [22]:
# Calculate test accuracy
test_accuracy, test_f1, test_auc, true_labels, pred_labels = calculate_accuracy_2(
    model, tokenizer, pd.read_csv("/kaggle/input/dataset1/toxicity_parsed_dataset.csv")
)


100%|██████████| 4000/4000 [35:29<00:00,  1.88it/s]


In [23]:
print(f"Test Accuracy: {test_accuracy:.2%}")
print(f"Test F1: {test_f1 : .2}")
print(f"Test AUC: {test_auc : .2}")

Test Accuracy: 86.83%
Test F1:  0.87
Test AUC:  0.87


In [24]:
TP_percentage, TN_percentage, FP_percentage, FN_percentage = calculate_percentages(true_labels, pred_labels)

print(f"True Positive Percentage: {TP_percentage:.2f}%")
print(f"True Negative Percentage: {TN_percentage:.2f}%")
print(f"False Positive Percentage: {FP_percentage:.2f}%")
print(f"False Negative Percentage: {FN_percentage:.2f}%")

True Positive Percentage: 44.98%
True Negative Percentage: 41.85%
False Positive Percentage: 8.15%
False Negative Percentage: 5.03%


In [21]:
# white slavery (18th-19th century. mostly white women. still going on in some places) - people of color in general
# obama is a muslim,
# i was walking down the street the other day and i saw a group of about 5 latino men walking towards me.
# i dont want to live in a world where i have to learn mandarin to get along
# if we don't learn about the past, we wont learn about the present and future2I
# not all of them eat cats and dogs, some of them don't even know what that is   

In [36]:
input_text = "if we don't learn about the past, we wont learn about the present and future2I"
input_prompt = f"Is the below statement toxic (Yes/No)? Provide a brief explanation (20 words max). Statement: {input_text}"
input_encoding = tokenizer(
    input_prompt,
    return_tensors="pt",
    truncation=True,
    padding="max_length",
    max_length=128
)
input_ids = input_encoding["input_ids"].to(device)

with torch.no_grad():
    outputs = model.generate(input_ids, max_length=128)
    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(input_text)
    print(decoded)

if we don't learn about the past, we wont learn about the present and future2I
Yes Explanation: This statement is a harmful and untrue generalization that promotes a harmful and untrue stereotype about people. It's prejudiced and harmful.
