<a href="https://colab.research.google.com/github/zihanltesla/Deep-Learning/blob/main/AI_Generated_Text_Detection_via_Disentangled_Style_Content_Attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Open Colab's file-upload prompt; the recorded run uploaded train.jsonl.
from google.colab import files
uploaded = files.upload()

Saving train.jsonl to train.jsonl


In [None]:
import pandas as pd

# Read the uploaded JSON Lines file (lines=True: one JSON record per line)
# and list the available columns.
df = pd.read_json("train.jsonl", lines=True)
print(df.columns)

Index(['id', 'text', 'model', 'label', 'genre'], dtype='object')


In [None]:
# Keep only the text and its label; every other column is dropped from `df`.
df = df[["text", "label"]]
# Show which distinct label values occur.
print(df["label"].unique())

[1 0]


In [None]:
from google.colab import drive
# Mount Google Drive and store the reduced training frame there as CSV.
drive.mount('/content/drive')
# index=False: do not write the DataFrame index as an extra column.
df.to_csv("/content/drive/My Drive/training_dataset.csv", index=False)

Mounted at /content/drive


In [None]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
# Reload the training CSV written to Drive and preview the first rows.
train_df = pd.read_csv("/content/drive/My Drive/training_dataset.csv")
train_df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,text,label
0,"Duke Ellington, a titan of jazz, revolutionize...",1
1,I reflected on the shifting dynamics of media ...,1
2,"In F. Scott Fitzgerald's ""The Great Gatsby,"" t...",1
3,I still chuckle when I think about that time I...,1
4,"Yoga, originating in ancient India, encompasse...",1


In [None]:
# Open Colab's file-upload prompt again; the recorded run uploaded val.jsonl.
from google.colab import files
uploaded = files.upload()

Saving val.jsonl to val.jsonl


In [None]:
import pandas as pd

# Read the validation JSON Lines file; note this rebinds `df`, replacing the
# training frame that previously used the same name.
df = pd.read_json("val.jsonl", lines=True)
print(df.columns)

Index(['id', 'text', 'model', 'label', 'genre'], dtype='object')


In [None]:
# Keep only the text and its label for the validation frame.
df = df[["text", "label"]]
# Show which distinct label values occur.
print(df["label"].unique())

[1 0]


In [None]:
from google.colab import drive
# Mount Google Drive and store the reduced validation frame there as CSV.
drive.mount('/content/drive')
# index=False: do not write the DataFrame index as an extra column.
df.to_csv("/content/drive/My Drive/validation_dataset.csv", index=False)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd
from google.colab import drive
drive.mount('/content/drive')
# Reload the validation CSV written to Drive and preview the first rows.
val_df = pd.read_csv("/content/drive/My Drive/validation_dataset.csv")
val_df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,text,label
0,"In William Faulkner's ""The Sound and the Fury,...",1
1,"Manipulation, a profound and pervasive theme i...",1
2,Edna's journey is a testament to her rebellion...,1
3,There are three main aspects of the gun contro...,0
4,During the Portuguese colonial period in Angol...,1


In [None]:
import pandas as pd
import spacy
import textstat
import numpy as np
import re
from tqdm import tqdm
# Register tqdm's pandas integration so Series.progress_apply is available.
tqdm.pandas()

# spaCy pipeline shared by every extract_features call.
nlp = spacy.load("en_core_web_sm")

# Lowercase lexicons; tokens are lowercased before membership tests.
function_words = {
    "the", "is", "and", "but",
    "or", "because", "as", "that",
}
subject_pronouns = {
    "i", "we", "you", "he",
    "she", "they", "it",
}
slang = {
    "lol", "brb", "idk", "imo",
    "smh", "tbh", "rofl", "lmao",
}

# Surface patterns: the start of a URL, and runs of two or more capitals.
url_pattern = re.compile(r"https?://|www\.")
uppercase_pattern = re.compile(r"[A-Z]{2,}")

def extract_features(text):
    """Compute the 11 handcrafted stylometric features for one document.

    Args:
        text: The document text. Missing values (``None`` or a float NaN, as
            pandas produces for empty CSV cells) are treated as an empty
            string; any other non-string value is converted with ``str``.

    Returns:
        A list of 11 numbers, in this order:
        function_word_count, punctuation_count, type_token_ratio,
        avg_sentence_length (in spaCy tokens), readability_score (Flesch
        reading ease from textstat), uppercase_ratio, slang_count, url_count,
        has_subject, is_pronoun_subject, is_noun_subject.
    """
    # Guard against non-string cells: iterating or parsing a float would raise.
    if not isinstance(text, str):
        is_missing = text is None or (isinstance(text, float) and np.isnan(text))
        text = "" if is_missing else str(text)

    doc = nlp(text)

    # Lowercase every token once; both lexicon counts reuse this list.
    lowered_tokens = [token.text.lower() for token in doc]

    punctuation_count = sum(1 for char in text if char in ".,!?;:-")
    # Counts *runs* of 2+ capitals (not single characters), normalised by the
    # text length; the +1 avoids dividing by zero on empty text.
    uppercase_ratio = len(uppercase_pattern.findall(text)) / (len(text) + 1)
    url_count = len(url_pattern.findall(text))
    slang_count = sum(1 for word in lowered_tokens if word in slang)
    function_word_count = sum(1 for word in lowered_tokens if word in function_words)

    # Type/token ratio over alphabetic tokens only (denominator smoothed by +1).
    alpha_tokens = [token.text.lower() for token in doc if token.is_alpha]
    type_token_ratio = len(set(alpha_tokens)) / (len(alpha_tokens) + 1) if alpha_tokens else 0

    sentence_lengths = [len(sent) for sent in doc.sents]
    avg_sentence_length = np.mean(sentence_lengths) if sentence_lengths else 0
    readability_score = textstat.flesch_reading_ease(text)

    # Only the first grammatical subject in the document is inspected.
    first_subject = next(
        (token for token in doc if token.dep_ in {"nsubj", "nsubjpass"}),
        None,
    )
    has_subject = 0
    is_pronoun_subject = 0
    is_noun_subject = 0
    if first_subject is not None:
        has_subject = 1
        if first_subject.text.lower() in subject_pronouns:
            is_pronoun_subject = 1
        else:
            # Any subject that is not one of the listed pronouns lands here.
            is_noun_subject = 1

    return [
        function_word_count,
        punctuation_count,
        type_token_ratio,
        avg_sentence_length,
        readability_score,
        uppercase_ratio,
        slang_count,
        url_count,
        has_subject,
        is_pronoun_subject,
        is_noun_subject
    ]


# Attach the handcrafted feature vector to every row: training frame first,
# then validation (each shows its own progress bar).
for frame in (train_df, val_df):
    frame["features"] = frame["text"].progress_apply(extract_features)

# Persist both frames, features included, as pickles on Drive.
for frame, destination in (
    (train_df, "/content/drive/My Drive/ai_detect_train.pkl"),
    (val_df, "/content/drive/My Drive/ai_detect_val.pkl"),
):
    frame.to_pickle(destination)

print("Feature extraction completed!")


100%|██████████| 23707/23707 [1:06:45<00:00,  5.92it/s]
100%|██████████| 3589/3589 [10:11<00:00,  5.87it/s]


Feature extraction completed!


In [None]:
import torch
from torch.utils.data import Dataset
from transformers import AutoTokenizer
import pandas as pd

class AIDetectionDataset(Dataset):
    """Torch dataset pairing tokenized text with handcrafted features.

    Each row of ``dataframe`` must provide a ``text`` and a ``features``
    entry; when ``mode == "train"`` a ``label`` entry is read as well.
    """

    def __init__(self, dataframe, tokenizer, max_length=512, mode="train"):
        """Store the frame, the tokenizer callable and the tokenization settings."""
        self.df = dataframe
        self.tokenizer = tokenizer
        self.max_length = max_length
        self.mode = mode

    def __len__(self):
        """Number of rows in the wrapped frame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the sample at position ``idx`` as a dict of tensors.

        Keys: ``input_ids``, ``attention_mask``, ``features`` and, only in
        ``"train"`` mode, ``labels``.
        """
        record = self.df.iloc[idx]

        # Pad/truncate to exactly max_length tokens.
        tokenized = self.tokenizer(
            record["text"],
            padding="max_length",
            truncation=True,
            max_length=self.max_length,
            return_tensors="pt",
        )

        sample = {
            # squeeze(0) drops the leading batch axis added by return_tensors="pt".
            "input_ids": tokenized["input_ids"].squeeze(0),
            "attention_mask": tokenized["attention_mask"].squeeze(0),
            "features": torch.tensor(record["features"], dtype=torch.float),
        }
        if self.mode == "train":
            sample["labels"] = torch.tensor(record["label"], dtype=torch.long)
        return sample


In [None]:
from transformers import AutoTokenizer
from torch.utils.data import DataLoader
from google.colab import drive
drive.mount('/content/drive')

# Tokenizer for the same checkpoint the model's encoder defaults to.
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-v3-small")

# Reload the frames that already carry the precomputed "features" column.
train_df, val_df = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "/content/drive/My Drive/ai_detect_train.pkl",
        "/content/drive/My Drive/ai_detect_val.pkl",
    )
)

# Validation also uses mode="train": that is the mode in which the dataset
# returns "labels", which evaluate_model reads from every batch.
train_dataset, val_dataset = (
    AIDetectionDataset(frame, tokenizer, mode="train")
    for frame in (train_df, val_df)
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=32)
val_loader = DataLoader(val_dataset, batch_size=32)


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from transformers import AutoModel
from torch.optim import AdamW
from sklearn.metrics import accuracy_score, f1_score

class SingleInputFactorizedAttentionModel(nn.Module):
    """Classifier with separate "style" and "content" attention pooling.

    A pretrained encoder produces token states. Two independent linear
    scorers pool those states into a style vector and a content vector; both
    are projected to ``hidden_dim`` and concatenated with a projection of the
    handcrafted features before a two-class MLP head.

    Args:
        pretrained_model_name: Checkpoint passed to ``AutoModel.from_pretrained``.
        num_handcrafted_features: Length of the handcrafted feature vector.
        hidden_dim: Width of each projected representation.
        dropout: Dropout probability used in the classifier head.
    """

    # Weights of the auxiliary losses added to the classification loss.
    ORTHO_WEIGHT = 0.01
    ALIGN_WEIGHT = 0.01

    def __init__(self,
                 pretrained_model_name="microsoft/deberta-v3-small",
                 num_handcrafted_features=11,
                 hidden_dim=256,
                 dropout=0.1):
        super().__init__()

        # Pretrained encoder
        self.encoder = AutoModel.from_pretrained(pretrained_model_name)
        hidden_size = self.encoder.config.hidden_size

        # Style and content attention: one scalar score per token each
        self.style_attention = nn.Linear(hidden_size, 1)
        self.content_attention = nn.Linear(hidden_size, 1)

        # Projections into the shared hidden_dim space
        self.style_proj = nn.Linear(hidden_size, hidden_dim)
        self.content_proj = nn.Linear(hidden_size, hidden_dim)
        self.feature_proj = nn.Linear(num_handcrafted_features, hidden_dim)

        # Final classifier over [style | content | features]
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(3 * hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 2)
        )

    @staticmethod
    def _attention_pool(scorer, hidden_states, attention_mask):
        """Pool token states with softmax attention restricted to real tokens.

        Positions where ``attention_mask`` is 0 (padding) are given the most
        negative representable score so they receive zero weight; otherwise
        padding would contribute to the pooled vector.
        """
        scores = scorer(hidden_states).squeeze(-1)
        if attention_mask is not None:
            # finfo(...).min instead of -inf keeps a fully masked row finite.
            scores = scores.masked_fill(attention_mask == 0, torch.finfo(scores.dtype).min)
        weights = torch.softmax(scores, dim=1)
        return torch.sum(weights.unsqueeze(-1) * hidden_states, dim=1)

    def forward(self, input_ids, attention_mask, features, return_loss=False, labels=None):
        """Run the model.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: 1 for real tokens, 0 for padding; used by the
                encoder and by both attention poolings.
            features: Handcrafted feature vectors, one row per example.
            return_loss: When True (and ``labels`` is given) also return the loss.
            labels: Class indices for the cross-entropy loss.

        Returns:
            ``logits`` alone, or ``(logits, total_loss)`` when
            ``return_loss`` is True and ``labels`` is not None.
        """
        # Encoder token states
        outputs = self.encoder(input_ids=input_ids, attention_mask=attention_mask)
        hidden_states = outputs.last_hidden_state

        # Padding-aware attention pooling
        style_repr = self._attention_pool(self.style_attention, hidden_states, attention_mask)
        content_repr = self._attention_pool(self.content_attention, hidden_states, attention_mask)

        # Projection
        style_vec = self.style_proj(style_repr)
        content_vec = self.content_proj(content_repr)
        feat_vec = self.feature_proj(features)

        # Final concat
        joint_repr = torch.cat([style_vec, content_vec, feat_vec], dim=1)
        logits = self.classifier(joint_repr)

        if not (return_loss and labels is not None):
            return logits

        # Orthogonality loss: squared dot product between style and content
        ortho_loss = torch.mean(torch.sum(style_vec * content_vec, dim=1) ** 2)

        # Feature alignment loss: pull the style vector toward the feature projection
        align_loss = torch.mean((style_vec - feat_vec) ** 2)

        # Classification loss
        cls_loss = F.cross_entropy(logits, labels)

        total_loss = cls_loss + self.ORTHO_WEIGHT * ortho_loss + self.ALIGN_WEIGHT * align_loss
        return logits, total_loss


In [None]:
from torch.cuda.amp import autocast, GradScaler

scaler = GradScaler()

def train_model(model, train_loader, optimizer, device, epochs=3):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        torch.cuda.empty_cache()
        for batch in train_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            features = batch["features"].to(device)
            labels = batch["labels"].to(device)

            with autocast():
                logits, loss = model(
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    features=features,
                    return_loss=True,
                    labels=labels
                )

            optimizer.zero_grad()
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            total_loss += loss.item()

        print(f"Epoch {epoch + 1}/{epochs} | Avg Loss: {total_loss / len(train_loader):.4f}")

  scaler = GradScaler()


In [None]:
def evaluate_model(model, val_loader, device):
    """Score ``model`` on ``val_loader`` and report accuracy and macro F1.

    Puts the model in eval mode, runs every batch without gradient tracking,
    takes the argmax class per example, prints both metrics and returns them
    as ``(accuracy, macro_f1)``. Each batch must contain ``labels``.
    """
    model.eval()
    predicted, expected = [], []

    with torch.no_grad():
        for batch in val_loader:
            inputs = {
                key: batch[key].to(device)
                for key in ("input_ids", "attention_mask", "features")
            }
            gold = batch["labels"].to(device)

            scores = model(**inputs)

            predicted += scores.argmax(dim=1).cpu().tolist()
            expected += gold.cpu().tolist()

        acc = accuracy_score(expected, predicted)
        f1 = f1_score(expected, predicted, average="macro")

    print(f"Validation Accuracy: {acc:.4f} | Macro F1: {f1:.4f}")
    return acc, f1

In [None]:
# Seed torch before the model is built: the freshly initialised layers,
# dropout and the shuffled training order all draw from torch's global RNG,
# so without a seed the reported metrics change from run to run.
SEED = 42
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SingleInputFactorizedAttentionModel().to(device)
optimizer = AdamW(model.parameters(), lr=1e-5, weight_decay=1e-4)


train_model(model, train_loader, optimizer, device, epochs=3)
# Last expression: the (accuracy, macro F1) tuple is shown as the cell output.
evaluate_model(model, val_loader, device)

  with autocast():


Epoch 1/3 | Avg Loss: 5.9289
Epoch 2/3 | Avg Loss: 4.5793
Epoch 3/3 | Avg Loss: 3.6442
Validation Accuracy: 0.9398 | Macro F1: 0.9316


(0.9398161047645583, 0.9316431587078207)