In [None]:
import torch
import torch.nn as nn
import pandas as pd
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset,DataLoader
from transformers import AutoTokenizer
from bs4 import BeautifulSoup

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the pretrained checkpoint with a one-output sequence-classification
# head, and the tokenizer published under the same checkpoint name.
checkpoint = "distilbert-base-uncased"

model = AutoModelForSequenceClassification.from_pretrained(checkpoint, num_labels=1)
tokenizer = AutoTokenizer.from_pretrained(checkpoint)


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
class LoRALayer(torch.nn.Module):
    """Low-rank adapter computing ``alpha * (x @ A @ B)``.

    ``A`` has shape ``(in_dim, rank)`` and is initialised from a standard
    normal scaled by ``1 / sqrt(rank)``. ``B`` has shape ``(rank, out_dim)``
    and starts at zero, so a freshly constructed layer outputs zeros.

    Args:
        in_dim: Size of the last dimension of the input.
        out_dim: Size of the last dimension of the output.
        rank: Inner dimension shared by ``A`` and ``B``.
        alpha: Constant multiplier applied to the low-rank product.
    """

    def __init__(self, in_dim, out_dim, rank, alpha):
        super().__init__()
        self.alpha = alpha
        # Initialisation scale for A: 1 / sqrt(rank), kept as a float tensor.
        init_scale = 1 / torch.sqrt(torch.tensor(rank).float())
        # A is registered before B so the parameter order stays (A, B).
        self.A = torch.nn.Parameter(torch.randn(in_dim, rank) * init_scale)
        self.B = torch.nn.Parameter(torch.zeros(rank, out_dim))

    def forward(self, x):
        """Project ``x`` through ``A`` then ``B`` and scale by ``alpha``."""
        projected = x @ self.A @ self.B
        return self.alpha * projected

In [None]:
class LinearWithLoRA(torch.nn.Module):
    """Wrap a linear layer and add a LoRA adapter's output to its result.

    Args:
        linear: Layer to wrap; its ``in_features`` and ``out_features``
            determine the adapter's dimensions.
        rank: Rank passed to the adapter.
        alpha: Scaling factor passed to the adapter.
    """

    def __init__(self, linear, rank, alpha):
        super().__init__()
        self.linear = linear
        # The adapter mirrors the wrapped layer's input and output widths.
        in_dim, out_dim = linear.in_features, linear.out_features
        self.lora = LoRALayer(in_dim, out_dim, rank, alpha)

    def forward(self, x):
        """Return ``linear(x)`` plus the adapter output for the same ``x``."""
        base_out = self.linear(x)
        adapter_out = self.lora(x)
        return base_out + adapter_out

In [None]:
# Freeze every parameter currently registered on the model.
# NOTE(review): this includes the classification head that the loading
# message reports as newly initialised — confirm that is intended.
for weight in model.parameters():
    weight.requires_grad_(False)

In [None]:
# Show the module tree before any LoRA wrappers are inserted.
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): Linear(in_features=768, out_features=768, bias=True)
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): Linear(in_features=768, out_features=768, bias=True)
            (out_lin): Linear(in_features=768, out_features=768, bias=True)
          )
          (sa_layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (ffn): FFN(
            (dropout): Dropout(p=0.1, inplace=False)


In [None]:
from functools import partial

# default hyperparameter choices
lora_r = 4
lora_alpha = 16
lora_dropout = 0.05  # NOTE(review): defined but never used in this cell
lora_query = True
lora_key = False
lora_value = True
lora_projection = False
lora_mlp = False
# NOTE(review): with lora_head = False the pre_classifier/classifier layers
# are left unwrapped; an earlier cell froze all parameters, so they are not
# trained — confirm that is intended.
lora_head = False

layers = []  # not used in this cell; kept so the name remains defined

assign_lora = partial(LinearWithLoRA, rank=lora_r, alpha=lora_alpha)


def _wrap_once(parent, attr_name):
    """Replace ``parent.<attr_name>`` with a LoRA-wrapped layer, at most once.

    If the attribute is already a ``LinearWithLoRA`` it is left alone, so
    re-running this cell neither fails on ``linear.in_features`` nor stacks
    a second adapter on top of the first.
    """
    current = getattr(parent, attr_name)
    if not isinstance(current, LinearWithLoRA):
        setattr(parent, attr_name, assign_lora(current))


# Wrap the selected linear layers of every transformer block. The order
# within a block (q, k, v, out, ffn) matches the original cell.
for layer in model.distilbert.transformer.layer:
    if lora_query:
        _wrap_once(layer.attention, "q_lin")
    if lora_key:
        _wrap_once(layer.attention, "k_lin")
    if lora_value:
        _wrap_once(layer.attention, "v_lin")
    if lora_projection:
        _wrap_once(layer.attention, "out_lin")
    if lora_mlp:
        _wrap_once(layer.ffn, "lin1")
        _wrap_once(layer.ffn, "lin2")
if lora_head:
    _wrap_once(model, "pre_classifier")
    _wrap_once(model, "classifier")

In [None]:
# Show the module tree again to confirm which layers are now LinearWithLoRA.
print(model)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): LinearWithLoRA(
              (linear): Linear(in_features=768, out_features=768, bias=True)
              (lora): LoRALayer()
            )
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): LinearWithLoRA(
              (linear): Linear(in_features=768, out_features=768, bias=True)
              (lora): LoRALayer()
            )
            (out_lin): Linear(in_features=768, out_features=768, bia

In [None]:
from bs4 import BeautifulSoup

def clean_html(text):
    """Parse ``text`` with BeautifulSoup's "html.parser" and return its text."""
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()


In [None]:
# Read the dataset from Test.csv (path is relative to the working directory).
data = pd.read_csv("Test.csv")

In [None]:
# Work on a 2000-row slice taken from the start of `data`.
df= data [:2000]

In [None]:
# Shape of the full frame `data`, not of the 2000-row slice `df`.
data.shape

(5000, 2)

In [None]:
# Inputs come from the 'text' column and targets from the 'label' column.
X, y = df['text'], df['label']

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Renumber every split from 0, discarding the shuffled index left by the split.
X_train, X_test, y_train, y_test = (
    split.reset_index(drop=True)
    for split in (X_train, X_test, y_train, y_test)
)


In [None]:
# Display the training texts after the index reset.
X_train

Unnamed: 0,text
0,Do-It-Yourself indie horror auteur Todd Sheets...
1,"All I can say is, this movie is made for the L..."
2,"I have never read the book""A wrinkle in time""...."
3,I can remember a college professor commenting ...
4,Seeing as Keifer Sutherland plays my favorite ...
...,...
1595,Another of those flimsy stories coupled with m...
1596,"This is a wonderful look, you should pardon th..."
1597,"OK, here is the deal. I love action movies and..."
1598,In an industry dominated by men and in lack of...


In [None]:
# Spot-check the label stored under index 1 of the training targets.
y_train[1]

np.int64(0)

In [None]:
def clean_html(text):
    """Return the text of ``text`` after parsing it with "html.parser".

    NOTE(review): this repeats the ``clean_html`` defined in an earlier cell.
    """
    parsed = BeautifulSoup(text, "html.parser")
    return parsed.get_text()

class SentimentDataset(Dataset):
    """Dataset of HTML-cleaned, tokenized texts paired with float labels.

    Args:
        texts: Sequence of raw texts (list, pandas Series, ...).
        labels: Sequence of labels aligned with ``texts``.
        tokenizer_name: Checkpoint name passed to
            ``AutoTokenizer.from_pretrained``.
        max_length: Every example is padded or truncated to this many tokens.
    """

    def __init__(self, texts, labels, tokenizer_name="distilbert-base-uncased", max_length=512):
        # Materialise both sequences as plain lists so that ``idx`` in
        # __getitem__ is always positional. Indexing a pandas Series with an
        # integer is label-based and fails (or returns the wrong row) when
        # the Series does not carry a fresh 0..n-1 index.
        self.texts = list(texts)
        self.labels = list(labels)
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.max_length = max_length

    def __len__(self):
        """Return the number of texts."""
        return len(self.texts)

    def __getitem__(self, idx):
        """Return the tokenized example at position ``idx``.

        Returns:
            dict with 'input_ids' and 'attention_mask' tensors of length
            ``max_length`` and a float 'label' tensor.
        """
        raw_text = self.texts[idx]
        label = self.labels[idx]

        # Clean HTML before tokenization
        clean_text = clean_html(raw_text)

        # Tokenize the cleaned text to a fixed length
        encoding = self.tokenizer(
            clean_text,
            padding='max_length',
            truncation=True,
            max_length=self.max_length,
            return_tensors='pt'
        )

        # return_tensors='pt' adds a leading batch dimension of size 1;
        # squeeze it so the DataLoader can stack examples itself.
        return {
            'input_ids': encoding['input_ids'].squeeze(0),           # [max_length]
            'attention_mask': encoding['attention_mask'].squeeze(0), # [max_length]
            'label': torch.tensor(label, dtype=torch.float)
        }


In [None]:
# Build one dataset per split from X_train/X_test and y_train/y_test, then
# batch them. Only the training loader shuffles; the test loader keeps a
# fixed order.
train_dataset = SentimentDataset(texts=X_train, labels=y_train)
test_dataset = SentimentDataset(texts=X_test, labels=y_test)

train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


In [None]:
# Number of batches the training loader yields per epoch.
len(train_loader)

50

In [None]:
# Binary cross-entropy on raw logits; the training loop passes the model's
# logits to it directly, without applying a sigmoid first.
criterion = nn.BCEWithLogitsLoss()

In [None]:
# Give the optimizer only the parameters that still require gradients,
# instead of every parameter of the (mostly frozen) model.
trainable_params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(trainable_params, lr=2e-5, weight_decay=0.01)


In [None]:
# Use CUDA when it is available, otherwise fall back to the CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Move the model to the chosen device (as the last expression, this also
# displays the module tree).
model.to(device)

DistilBertForSequenceClassification(
  (distilbert): DistilBertModel(
    (embeddings): Embeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (layer): ModuleList(
        (0-5): 6 x TransformerBlock(
          (attention): DistilBertSdpaAttention(
            (dropout): Dropout(p=0.1, inplace=False)
            (q_lin): LinearWithLoRA(
              (linear): Linear(in_features=768, out_features=768, bias=True)
              (lora): LoRALayer()
            )
            (k_lin): Linear(in_features=768, out_features=768, bias=True)
            (v_lin): LinearWithLoRA(
              (linear): Linear(in_features=768, out_features=768, bias=True)
              (lora): LoRALayer()
            )
            (out_lin): Linear(in_features=768, out_features=768, bia

In [None]:
from tqdm import tqdm

# Number of full passes over the training loader.
epochs = 5

In [None]:
for epoch in range(epochs):
    print(f"\n🟢 Epoch {epoch+1}/{epochs}")

    # --- Training pass: one optimizer step per batch ---
    model.train()
    total_train_loss = 0
    train_bar = tqdm(train_loader, desc="Training", leave=False)

    for batch in train_bar:
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Add a trailing dimension so labels have shape [batch_size, 1].
        labels = batch["label"].to(device).unsqueeze(1)

        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = outputs.logits
        loss = criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # Read the scalar loss once and reuse it for the sum and the bar.
        batch_loss = loss.item()
        total_train_loss += batch_loss
        train_bar.set_postfix(loss=batch_loss)

    avg_train_loss = total_train_loss / len(train_loader)
    print(f"✅ Avg Training Loss: {avg_train_loss:.4f}")

    # --- Evaluation pass: accuracy at a 0.5 probability threshold ---
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            labels = batch["label"].to(device).unsqueeze(1)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            logits = outputs.logits
            probs = torch.sigmoid(logits)

            preds = probs.gt(0.5).float()
            correct += preds.eq(labels).sum().item()
            total += labels.shape[0]

    accuracy = correct / total
    print(f"🎯 Test Accuracy: {accuracy:.4f}")



🟢 Epoch 1/5




✅ Avg Training Loss: 0.6738
🎯 Test Accuracy: 0.7375

🟢 Epoch 2/5




✅ Avg Training Loss: 0.5134
🎯 Test Accuracy: 0.8300

🟢 Epoch 3/5




✅ Avg Training Loss: 0.3710
🎯 Test Accuracy: 0.8675

🟢 Epoch 4/5




✅ Avg Training Loss: 0.3272
🎯 Test Accuracy: 0.8575

🟢 Epoch 5/5




✅ Avg Training Loss: 0.2812
🎯 Test Accuracy: 0.8750


In [None]:
def count_trainable_parameters(model):
    """Print and return the model's total and trainable parameter counts.

    Args:
        model: Object exposing ``parameters()`` whose items provide
            ``numel()`` and ``requires_grad``.

    Returns:
        tuple: ``(total_params, trainable_params)`` as element counts.
    """
    total_params = 0
    trainable_params = 0
    for tensor in model.parameters():
        size = tensor.numel()
        total_params += size
        if tensor.requires_grad:
            trainable_params += size

    print(f"Total parameters: {total_params:,}")
    print(f"Trainable parameters: {trainable_params:,}")
    return total_params, trainable_params


In [None]:
# Save only the model's state_dict (not the model object itself) to
# model_weights.pth.
torch.save(model.state_dict(), "model_weights.pth")


In [None]:
# Report how many of the model's parameters are still trainable.
count_trainable_parameters(model)

Total parameters: 67,027,969
Trainable parameters: 73,728


(67027969, 73728)

In [None]:
import pickle

# Serialise the whole model object (not just its weights) to model.pkl.
# NOTE(review): an earlier cell already writes the state_dict to
# model_weights.pth — confirm both artefacts are needed.
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)


In [None]:
import pickle

# Restore the model object from model.pkl.
# WARNING: pickle.load can execute arbitrary code while loading, so only
# open pickle files produced by a trusted source (here: the cell above).
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Switch to evaluation mode (as the last expression, this also displays the
# module tree).
model.eval()
