In [1]:
from functools import reduce

import pandas as pd
from transformers import (
    AutoTokenizer, 
    XLMRobertaForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import numpy as np
import os
from dataclasses import dataclass
import wandb

import pennylane as qml

os.environ['CUDA_VISIBLE_DEVICES'] = '5'
os.environ['TOKENIZERS_PARALLELISM'] = 'false'

os.environ['WANDB_PROJECT'] = 'quantum-peft-for-sentiment-analysis'
os.environ['WANDB_ENTITY'] = 'uvd174'
os.environ['WANDB_WATCH'] = 'false'



In [2]:
class QuantumLayer(nn.Module):
    def __init__(self, n_qubits: int):
        super().__init__()
        self.n_qubits = n_qubits
        
        self._dev = qml.device('default.qubit', wires=n_qubits)
        self._weight_shapes = {'weights': (3, n_qubits, 3)}
        
        @qml.qnode(self._dev)
        def qnode(inputs, weights):
            qml.templates.AngleEmbedding(inputs, wires=range(self.n_qubits))
            qml.templates.StronglyEntanglingLayers(weights, wires=range(self.n_qubits))
            return [qml.expval(qml.PauliZ(wires=i)) for i in range(self.n_qubits)]
        
        self.qlayer = qml.qnn.TorchLayer(qnode, self._weight_shapes)

    def forward(self, x):
        return self.qlayer(x)


@dataclass
class QuantumLoraConfig:
    r: int
    lora_alpha: int
    lora_dropout: float


class QuantumLoraAdapter(nn.Module):
    def __init__(self, layer: nn.Module, config: QuantumLoraConfig):
        super().__init__()
        assert isinstance(layer, nn.Linear), 'Layer must be an instance of nn.Linear'
        
        self.config = config
        
        self.lora_A = nn.Linear(layer.in_features, config.r, bias=False)
        self.lora_B = nn.Linear(config.r, layer.out_features, bias=False)
        nn.init.zeros_(self.lora_B.weight)
        self.scaling = config.lora_alpha / config.r
        self.dropout = nn.Dropout(p=config.lora_dropout)
        
        self.quantum_layer = QuantumLayer(config.r)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.lora_B(self.quantum_layer(self.lora_A(self.dropout(x)))) * self.scaling

class QuantumLoraLinear(nn.Module):
    def __init__(self, layer: nn.Linear, config: QuantumLoraConfig):
        super().__init__()
        self.base_layer = layer
        self.lora = QuantumLoraAdapter(layer, config)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.base_layer(x) + self.lora(x)

In [3]:
def apply_quantum_lora_to_xlm_roberta_model(
        model: XLMRobertaForSequenceClassification,
        lora_config: QuantumLoraConfig,
):
    for name, module in model.named_modules():
        parent_name = '.'.join(name.split('.')[:-1])
        module_name = name.split('.')[-1]
        
        if isinstance(module, nn.Linear) and ('query' in module_name or 'key' in module_name or 'value' in module_name):
            parent = model
            if parent_name:
                parent = reduce(getattr, parent_name.split('.'), model)
            
            quantum_layer = QuantumLoraLinear(module, lora_config)
            setattr(parent, module_name, quantum_layer)
    
    return model

In [4]:
# Read the dataset from CSV file
df = pd.read_csv('task-3-dataset.csv')
df.head()

Unnamed: 0,отзывы,разметка
0,Оболочка после чистого андроида тоже очень нек...,-
1,"Нормальный телефон, очень красивая задняя панель",+
2,Деньги на ветер .,-
3,ну так себе,-
4,Ценник вполне адекватный для такой мощной нови...,+


In [5]:
# Display target distribution
sentiment_counts = df['разметка'].value_counts()
print("Target Distribution:")
print(sentiment_counts)
print("\nPercentages:")
print(sentiment_counts / len(df) * 100)

Target Distribution:
разметка
+    121
-     89
Name: count, dtype: int64

Percentages:
разметка
+    57.619048
-    42.380952
Name: count, dtype: float64


In [6]:
# Create dataset class
class SentimentDataset(torch.utils.data.Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.encodings = tokenizer(texts, truncation=True)
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item['labels'] = self.labels[idx]
        return item

    def __len__(self):
        return len(self.labels)

In [None]:
# Load pretrained model and tokenizer
model_name = 'FacebookAI/xlm-roberta-large'
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = XLMRobertaForSequenceClassification.from_pretrained(model_name, num_labels=2)

# Configure LoRA parameters
lora_config = QuantumLoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
)
model = apply_quantum_lora_to_xlm_roberta_model(model, lora_config)

# Freeze all layers
for param in model.parameters():
    param.requires_grad = False
# Unfreeze lora adapters
for name, module in model.named_modules():
    if isinstance(module, QuantumLoraAdapter):
        for param in module.parameters():
            param.requires_grad = True
# Unfreeze the classifier
for param in model.classifier.parameters():
    param.requires_grad = True

# Move model to GPU if available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Prepare data
texts = df['отзывы'].tolist()
labels = (df['разметка'] == '+').astype(int).tolist()

# Split data
train_texts, val_texts, train_labels, val_labels = train_test_split(
    texts, labels, test_size=0.05, random_state=42, stratify=labels,
)

# Create datasets
train_dataset = SentimentDataset(train_texts, train_labels, tokenizer)
val_dataset = SentimentDataset(val_texts, val_labels, tokenizer)

# Define metrics
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return {
        'accuracy': (predictions == labels).mean()
    }

# Set up training arguments
training_args = TrainingArguments(
    output_dir='final-results',
    num_train_epochs=70,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=256,
    warmup_ratio=0.1,
    learning_rate=1e-4,
    weight_decay=0.01,
    logging_steps=10,
    eval_strategy='epoch',
    save_strategy='epoch',
    save_total_limit=6,
    load_best_model_at_end=True,
    metric_for_best_model='eval_accuracy',
    greater_is_better=True,
    dataloader_num_workers=8,
    dataloader_pin_memory=True,
    save_safetensors=False,
    report_to='wandb',
    run_name='xlm-roberta-large',
)

# Create trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=DataCollatorWithPadding(tokenizer=tokenizer),
    compute_metrics=compute_metrics,
)

# Train the model
try:
    trainer.train()
except Exception as e:
    raise e
finally:
    wandb.finish()
# Save the model
trainer.save_model()

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33muvd174[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch,Training Loss,Validation Loss,Accuracy
1,0.7303,0.704864,0.454545
2,0.7031,0.694084,0.545455
3,0.6773,0.701789,0.545455
4,0.6859,0.696995,0.545455
5,0.6727,0.704966,0.545455
6,0.6493,0.718236,0.545455
7,0.6809,0.704474,0.545455
8,0.6533,0.702782,0.545455
9,0.6584,0.725158,0.545455
10,0.6349,0.701692,0.454545


In [21]:
# Read the test data from CSV file
test_df = pd.read_csv('test50.csv')
test_df.head()

Unnamed: 0,Отзывы,разметка
0,"Очень разочарован QPhone. Ожидал, что квантовы...",-
1,Очень быстро разряжается. Просто полное разоча...,-
2,"Ну что тут сказать, телефон хороший, флагман т...",+
3,QPhone Pro MAX просто бомба! Быстрее любого см...,+
4,Слишком дорого для массового рынка – не каждый...,-


In [30]:
# Prepare test data
test_texts = test_df['Отзывы'].tolist()
test_labels = (test_df['разметка'] == '+').astype(int).tolist()

# Create test dataset
test_dataset = SentimentDataset(test_texts, test_labels, tokenizer)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=256)

In [61]:
# Load the best model from selected checkpoint
model = XLMRobertaForSequenceClassification.from_pretrained(
    'FacebookAI/xlm-roberta-large', num_labels=2,
)
# Configure LoRA parameters
lora_config = QuantumLoraConfig(
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
)
model = apply_quantum_lora_to_xlm_roberta_model(model, lora_config)
model.load_state_dict(torch.load('final-results/pytorch_model.bin', weights_only=True))
model.to(device)

pass

Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at FacebookAI/xlm-roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [62]:
# Set model to evaluation mode
model.eval()

# Initialize metrics
total_loss = 0
predictions = []
all_labels = []

# Evaluation loop
with torch.no_grad():
    for batch in test_dataloader:
        # Move batch to device
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device) 
        labels = batch['labels'].to(device)
        
        # Forward pass
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        logits = outputs.logits
        
        # Accumulate loss
        total_loss += loss.item()
        
        # Get predictions
        batch_preds = torch.argmax(logits, dim=1).cpu().numpy()
        predictions.extend(batch_preds)
        all_labels.extend(labels.cpu().numpy())

# Calculate metrics
avg_loss = total_loss / len(test_dataloader)
accuracy = (np.array(predictions) == np.array(all_labels)).mean()

print("\nTest Results:")
print(f"Test Accuracy: {accuracy:.4f}")
print(f"Test Loss: {avg_loss:.4f}")

print("\nDetailed Classification Report:")
print(classification_report(all_labels, predictions))

print("\nConfusion Matrix:")
print(confusion_matrix(all_labels, predictions))


Test Results:
Test Accuracy: 0.8400
Test Loss: 0.8472

Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.95      0.74      0.83        27
           1       0.76      0.96      0.85        23

    accuracy                           0.84        50
   macro avg       0.86      0.85      0.84        50
weighted avg       0.86      0.84      0.84        50


Confusion Matrix:
[[20  7]
 [ 1 22]]
