In [2]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup, RobertaTokenizer, RobertaModel
import pandas as pd
from sklearn.metrics import f1_score
import torch.nn as nn

# Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, file_path, tokenizer, max_len):
        self.data = pd.read_csv(file_path)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = str(self.data.iloc[index, 0])
        label = self.data.iloc[index, 1]
        # Convert labels to integers (0: neutral, 1: positive, 2: negative)
        if label == 'positive':
            label = 1
        elif label == 'negative':
            label = 2
        else:
            label = 0

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )

        return {
            'input_ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Define custom models
class CustomBERTModel(nn.Module):
    def __init__(self, model_name, num_labels):
        super(CustomBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.cnn = nn.Conv1d(in_channels=768, out_channels=768, kernel_size=3, padding=1)
        self.fc = nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        cnn_output = self.cnn(sequence_output.permute(0, 2, 1)).max(dim=2)[0]
        logits = self.fc(cnn_output)
        return logits

class CustomRoBERTaModel(nn.Module):
    def __init__(self, model_name, num_labels):
        super(CustomRoBERTaModel, self).__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.cnn = nn.Conv1d(in_channels=768, out_channels=768, kernel_size=3, padding=1)
        self.fc = nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        cnn_output = self.cnn(sequence_output.permute(0, 2, 1)).max(dim=2)[0]
        logits = self.fc(cnn_output)
        return logits

# Load data
tokenizer_bert = BertTokenizer.from_pretrained('bert-base-german-dbmdz-uncased')
tokenizer_roberta = RobertaTokenizer.from_pretrained('roberta-base')

train_dataset = SentimentDataset('/content/drive/MyDrive/sentiment-train.csv', tokenizer_bert, max_len=128)
test_dataset = SentimentDataset('/content/drive/MyDrive/sentiment-test.csv', tokenizer_bert, max_len=128)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Initialize models
model_bert = CustomBERTModel('bert-base-german-dbmdz-uncased', num_labels=3)
model_roberta = CustomRoBERTaModel('roberta-base', num_labels=3)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_bert.to(device)
model_roberta.to(device)

# Training function
def train(models, train_loader, test_loader, epochs=3, accumulation_steps=4):
    optimizers = [torch.optim.AdamW(model.parameters(), lr=2e-5) for model in models]
    total_steps = len(train_loader) * epochs
    schedulers = [get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps) for optimizer in optimizers]

    for epoch in range(epochs):
        for model in models:
            model.train()

        for i, batch in enumerate(train_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits_list = []
            loss_list = []

            for model, optimizer in zip(models, optimizers):
                optimizer.zero_grad()
                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                logits_list.append(logits)
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits, labels)
                loss_list.append(loss)
                loss.backward()

            if (i + 1) % accumulation_steps == 0:
                for optimizer in optimizers:
                    optimizer.step()
                for scheduler in schedulers:
                    scheduler.step()

                # Reset gradients
                for model in models:
                    model.zero_grad()

        print(f"Epoch {epoch + 1}/{epochs} completed.")
        evaluate(models, test_loader)

def evaluate(models, test_loader):
    for model in models:
        model.eval()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits_list = []
            for model in models:
                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                logits_list.append(logits)

            # Average predictions from all models
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
            _, predicted = torch.max(avg_logits, dim=1)

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # Calculate micro F1 score
    f1 = f1_score(all_labels, all_predictions, average='micro')
    print(f"Micro F1 Score: {f1:.4f}")

# Train the models
train([model_bert, model_roberta], train_loader, test_loader, epochs=3, accumulation_steps=4)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/247k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/487k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/433 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/442M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3 completed.
Micro F1 Score: 0.7406
Epoch 2/3 completed.
Micro F1 Score: 0.7279
Epoch 3/3 completed.
Micro F1 Score: 0.7767


In [11]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup, RobertaTokenizer, RobertaModel
import pandas as pd
from sklearn.metrics import f1_score
import torch.nn as nn

# Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, file_path, tokenizer, max_len):
        self.data = pd.read_csv(file_path)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = str(self.data.iloc[index, 0])
        label = self.data.iloc[index, 1]
        if label == 'positive':
            label = 1
        elif label == 'negative':
            label = 2
        else:
            label = 0

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )

        return {
            'input_ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Define custom models
class SimpleBERTModel(nn.Module):
    def __init__(self, model_name, num_labels):
        super(SimpleBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.fc = nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.pooler_output
        logits = self.fc(sequence_output)
        return logits

class SimpleRoBERTaModel(nn.Module):
    def __init__(self, model_name, num_labels):
        super(SimpleRoBERTaModel, self).__init__()
        self.roberta = RobertaModel.from_pretrained(model_name)
        self.fc = nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.roberta(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.pooler_output
        logits = self.fc(sequence_output)
        return logits

# Load data
tokenizer_bert = BertTokenizer.from_pretrained('bert-base-german-dbmdz-uncased')
tokenizer_roberta = RobertaTokenizer.from_pretrained('roberta-base')

train_dataset = SentimentDataset('/content/drive/MyDrive/sentiment-train.csv', tokenizer_bert, max_len=128)
test_dataset = SentimentDataset('/content/drive/MyDrive/sentiment-test.csv', tokenizer_bert, max_len=128)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Initialize models
model_bert = SimpleBERTModel('bert-base-german-dbmdz-uncased', num_labels=3)
model_roberta = SimpleRoBERTaModel('roberta-base', num_labels=3)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_bert.to(device)
model_roberta.to(device)

# Training function
def train(models, train_loader, test_loader, epochs=3):
    optimizers = [AdamW(model.parameters(), lr=2e-5) for model in models]
    total_steps = len(train_loader) * epochs
    schedulers = [get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps) for optimizer in optimizers]

    for epoch in range(epochs):
        for model in models:
            model.train()

        for i, batch in enumerate(train_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits_list = []
            loss_list = []

            for model, optimizer in zip(models, optimizers):
                optimizer.zero_grad()
                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                logits_list.append(logits)
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits, labels)
                loss_list.append(loss)
                loss.backward()
                optimizer.step()
            for scheduler in schedulers:
                scheduler.step()

        print(f"Epoch {epoch + 1}/{epochs} completed.")
        evaluate(models, test_loader)

def evaluate(models, test_loader):
    for model in models:
        model.eval()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits_list = []
            for model in models:
                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                logits_list.append(logits)

            # Average predictions from all models
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
            _, predicted = torch.max(avg_logits, dim=1)

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # Calculate micro F1 score
    f1 = f1_score(all_labels, all_predictions, average='micro')
    print(f"Micro F1 Score: {f1:.4f}")

# Train the models
train([model_bert, model_roberta], train_loader, test_loader, epochs=3)


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-base and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 1/3 completed.
Micro F1 Score: 0.7761
Epoch 2/3 completed.
Micro F1 Score: 0.7941
Epoch 3/3 completed.
Micro F1 Score: 0.7914


In [12]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup
import pandas as pd
from sklearn.metrics import f1_score
import torch.nn as nn

# Define a custom dataset
class SentimentDataset(Dataset):
    def __init__(self, file_path, tokenizer, max_len):
        self.data = pd.read_csv(file_path)
        self.tokenizer = tokenizer
        self.max_len = max_len

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        text = str(self.data.iloc[index, 0])
        label = self.data.iloc[index, 1]
        # Convert labels to integers (0: neutral, 1: positive, 2: negative)
        if label == 'positive':
            label = 1
        elif label == 'negative':
            label = 2
        else:
            label = 0

        inputs = self.tokenizer.encode_plus(
            text,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )

        return {
            'input_ids': torch.tensor(inputs['input_ids'], dtype=torch.long),
            'attention_mask': torch.tensor(inputs['attention_mask'], dtype=torch.long),
            'labels': torch.tensor(label, dtype=torch.long)
        }

# Define custom models
class CustomBERTModel(nn.Module):
    def __init__(self, model_name, num_labels):
        super(CustomBERTModel, self).__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.cnn = nn.Conv1d(in_channels=768, out_channels=768, kernel_size=3, padding=1)
        self.fc = nn.Linear(768, num_labels)

    def forward(self, input_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        sequence_output = outputs.last_hidden_state
        cnn_output = self.cnn(sequence_output.permute(0, 2, 1)).max(dim=2)[0]
        logits = self.fc(cnn_output)
        return logits

# Load data
tokenizer_bert = BertTokenizer.from_pretrained('bert-base-uncased')

train_dataset = SentimentDataset('/content/drive/MyDrive/sentiment-train.csv', tokenizer_bert, max_len=128)
test_dataset = SentimentDataset('/content/drive/MyDrive/sentiment-test.csv', tokenizer_bert, max_len=128)

train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=16, shuffle=False)

# Initialize models
model_bert = CustomBERTModel('bert-base-uncased', num_labels=3)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_bert.to(device)

# Training function
def train(models, train_loader, test_loader, epochs=3, accumulation_steps=4):
    optimizers = [torch.optim.AdamW(model.parameters(), lr=2e-5) for model in models]
    total_steps = len(train_loader) * epochs
    schedulers = [get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps) for optimizer in optimizers]

    for epoch in range(epochs):
        for model in models:
            model.train()

        for i, batch in enumerate(train_loader):
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits_list = []
            loss_list = []

            for model, optimizer in zip(models, optimizers):
                optimizer.zero_grad()
                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                logits_list.append(logits)
                loss_fct = nn.CrossEntropyLoss()
                loss = loss_fct(logits, labels)
                loss_list.append(loss)
                loss.backward()

            if (i + 1) % accumulation_steps == 0:
                for optimizer in optimizers:
                    optimizer.step()
                for scheduler in schedulers:
                    scheduler.step()

                # Reset gradients
                for model in models:
                    model.zero_grad()

        print(f"Epoch {epoch + 1}/{epochs} completed.")
        evaluate(models, test_loader)

def evaluate(models, test_loader):
    for model in models:
        model.eval()
    all_labels = []
    all_predictions = []

    with torch.no_grad():
        for batch in test_loader:
            input_ids = batch['input_ids'].to(device)
            attention_mask = batch['attention_mask'].to(device)
            labels = batch['labels'].to(device)

            logits_list = []
            for model in models:
                logits = model(input_ids=input_ids, attention_mask=attention_mask)
                logits_list.append(logits)

            # Average predictions from all models
            avg_logits = torch.mean(torch.stack(logits_list), dim=0)
            _, predicted = torch.max(avg_logits, dim=1)

            all_labels.extend(labels.cpu().numpy())
            all_predictions.extend(predicted.cpu().numpy())

    # Calculate micro F1 score
    f1 = f1_score(all_labels, all_predictions, average='micro')
    print(f"Micro F1 Score: {f1:.4f}")

# Train the models
train([model_bert], train_loader, test_loader, epochs=3, accumulation_steps=4)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Epoch 1/3 completed.
Micro F1 Score: 0.7112
Epoch 2/3 completed.
Micro F1 Score: 0.7146
Epoch 3/3 completed.
Micro F1 Score: 0.7119


In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
