In [None]:
# Attach Google Drive to the Colab runtime; later cells read the dataset from
# a path underneath this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

In [None]:
pip install transformers

In [None]:
pip install SentencePiece

In [None]:
import torch
import torch.optim as optim
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
from transformers import AlbertTokenizer, AlbertForSequenceClassification  
from sklearn.model_selection import train_test_split
import numpy as np

# Read the puzzle records from the .npy file on the mounted Drive.
# NOTE: allow_pickle=True lets NumPy unpickle arbitrary Python objects, so this
# must only ever be pointed at a trusted file.
DATA_PATH = "/content/drive/MyDrive/BrainTeaser/word_puzzle.npy"
data = np.load(DATA_PATH, allow_pickle=True)[()]

# Pull the three fields used downstream into parallel lists (one item per record).
questions = []
options = []
correct_indices = []
for entry in data:
    questions.append(entry['question'])
    options.append(entry['choice_list'])
    correct_indices.append(entry['label'])

# Tokenize and preprocess the data using ALBERT tokenizer
tokenizer = AlbertTokenizer.from_pretrained("albert-xxlarge-v2")


def _format_choices(choices):
    """Collapse one example's answer choices into a single string.

    A string is returned unchanged. Any other iterable is rendered as
    "(0) first (1) second ..." so each choice keeps its position in the text.
    Presumably the label is an index into this list (it is stored in
    `correct_indices`) -- TODO confirm against the dataset.
    """
    if isinstance(choices, str):
        return choices
    return " ".join(f"({idx}) {choice}" for idx, choice in enumerate(choices))


def _encode_pairs(batch_questions, batch_options):
    """Tokenize (question, joined choices) pairs into padded PyTorch tensors."""
    # Each text_pair element must be ONE string. A list of strings in that
    # position is read by the tokenizer as already-split tokens, so every whole
    # answer choice would be looked up as a single vocabulary piece instead of
    # being sub-word tokenized.
    joined_options = [_format_choices(choices) for choices in batch_options]
    # NOTE(review): question and all choices now share the 64-token budget;
    # confirm that truncation does not clip the later choices.
    return tokenizer(
        batch_questions,
        joined_options,
        truncation=True,
        padding=True,
        return_tensors='pt',
        max_length=64,
    )


# Split the data into training and validation sets (80% train, 20% validation).
# A fixed random_state makes the split identical on every Restart & Run All.
SPLIT_SEED = 42
(
    train_questions,
    val_questions,
    train_options,
    val_options,
    train_correct_indices,
    val_correct_indices,
) = train_test_split(
    questions,
    options,
    correct_indices,
    test_size=0.2,
    random_state=SPLIT_SEED,
)

# Tokenize your data
train_encodings = _encode_pairs(train_questions, train_options)
val_encodings = _encode_pairs(val_questions, val_options)

# Run on the GPU when CUDA is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Mini-batch size shared by the training and validation loaders.
batch_size = 16

# Token ids and attention masks produced by the tokenizer for each split.
train_input_ids, train_attention_mask = (
    train_encodings[key] for key in ('input_ids', 'attention_mask')
)
val_input_ids, val_attention_mask = (
    val_encodings[key] for key in ('input_ids', 'attention_mask')
)

# Labels as tensors, one entry per example.
train_labels = torch.tensor(train_correct_indices)
val_labels = torch.tensor(val_correct_indices)

# Bundle (input_ids, attention_mask, label) per example.
# The validation dataset gets its own freshly built label tensor, separate
# from val_labels.
train_dataset = TensorDataset(train_input_ids, train_attention_mask, train_labels)
val_dataset = TensorDataset(
    val_input_ids,
    val_attention_mask,
    torch.tensor(val_correct_indices),
)

# Only the training loader shuffles; validation keeps dataset order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_dataset, batch_size=batch_size)

# Hyperparameters for this fine-tuning run.
num_epochs = 1
learning_rate = 1e-4
num_classes = 4

# ALBERT sequence-classification model configured for `num_classes` output labels.
model = AlbertForSequenceClassification.from_pretrained(
    "albert-xxlarge-v2",
    num_labels=num_classes,
)

# Loss function and optimizer. The training loop in this notebook reads the
# loss from the model output (labels are passed to the forward call), so
# `criterion` is only defined here, not applied there.
criterion = nn.CrossEntropyLoss()
optimizer = optim.AdamW(model.parameters(), lr=learning_rate)

# Move the model's parameters to the selected device.
model.to(device)

# Fine-tune the model: one optimizer step per mini-batch, then report the mean
# per-batch loss for each epoch.

# Number of mini-batches per epoch; used as the divisor for the epoch mean so
# the report does not depend on a loop variable surviving the inner loop.
num_batches = len(train_loader)

for epoch in range(num_epochs):
    # Switch to training mode (this does not "compile" anything). Set per epoch
    # so it still holds if an evaluation pass is ever run between epochs.
    model.train()
    running_loss = 0.0

    for batch in train_loader:
        # Each batch is (input_ids, attention_mask, labels); move all to the device.
        inputs, attention_mask, labels = (tensor.to(device) for tensor in batch)

        optimizer.zero_grad()
        # Passing labels makes the model return the loss alongside the logits.
        outputs = model(input_ids=inputs, attention_mask=attention_mask, labels=labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # Guard against an empty loader instead of failing on an undefined index.
    mean_loss = running_loss / num_batches if num_batches else float("nan")
    print(f"Epoch {epoch + 1}, Loss: {mean_loss}")

print("Finished Training")


In [None]:
# Measure classification accuracy on the validation split.
model.eval()  # evaluation mode for the forward passes below

# Running tallies across all validation batches.
correct_predictions = 0
total_examples = 0

# No gradients are needed for scoring, so the whole pass runs under no_grad.
with torch.no_grad():
    for inputs, attention_mask, labels in val_loader:
        inputs, attention_mask, labels = (
            tensor.to(device) for tensor in (inputs, attention_mask, labels)
        )

        # Forward pass without labels: only the logits are used.
        outputs = model(input_ids=inputs, attention_mask=attention_mask)

        # Predicted class = index of the largest logit along dim 1.
        predicted_labels = outputs.logits.argmax(dim=1)

        # Count matches in this batch and the number of labels seen.
        correct_predictions += int((predicted_labels == labels).sum())
        total_examples += labels.numel()

# Fraction of validation labels predicted correctly, printed as a percentage.
accuracy = correct_predictions / total_examples
print(f"Validation Accuracy: {accuracy * 100:.2f}%")
