In [None]:
# Imports
import torch
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
from transformers import ViTForImageClassification, AutoImageProcessor
from datasets import load_dataset
from sklearn.metrics import accuracy_score, classification_report
from tqdm import tqdm

# Device: use the GPU when CUDA reports one, otherwise stay on the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# 1️⃣ Load MELD dataset from Hugging Face
# NOTE(review): the recorded output of this cell ends in DatasetNotFoundError
# for this repo id — confirm the id and access rights before re-running.
dataset = load_dataset("felixyue/meld")

# Check sample: show the first training row to inspect the available columns
first_example = dataset['train'][0]
print(first_example)

# 2️⃣ Feature extractor for ViT (supplies the normalisation statistics used below)
feature_extractor = AutoImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

# 3️⃣ Image transform function
# Steps, in order: resize to 224x224, convert to a tensor, then normalise with
# the mean/std published by the image processor.
resize_step = transforms.Resize((224,224))
normalize_step = transforms.Normalize(
    mean=feature_extractor.image_mean,
    std=feature_extractor.image_std,
)
transform = transforms.Compose([resize_step, transforms.ToTensor(), normalize_step])

def preprocess_images(example):
    """Load the image referenced by one dataset row and attach its tensor.

    Args:
        example: Mapping for a single row. Its 'img_path' entry is opened
            with PIL. NOTE(review): assumes the dataset exposes an
            'img_path' column holding a readable file path — confirm
            against the actual schema.

    Returns:
        The same mapping, with 'pixel_values' set to the result of applying
        the module-level `transform` to the RGB image.
    """
    # Image.open is lazy and keeps the underlying file open. convert() yields
    # an independent in-memory copy, so the handle can be released right after
    # instead of leaking one descriptor per row while the dataset is mapped.
    with Image.open(example['img_path']) as source_image:
        image = source_image.convert('RGB')
    example['pixel_values'] = transform(image)
    return example

# Apply transforms
# Run the per-row preprocessing over each split, in train -> validation -> test order
train_dataset, val_dataset, test_dataset = (
    dataset[split_name].map(preprocess_images)
    for split_name in ('train', 'validation', 'test')
)

# Set format for PyTorch
# Only these two columns are exposed (as torch tensors) when rows are read
columns = ['pixel_values', 'emotion']
for split_dataset in (train_dataset, val_dataset, test_dataset):
    split_dataset.set_format(type='torch', columns=columns)

# 4️⃣ DataLoaders
# Only the training split is shuffled; validation and test keep dataset order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
val_loader = DataLoader(val_dataset, batch_size=16)
test_loader = DataLoader(test_dataset, batch_size=16)

# 5️⃣ Load pretrained ViT
# The checkpoint is loaded with a classification head sized to `num_labels`.
num_labels = 7  # MELD has 7 emotions
model = ViTForImageClassification.from_pretrained(
    'google/vit-base-patch16-224-in21k',
    num_labels=num_labels
)
model.to(device)

# 6️⃣ Optimizer
# `transformers.AdamW` was deprecated and is no longer exported by recent
# transformers releases (the import raises ImportError there), so PyTorch's
# own implementation is used instead. `eps` and `weight_decay` are pinned to
# the defaults of the old transformers class so the update rule is unchanged
# (torch's defaults are eps=1e-8 and weight_decay=1e-2).
optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=2e-5,
    eps=1e-6,
    weight_decay=0.0,
)

# 7️⃣ Training loop
def evaluate(model, loader, device):
    """Run `model` over `loader` without gradients and collect predictions.

    Shared by the per-epoch validation pass and the final test pass so the
    two cannot drift apart.

    Args:
        model: Classifier whose forward pass accepts `pixel_values` and
            returns an object exposing `.logits`.
        loader: Iterable of batches, each indexed with the keys
            'pixel_values' and 'emotion'.
        device: Device the input tensors are moved to before the forward pass.

    Returns:
        Tuple `(true_labels, preds)`: two lists holding, per sample, the
        reference label and the arg-max class index taken over dimension 1
        of the logits.
    """
    model.eval()  # the caller must switch back with model.train() before training again
    preds, true_labels = [], []
    with torch.no_grad():
        for batch in loader:
            pixel_values = batch['pixel_values'].to(device)
            logits = model(pixel_values=pixel_values).logits
            preds.extend(torch.argmax(logits, dim=1).cpu().numpy())
            # Labels are only compared on the CPU, so they are not moved to `device`.
            true_labels.extend(batch['emotion'].cpu().numpy())
    return true_labels, preds


epochs = 3
for epoch in range(epochs):
    # evaluate() leaves the model in eval mode, so training mode is re-enabled every epoch.
    model.train()
    train_loss = 0.0
    for batch in tqdm(train_loader, desc=f"Training Epoch {epoch+1}"):
        pixel_values = batch['pixel_values'].to(device)
        labels = batch['emotion'].to(device)

        # Passing `labels` makes the model return the loss alongside the logits.
        outputs = model(pixel_values=pixel_values, labels=labels)
        loss = outputs.loss
        train_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Mean of the per-batch losses over the epoch.
    print(f"Epoch {epoch+1} - Avg Loss: {train_loss/len(train_loader):.4f}")

    # Validation
    true_labels, preds = evaluate(model, val_loader, device)
    print("Validation Accuracy:", accuracy_score(true_labels, preds))
    print(classification_report(true_labels, preds))

# 8️⃣ Test evaluation
true_labels, preds = evaluate(model, test_loader, device)
print("Test Accuracy:", accuracy_score(true_labels, preds))
print(classification_report(true_labels, preds))

Using device: cpu


DatasetNotFoundError: Dataset 'felixyue/meld' doesn't exist on the Hub or cannot be accessed.

: 