In [1]:
import torch
import evaluate
import numpy as np

from data import GODData
from transformers import Trainer, TrainingArguments, VideoMAEConfig, VideoMAEForVideoClassification

In [2]:
# load data
print("Loading data...")
train_dataset = GODData(
    subject="01", 
    session_id="01", 
    task="perception", 
    train=True, 
    limit_size=200,
)
eval_dataset = GODData(
    subject="01", 
    session_id="01", 
    task="perception", 
    train=False, 
    limit_size=50,
)

print(f"# train: {len(train_dataset):>5}\n# test: {len(eval_dataset):>5}")

Loading data...
# train:   200
# test:    50


In [3]:
# instantiate model
print("Instantiating model...")
config = VideoMAEConfig(
    image_size=64,
    num_channels=3,
    num_frames=50,
    num_labels=150,
    problem_type="single_label_classification",
)

model = VideoMAEForVideoClassification(config)

Instantiating model...


In [4]:
# data collation
def data_collator(datapoints):
    batch = {}
    batch["pixel_values"] = torch.stack([dp[0].permute(1, 0, 2, 3) for dp in datapoints])
    batch["labels"] = torch.stack([dp[1] for dp in datapoints])
    return batch

In [5]:
# log metrics
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

In [6]:
# instantiate trainer
print("Instantiating trainer...")
training_args = TrainingArguments(
    output_dir="test_trainer", 
    evaluation_strategy="epoch",
    num_train_epochs=10,
    per_device_train_batch_size=16,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)

Instantiating trainer...


In [7]:
print("Training...")
trainer.train()

***** Running training *****
  Num examples = 200
  Num Epochs = 10
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 16
  Gradient Accumulation steps = 1
  Total optimization steps = 130
  Number of trainable parameters = 86342550
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Training...


Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mv15hv4[0m. Use [1m`wandb login --relogin`[0m to force relogin


OutOfMemoryError: CUDA out of memory. Tried to allocate 118.00 MiB (GPU 0; 10.76 GiB total capacity; 4.17 GiB already allocated; 118.56 MiB free; 4.35 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF