In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import AdamW
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms, datasets
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer
import pandas as pd
import numpy as np
from PIL import Image
from sklearn.metrics import precision_recall_fscore_support ,accuracy_score

2024-07-31 15:32:56.296685: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-31 15:32:56.296821: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-31 15:32:56.435171: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
# Load and preprocess the dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Load the datasets
train_dataset = datasets.ImageFolder(root='/kaggle/input/ferplus/train', transform=transform)
test_dataset = datasets.ImageFolder(root='/kaggle/input/ferplus/test', transform=transform)
val_dataset = datasets.ImageFolder(root='/kaggle/input/ferplus/validation', transform=transform)

In [3]:
class CustomImageDataset(Dataset):
    def __init__(self, image_folder_dataset):
        self.image_folder_dataset = image_folder_dataset

    def __len__(self):
        return len(self.image_folder_dataset)

    def __getitem__(self, idx):
        img, label = self.image_folder_dataset[idx]
        return {"pixel_values": img, "labels": label}

# Create custom datasets
train_dataset = CustomImageDataset(train_dataset)
test_dataset = CustomImageDataset(train_dataset)
val_dataset = CustomImageDataset(val_dataset)

In [4]:
# Load the Vision Transformer model
model = AutoModelForImageClassification.from_pretrained(
    'microsoft/beit-base-patch16-224-pt22k-ft22k',
    num_labels=8,
#     output_attentions=False,
#     output_hidden_states=False,
#     attention_probs_dropout_prob=0.5,
#     hidden_dropout_prob=0.5,
    force_download=True,
    ignore_mismatched_sizes=True,
)
USE_CPU = False
device = "cuda" if torch.cuda.is_available() else "cpu"
print(device)
model.to(device)

config.json:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/414M [00:00<?, ?B/s]

  return self.fget.__get__(instance, owner)()
Some weights of BeitForImageClassification were not initialized from the model checkpoint at microsoft/beit-base-patch16-224-pt22k-ft22k and are newly initialized because the shapes did not match:
- classifier.weight: found shape torch.Size([21841, 768]) in the checkpoint and torch.Size([8, 768]) in the model instantiated
- classifier.bias: found shape torch.Size([21841]) in the checkpoint and torch.Size([8]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


BeitForImageClassification(
  (beit): BeitModel(
    (embeddings): BeitEmbeddings(
      (patch_embeddings): BeitPatchEmbeddings(
        (projection): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
      )
      (dropout): Dropout(p=0.0, inplace=False)
    )
    (encoder): BeitEncoder(
      (layer): ModuleList(
        (0): BeitLayer(
          (attention): BeitAttention(
            (attention): BeitSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=False)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
              (relative_position_bias): BeitRelativePositionBias()
            )
            (output): BeitSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.0, inplace=False)
            )
          )
          (int

In [5]:
# Define training arguments
training_args = TrainingArguments(
    output_dir='model/results',
    num_train_epochs=8,
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    eval_strategy="epoch",  # Changed to use epochs
    save_strategy="epoch",        # Changed to use epochs
    logging_dir='model/logs',
    learning_rate=1e-5,
    weight_decay=0.01,
    logging_steps=10,            # You might want to keep this or adjust it
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    save_safetensors=False,
    report_to=[],
)

# Define a function to compute metrics
def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, pred, average='macro')
    acc = accuracy_score(labels, pred)
    return {
        'accuracy': acc,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics,
)

In [6]:
# Train the model
trainer.train()

# Evaluate the model
eval_result = trainer.evaluate()



Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.5153,0.693082,0.758782,0.758104,0.765521,0.756478
2,0.3539,0.783536,0.766695,0.760829,0.775245,0.761995
3,0.2483,0.9262,0.755545,0.747871,0.773309,0.750751
4,0.1661,0.912758,0.759621,0.751771,0.768044,0.754048
5,0.1593,1.013309,0.756624,0.750079,0.776365,0.752671
6,0.115,1.084404,0.758782,0.752469,0.776772,0.754794
7,0.1069,1.173588,0.753747,0.745408,0.776919,0.748177
8,0.0413,1.174948,0.753387,0.745587,0.774152,0.748628


