In [1]:
# Install the Hugging Face libraries imported below into the running kernel's
# environment. Note: torch and torchvision are also imported by this notebook
# but are not installed by this cell.
%pip install transformers datasets

Note: you may need to restart the kernel to use updated packages.


In [10]:
# Importing libraries
from transformers import AutoModelForImageClassification, ConvNextImageProcessor, TrainingArguments, Trainer
from datasets import load_dataset
from torchvision.transforms import (Compose, Resize, CenterCrop, ToTensor, Normalize)
import torch
import numpy as np

In [11]:
# Load the "cifar10" dataset with Hugging Face `datasets`.
# Later cells index the result by split ('train' / 'test') and read the
# 'img' and 'label' columns from each batch of examples.
dataset = load_dataset("cifar10")

In [12]:
# Image preprocessing pipeline: upscale each image, crop it to a square and
# convert it to a normalized torch tensor.

IMAGE_SIZE = 224  # side length (in pixels) of the square input fed to ResNet-50
IMAGENET_MEAN = [0.485, 0.456, 0.406]  # per-channel (RGB) mean of the ImageNet pre-training data
IMAGENET_STD = [0.229, 0.224, 0.225]   # per-channel (RGB) std of the ImageNet pre-training data

transform = Compose([
    # An int size rescales the *shorter* edge to IMAGE_SIZE (aspect ratio kept);
    # it does not by itself guarantee a square image.
    Resize(IMAGE_SIZE),
    # The centre crop is what produces the exact IMAGE_SIZE x IMAGE_SIZE input.
    CenterCrop(IMAGE_SIZE),
    # PIL image -> float tensor in channel-first layout.
    ToTensor(),
    # Normalize with the same statistics the pre-trained weights were trained with.
    Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

In [13]:
# Batch-level preprocessing: raw examples -> model-ready tensors

def preprocess_function(examples):
    """Turn a batch of raw examples into model inputs.

    Args:
        examples: Mapping with an 'img' entry (a sequence of images that
            support ``.convert("RGB")``) and a 'label' entry.

    Returns:
        dict with two keys:
            'pixel_values': the transformed images stacked into one tensor
                along a new leading dimension.
            'labels': the 'label' entry of ``examples``, passed through as-is.
    """
    tensors = []
    for raw_image in examples['img']:
        # Force three channels before running the module-level `transform`.
        rgb_image = raw_image.convert("RGB")
        tensors.append(transform(rgb_image))

    return {
        'pixel_values': torch.stack(tensors),
        'labels': examples['label'],
    }

# Attach preprocess_function through `with_transform`. Per the `datasets`
# documentation the function is applied on the fly when examples are accessed,
# so nothing is preprocessed eagerly here. The result is bound to a new name;
# `dataset` keeps referring to the originally loaded object.
prepared_dataset = dataset.with_transform(preprocess_function)

In [17]:
# Pick the predefined 'train' and 'test' splits out of the prepared dataset
# (no new split is created here).
train_dataset, test_dataset = prepared_dataset['train'], prepared_dataset['test']

In [18]:
# Load the pre-trained ResNet-50 classifier and its image processor from the
# same checkpoint.
checkpoint = 'microsoft/resnet-50'

# The checkpoint ships a 1000-class head; asking for 10 labels changes the
# classifier shape, so ignore_mismatched_sizes=True lets the head be
# re-initialized instead of raising on the size mismatch.
model = AutoModelForImageClassification.from_pretrained(
    checkpoint,
    num_labels=10,
    ignore_mismatched_sizes=True,
)
feature_extractor = ConvNextImageProcessor.from_pretrained(checkpoint)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([10, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
# Let's define the training arguments
training_args = TrainingArguments(
    output_dir='./results',          # checkpoints are written here
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Evaluate and save once per epoch.
    # NOTE(review): recent transformers releases rename `evaluation_strategy`
    # to `eval_strategy` -- confirm against the installed version.
    evaluation_strategy='epoch',
    save_strategy='epoch',
    logging_dir='./logs',
    logging_steps=10,
    # The datasets carry an on-the-fly transform (see `with_transform`) that
    # reads the raw 'img' column. With the default remove_unused_columns=True
    # the Trainer drops every column the model's forward() does not accept,
    # including 'img', before the transform runs, which makes
    # preprocess_function fail with KeyError: 'img'. Keep the raw columns.
    remove_unused_columns=False,
)

In [20]:
# Metric callback: classification accuracy
def compute_metrics(p):
    """Compute accuracy for a set of predictions.

    Args:
        p: Object exposing ``predictions`` (per-class scores, one row per
            example) and ``label_ids`` (the reference class ids).

    Returns:
        dict with a single key 'accuracy' holding, as a Python float, the
        fraction of rows whose highest-scoring class equals the reference id.
    """
    predicted_classes = np.argmax(p.predictions, axis=1)
    is_correct = predicted_classes == p.label_ids
    # Cast the boolean hits to float32 so the mean is the hit rate.
    accuracy = is_correct.astype(np.float32).mean().item()
    return {'accuracy': accuracy}