In [2]:
# Importing libraries
from huggingface_hub import login, HfApi
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer, AutoProcessor, AutoFeatureExtractor
from datasets import load_dataset
from torchvision import transforms
import torch
import numpy as np
import streamlit as st
from PIL import Image
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

Using GPU: NVIDIA L4


In [3]:
# Loading the dataset
dataset = load_dataset("cifar100")
print(dataset['train'][5])

{'img': <PIL.PngImagePlugin.PngImageFile image mode=RGB size=32x32 at 0x7FB252A21DB0>, 'fine_label': 86, 'coarse_label': 5}


In [4]:
# Lets preprocess the dataset

def preprocess_function(examples):
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert images to PyTorch tensors
    ])

    examples['pixel_values'] = [transform(img.convert("RGB")) for img in examples['img']]
    return examples

In [5]:
# Lets apply the preprocessing to the dataset

dataset = dataset.map(preprocess_function, batched=True)
dataset = dataset.rename_column('fine_label', 'labels')  # Rename the column
dataset.set_format(type='torch', columns=['pixel_values', 'labels'])

In [6]:
# Lets split the dataset
train_dataset = dataset['train']
test_dataset = dataset['test']

In [7]:
# Loading the pre-trained ResNet-50 model along with the feature extractor
from transformers import AutoModelForImageClassification

model = AutoModelForImageClassification.from_pretrained('microsoft/resnet-50', num_labels=100, ignore_mismatched_sizes=True)

model.to(device)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([100]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([100, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

In [8]:
# Lets define the training arguments
training_args = TrainingArguments(
    output_dir="./resnet50_finetuned_model",
    evaluation_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
)



In [9]:
# Perform training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

In [10]:
# Train the model
trainer.train()

Epoch,Training Loss,Validation Loss
1,4.0988,4.652433
2,3.5103,3.667621
3,3.051,3.157103
4,3.1203,2.985719
5,3.0278,3.12961


In [None]:
# Evaluate the model
trainer.evaluate()

In [None]:
# Save the model locally
trainer.save_model("./resnet50_finetuned_model_final")