In [None]:
%pip install transformers datasets
%pip install accelerate==0.27.2
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [21]:
# Importing libraries
from huggingface_hub import login, HfApi
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer, AutoProcessor, AutoFeatureExtractor
from datasets import load_dataset
from torchvision import transforms
import torch
import numpy as np
import gradio as gr
from PIL import Image
if torch.cuda.is_available():
    device = torch.device("cuda")
    print(f"Using GPU: {torch.cuda.get_device_name(0)}")
else:
    device = torch.device("cpu")
    print("Using CPU")

Using GPU: NVIDIA GeForce RTX 3050 Laptop GPU


In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [23]:
# Loading the dataset
dataset = load_dataset("cifar10")

In [24]:
# Lets preprocess the dataset

def preprocess_function(examples):
    transform = transforms.Compose([
        transforms.ToTensor(),  # Convert images to PyTorch tensors
    ])

    examples['pixel_values'] = [transform(img.convert("RGB")) for img in examples['img']]
    return examples

In [25]:
# Lets apply the preprocessing to the dataset

dataset = dataset.map(preprocess_function, batched=True)

dataset.set_format(type='torch', columns=['pixel_values', 'label'])

In [26]:
# Lets split the dataset
train_dataset = dataset['train']
test_dataset = dataset['test']

In [27]:
# Loading the pre-trained ResNet-50 model along with the feature extractor
model = AutoModelForImageClassification.from_pretrained('microsoft/resnet-50', num_labels=10, ignore_mismatched_sizes=True)
model.to(device)

Some weights of ResNetForImageClassification were not initialized from the model checkpoint at microsoft/resnet-50 and are newly initialized because the shapes did not match:
- classifier.1.bias: found shape torch.Size([1000]) in the checkpoint and torch.Size([10]) in the model instantiated
- classifier.1.weight: found shape torch.Size([1000, 2048]) in the checkpoint and torch.Size([10, 2048]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


ResNetForImageClassification(
  (resnet): ResNetModel(
    (embedder): ResNetEmbeddings(
      (embedder): ResNetConvLayer(
        (convolution): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
        (normalization): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (activation): ReLU()
      )
      (pooler): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    )
    (encoder): ResNetEncoder(
      (stages): ModuleList(
        (0): ResNetStage(
          (layers): Sequential(
            (0): ResNetBottleNeckLayer(
              (shortcut): ResNetShortCut(
                (convolution): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
                (normalization): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
              )
              (layer): Sequential(
                (0): ResNetConvLayer(
                  (convolution): Conv2d(64

In [None]:
# Lets define the training arguments
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    per_device_train_batch_size=32,
    per_device_eval_batch_size=32,
    num_train_epochs=5,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    save_total_limit=2,
)

In [None]:
# Lets define the compute_metrics function
def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)
    accuracy = (pred == labels).mean()
    return {"accuracy": accuracy}

In [None]:
# Perform training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

In [None]:
# Train the model
trainer.train()

In [None]:
# Evaluate the model
trainer.evaluate()

In [None]:
# Lets login to the Hugging Face account
login()

In [None]:
# Lets save the model to the Hugging Face Hub
model_path = "./results/checkpoint-7500"
model = AutoModelForImageClassification.from_pretrained(model_path)

In [40]:
# Load the pre-trained model from Hugging Face
model_name = "youzaina001/cifar10-resnet50"
model = AutoModelForImageClassification.from_pretrained(model_name).to(device)
  # Replace with the appropriate processor if available

# Manually define preprocessing for Gradio
def manual_preprocess(image):
    transform = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
    ])
    return transform(image).unsqueeze(0).to(device)

In [41]:
# Lets build a Gradio interface for the model
def classify_image(image):
    
    # Preprocess the image
    image = image.convert("RGB")
    inputs = manual_preprocess(image)
    inputs = {"pixel_values": inputs}

    # Predict the class of the image
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits

    # Get the predicted class
    predicted_class_idx = logits.argmax(-1).item()
    predicted_class = model.config.id2label[predicted_class_idx]
    return predicted_class

In [42]:
# Lets create a Gradio interface

gface = gr.Interface(
    fn=classify_image,
    inputs=gr.components.Image(type="pil", label="Input Image"),
    outputs=gr.components.Label(num_top_classes=3),
    title="ResNet-50-CIFAR-10 Image Classifier",
    description="A pre-trained ResNet-50 model fine-tuned on the CIFAR-10 dataset.",
)

In [43]:
gface.launch()

Running on local URL:  http://127.0.0.1:7865

To create a public link, set `share=True` in `launch()`.




In [None]:
# Lets upload the model to the Hugging Face Hub
api = HfApi()
api.create_repo(repo_id="youzaina001/cifar10-resnet50")
model.push_to_hub("youzaina001/cifar10-resnet50")