In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import torch.backends.cudnn as cudnn
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import matplotlib.pyplot as plt
import time
import os
import copy
import random

import os
import random
from sklearn.model_selection import train_test_split
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset

In [4]:
def set_seed(seed):
    torch.manual_seed(seed)
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

set_seed(100)

In [5]:
print(torch.__version__)

2.3.1+cu118


In [8]:
# Data augmentation and normalization for training
# Just normalization for validation
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}

data_dir = 'cat_dog_dataset'

# Create a full dataset and then split it
full_dataset = datasets.ImageFolder(data_dir, transform=data_transforms['train'])

# Splitting indices for train and test sets
train_indices, test_indices = train_test_split(list(range(len(full_dataset))), test_size=0.2, random_state=100)

# Creating train and test subsets
train_dataset = Subset(full_dataset, train_indices)
test_dataset = Subset(full_dataset, test_indices)

# Applying appropriate transformations
train_dataset.dataset.transform = data_transforms['train']
test_dataset.dataset.transform = data_transforms['test']

# Data loaders
dataloaders = {
    'train': DataLoader(train_dataset, batch_size=6, shuffle=True, num_workers=4),
    'test': DataLoader(test_dataset, batch_size=6, shuffle=False, num_workers=4)
}

dataset_sizes = {
    'train': len(train_dataset),
    'test': len(test_dataset)
}

class_names = full_dataset.classes

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Is CUDA available:", torch.cuda.is_available())
print("CUDA version:", torch.version.cuda)
print("Number of GPUs:", torch.cuda.device_count())
print("GPU Name:", torch.cuda.get_device_name(0))
# Verify that the dataloaders are working correctly
print(f'Train dataset size: {dataset_sizes["train"]}')
print(f'Test dataset size: {dataset_sizes["test"]}')
print(f'Class names: {class_names}')

Is CUDA available: True
CUDA version: 11.8
Number of GPUs: 1
GPU Name: NVIDIA GeForce RTX 4070 Ti
Train dataset size: 19967
Test dataset size: 4992
Class names: ['cats', 'dogs']


In [9]:
dataloaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x258fe20f140>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x258fe20fc80>}

Model


In [10]:
# load dino model
dinov2_vits14 = torch.hub.load('facebookresearch/dinov2', 'dinov2_vits14')

Using cache found in C:\Users\Yaniv/.cache\torch\hub\facebookresearch_dinov2_main


In [11]:
class DinoVisionTransformerClassifier(nn.Module):
    def __init__(self):
        super(DinoVisionTransformerClassifier, self).__init__()
        self.transformer = dinov2_vits14
        self.classifier = nn.Sequential(
            nn.Linear(384, 256),
            nn.ReLU(),
            nn.Linear(256, 2)
        )
    
    def forward(self, x):
        x = self.transformer(x)
        x = self.transformer.norm(x)
        x = self.classifier(x)
        return x


In [12]:
import torch.optim as optim

model = DinoVisionTransformerClassifier()


model1 = models.resnet18(weights='IMAGENET1K_V1')
num_ftrs = model1.fc.in_features
model1.fc = nn.Linear(num_ftrs, 2)
model1 = model1.to(device)


criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
optimizer = optim.Adam(model.parameters(), lr=0.000001)

In [13]:
len(dataloaders["train"])

3328

In [14]:
model = model.to(device)

Train

In [17]:
for epoch in range(6):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(dataloaders["train"], 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs.to(device))
        loss = criterion(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 50 == 49:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 50:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,    50] loss: 0.007
[1,   100] loss: 0.004
[1,   150] loss: 0.003
[1,   200] loss: 0.011
[1,   250] loss: 0.001
[1,   300] loss: 0.007
[1,   350] loss: 0.006
[1,   400] loss: 0.004
[1,   450] loss: 0.002
[1,   500] loss: 0.000
[1,   550] loss: 0.001
[1,   600] loss: 0.001
[1,   650] loss: 0.008
[1,   700] loss: 0.004
[1,   750] loss: 0.001
[1,   800] loss: 0.004
[1,   850] loss: 0.000
[1,   900] loss: 0.001
[1,   950] loss: 0.001
[1,  1000] loss: 0.001
[1,  1050] loss: 0.000
[1,  1100] loss: 0.001
[1,  1150] loss: 0.001
[1,  1200] loss: 0.009
[1,  1250] loss: 0.016
[1,  1300] loss: 0.001
[1,  1350] loss: 0.018
[1,  1400] loss: 0.003
[1,  1450] loss: 0.029
[1,  1500] loss: 0.001
[1,  1550] loss: 0.006
[1,  1600] loss: 0.002
[1,  1650] loss: 0.001
[1,  1700] loss: 0.002
[1,  1750] loss: 0.001
[1,  1800] loss: 0.001
[1,  1850] loss: 0.008
[1,  1900] loss: 0.006
[1,  1950] loss: 0.003
[1,  2000] loss: 0.002
[1,  2050] loss: 0.002
[1,  2100] loss: 0.000
[1,  2150] loss: 0.000
[1,  2200] 

Testing

In [18]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in dataloaders["test"]:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model(images.to(device))
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.to("cpu") == labels).sum().item()

print(f'Accuracy of the network on the {len(dataloaders["test"])*6} test images: {100 * correct // total} %')

Accuracy of the network on the 4992 test images: 99 %


Resnet classification

In [19]:
model1 = model1.to(device)

for epoch in range(6):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(dataloaders["train"], 0):
        # get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model1(inputs.to(device))
        loss = criterion(outputs, labels.to(device))
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 50 == 49:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 50:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,    50] loss: 0.802
[1,   100] loss: 0.790
[1,   150] loss: 0.798
[1,   200] loss: 0.782
[1,   250] loss: 0.788
[1,   300] loss: 0.830
[1,   350] loss: 0.822
[1,   400] loss: 0.780
[1,   450] loss: 0.783
[1,   500] loss: 0.797
[1,   550] loss: 0.786
[1,   600] loss: 0.813
[1,   650] loss: 0.787
[1,   700] loss: 0.754
[1,   750] loss: 0.818
[1,   800] loss: 0.818
[1,   850] loss: 0.798
[1,   900] loss: 0.815
[1,   950] loss: 0.809
[1,  1000] loss: 0.811
[1,  1050] loss: 0.795
[1,  1100] loss: 0.770
[1,  1150] loss: 0.771
[1,  1200] loss: 0.810
[1,  1250] loss: 0.785
[1,  1300] loss: 0.793
[1,  1350] loss: 0.825
[1,  1400] loss: 0.808
[1,  1450] loss: 0.775
[1,  1500] loss: 0.822
[1,  1550] loss: 0.798
[1,  1600] loss: 0.799
[1,  1650] loss: 0.790
[1,  1700] loss: 0.795
[1,  1750] loss: 0.793
[1,  1800] loss: 0.797
[1,  1850] loss: 0.789
[1,  1900] loss: 0.796
[1,  1950] loss: 0.813
[1,  2000] loss: 0.822
[1,  2050] loss: 0.805
[1,  2100] loss: 0.775
[1,  2150] loss: 0.786
[1,  2200] 

In [20]:
correct = 0
total = 0
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in dataloaders["test"]:
        images, labels = data
        # calculate outputs by running images through the network
        outputs = model1(images.to(device))
        # the class with the highest energy is what we choose as prediction
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted.to("cpu") == labels).sum().item()

print(f'Accuracy of the network on the {len(dataloaders["test"])*6} test images: {100 * correct // total} %')

Accuracy of the network on the 4992 test images: 39 %


In [21]:
correct

1962

In [22]:
total

4992