In [1]:
# Imports here
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms 

from PIL import Image
import json
from matplotlib.ticker import FormatStrFormatter



In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


# **Steps**

Step 1: Load Dataset <p>
Step 2: Transform the Dataset <p>
Step 3: Create Model <p>
Step 4: Train Model <p>
Step 5: Save the Model <p>
Step 6: Load the Model <p>


# Step 1: Load Dataset



In [3]:
data_dir = './dataset'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir + '/test'

In [4]:
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

# Step 2: Transform the Dataset

The pre-trained networks you'll use were trained on the ImageNet dataset where each color channel was normalized separately. For all three sets you'll need to normalize the means and standard deviations of the images to what the network expects. For the means, it's [0.485, 0.456, 0.406] and for the standard deviations [0.229, 0.224, 0.225], calculated from the ImageNet images. These values will shift each color channel to be centered at 0 and range from -1 to 1.


In [5]:
# Define your transforms for the training, validation, and testing sets
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406], 
                                                            [0.229, 0.224, 0.225])])

test_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], 
                                                           [0.229, 0.224, 0.225])])

validation_transforms = transforms.Compose([transforms.Resize(256),
                                            transforms.CenterCrop(224),
                                            transforms.ToTensor(),
                                            transforms.Normalize([0.485, 0.456, 0.406], 
                                                                 [0.229, 0.224, 0.225])])


trainset = torchvision.datasets.ImageFolder(root=train_dir, transform=train_transforms)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=0)

testset = torchvision.datasets.ImageFolder(root=valid_dir, transform=test_transforms)
validloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=0)

# Step 3: Create Model



In [6]:
import torch.nn as nn
import torchvision

import torch.nn as nn
import torchvision

class DeformableCNNModel(nn.Module):
    def __init__(self):
        super(DeformableCNNModel, self).__init__()

        # Convolutional weights
        self.weight1 = nn.Parameter(torch.Tensor(32, 3, 3, 3))
        self.weight2 = nn.Parameter(torch.Tensor(64, 32, 3, 3))
        self.weight3 = nn.Parameter(torch.Tensor(128, 64, 3, 3))
        
        # Offset (and mask if required) for deformable convolution
        self.offsets1 = nn.Conv2d(3, 2*3*3, kernel_size=3, padding=1, stride=1)
        self.offsets2 = nn.Conv2d(32, 2*3*3, kernel_size=3, padding=1, stride=1)
        self.offsets3 = nn.Conv2d(64, 2*3*3, kernel_size=3, padding=1, stride=1)
        
        # Rest of the model remains similar
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 56 * 56, 512)
        self.fc2 = nn.Linear(512, 102)
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = torchvision.ops.deform_conv2d(x, self.offsets1(x), self.weight1, padding=1)
        x = self.relu(x)
        
        x = torchvision.ops.deform_conv2d(x, self.offsets2(x), self.weight2, padding=1)
        x = self.pool(self.relu(x))
        
        x = torchvision.ops.deform_conv2d(x, self.offsets3(x), self.weight3, padding=1)
        x = self.pool(self.relu(x))
        
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        
        return x




In [7]:
model = DeformableCNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

In [8]:
# Model instantiation and dummy input
model_test = DeformableCNNModel()
input_test = torch.randn(64, 3, 224, 224)  # Random input tensor

# Forward pass
output_test = model_test(input_test)
print(output_test.shape)


torch.Size([64, 102])


# Step 4: Train Model

In [9]:
import torch

num_epochs = 1

# Set model to train mode
model.train()

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # Unpack data
        inputs, labels = data[0].to(device),data[1].to(device)
        #print(inputs.shape)
        #print(labels.shape)
        

        # Make sure there's no size mismatch
        
        # Clear gradients
        optimizer.zero_grad()

        # Forward pass
        # Uncomment this line if you do not want to use mixup
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
      
        # Backward pass
        loss.backward()
        
        # Update weights
        optimizer.step()
        
        running_loss += loss.item()
        
        # Print loss every 100 batches
        if (i+1) % 100 == 0:
            print('[%d, %5d] loss: %.3f' % (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0
        # print("Inputs Mixed Shape:", inputs_mixed.shape)
        # print("Labels A Shape:", labels_a.shape)
        # print("Labels B Shape:", labels_b.shape)
        # print("Model Outputs Shape:", outputs.shape)

print('Finished Training')


[1,   100] loss: 4.623
Finished Training


In [10]:
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(images.shape)
print(labels.shape)

torch.Size([64, 3, 224, 224])
torch.Size([64])


In [11]:
correct = 0
total = 0

with torch.no_grad():
    for data in validloader:
        inputs, labels = data[0].to(device), data[1].to(device)

        outputs = model(inputs)
        # Remove the reshaping line if it's not needed
        # outputs = outputs.view(inputs.shape[0], -1)

        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the %d test images: %d %%' % (total, 100 * correct / total))


Accuracy of the network on the 818 test images: 2 %


# Step 5: Save the Model

Now that your network is trained, save the model so you can load it later for making predictions. You probably want to save other things such as the mapping of classes to indices which you get from one of the image datasets: image_datasets['train'].class_to_idx. You can attach this to the model as an attribute which makes inference easier later on.

model.class_to_idx = image_datasets['train'].class_to_idx

Remember that you'll want to completely rebuild the model later so you can use it for inference. Make sure to include any information you need in the checkpoint. If you want to load the model and keep training, you'll want to save the number of epochs as well as the optimizer state, optimizer.state_dict. You'll likely want to use this trained model in the next part of the project, so best to save it now.

In [12]:


torch.save(model,'./epoch1withdeform.pth')

# Step 6: Load the Model
At this point it's good to write a function that can load a checkpoint and rebuild the model. That way you can come back to this project and keep working on it without having to retrain the network.

In [13]:
model = torch.load('epoch100.pth').to(device)
# If you also saved other attributes like optimizer state
model.state_dict()



FileNotFoundError: [Errno 2] No such file or directory: 'epoch100.pth'