In [23]:
# Imports here
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms, models
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms 

from PIL import Image
import json
from matplotlib.ticker import FormatStrFormatter

# **Steps**

Step 1: Load Dataset <p>
Step 2: Transform the Dataset <p>
Step 3: Create Model <p>
Step 4: Train Model <p>
Step 5: Save the Model <p>
Step 6: Load the Model <p>


# Step 1: Load Dataset



In [24]:
data_dir = './dataset'
train_dir = data_dir + '/train'
valid_dir = data_dir + '/valid'
test_dir = data_dir + '/test'

In [25]:
with open('cat_to_name.json', 'r') as f:
    cat_to_name = json.load(f)

In [26]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)

cuda:0


# Step 2: Transform the Dataset

The pre-trained networks you'll use were trained on the ImageNet dataset where each color channel was normalized separately. For all three sets you'll need to normalize the means and standard deviations of the images to what the network expects. For the means, it's [0.485, 0.456, 0.406] and for the standard deviations [0.229, 0.224, 0.225], calculated from the ImageNet images. These values will shift each color channel to be centered at 0 and range from -1 to 1.


In [27]:
# Define your transforms for the training, validation, and testing sets
train_transforms = transforms.Compose([transforms.RandomRotation(30),
                                       transforms.RandomResizedCrop(224),
                                       transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.485, 0.456, 0.406], 
                                                            [0.229, 0.224, 0.225])])

test_transforms = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.485, 0.456, 0.406], 
                                                           [0.229, 0.224, 0.225])])

validation_transforms = transforms.Compose([transforms.Resize(256),
                                            transforms.CenterCrop(224),
                                            transforms.ToTensor(),
                                            transforms.Normalize([0.485, 0.456, 0.406], 
                                                                 [0.229, 0.224, 0.225])])


trainset = torchvision.datasets.ImageFolder(root=train_dir, transform=train_transforms)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=0)

testset = torchvision.datasets.ImageFolder(root=valid_dir, transform=test_transforms)
validloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=0)

# Step 3: Create Model



In [28]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 56 * 56, 512)  # Adjusted this line
        self.fc2 = nn.Linear(512, 102)
        self.dropout = nn.Dropout(0.2)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.pool(self.relu(self.conv2(x)))
        x = self.pool(self.relu(self.conv3(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x


In [29]:
model = CNNModel().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Step 4: Train Model

In [30]:

num_epochs = 1


model.train()
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        
        inputs, labels = data[0].to(device),data[1].to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        outputs = outputs.view(inputs.shape[0], -1)
        
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        print('[%d, %5d] loss: %.3f' %
                (epoch + 1, i + 1, running_loss / 100))
        running_loss = 0.0

print('Finished Training')

[1,     1] loss: 0.046
[1,     2] loss: 0.228
[1,     3] loss: 0.170
[1,     4] loss: 0.080
[1,     5] loss: 0.049
[1,     6] loss: 0.048
[1,     7] loss: 0.046
[1,     8] loss: 0.046
[1,     9] loss: 0.046
[1,    10] loss: 0.046
[1,    11] loss: 0.046
[1,    12] loss: 0.046
[1,    13] loss: 0.046
[1,    14] loss: 0.045
[1,    15] loss: 0.046
[1,    16] loss: 0.046
[1,    17] loss: 0.046
[1,    18] loss: 0.046
[1,    19] loss: 0.046
[1,    20] loss: 0.046
[1,    21] loss: 0.046
[1,    22] loss: 0.045
[1,    23] loss: 0.046
[1,    24] loss: 0.046
[1,    25] loss: 0.044
[1,    26] loss: 0.046
[1,    27] loss: 0.045
[1,    28] loss: 0.046
[1,    29] loss: 0.045
[1,    30] loss: 0.044
[1,    31] loss: 0.045
[1,    32] loss: 0.044
[1,    33] loss: 0.044
[1,    34] loss: 0.044
[1,    35] loss: 0.045
[1,    36] loss: 0.043
[1,    37] loss: 0.043
[1,    38] loss: 0.045
[1,    39] loss: 0.042
[1,    40] loss: 0.043
[1,    41] loss: 0.043
[1,    42] loss: 0.041
[1,    43] loss: 0.043
[1,    44] 

In [31]:
dataiter = iter(trainloader)
images, labels = next(dataiter)
print(images.shape)
print(labels.shape)

torch.Size([64, 3, 224, 224])
torch.Size([64])


In [34]:
correct = 0
total = 0
with torch.no_grad():
    for data in validloader:
        inputs, labels = data[0].to(device), data[1].to(device)

        outputs = model(inputs)
        outputs = outputs.view(inputs.shape[0], -1)
        
        _, predicted = torch.max(outputs.data, 1)
        
        predicted = predicted.view(-1)
        
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
       
    
    print('Accuracy of the network on the %d test images: %d %%' % (total, 100 * correct / total))

Accuracy of the network on the 818 test images: 10 %


# Step 5: Save the Model

Now that your network is trained, save the model so you can load it later for making predictions. You probably want to save other things such as the mapping of classes to indices which you get from one of the image datasets: image_datasets['train'].class_to_idx. You can attach this to the model as an attribute which makes inference easier later on.

model.class_to_idx = image_datasets['train'].class_to_idx

Remember that you'll want to completely rebuild the model later so you can use it for inference. Make sure to include any information you need in the checkpoint. If you want to load the model and keep training, you'll want to save the number of epochs as well as the optimizer state, optimizer.state_dict. You'll likely want to use this trained model in the next part of the project, so best to save it now.

In [None]:

torch.save(model,'./test.pth')

# Step 6: Load the Model
At this point it's good to write a function that can load a checkpoint and rebuild the model. That way you can come back to this project and keep working on it without having to retrain the network.

In [None]:
model = torch.load('epoch100.pth').to(device)
# If you also saved other attributes like optimizer state
model.state_dict()