In [None]:
# Switch the working directory to the project folder so relative paths
# (./data, the saved .pth files, test images) resolve inside it.
# NOTE(review): assumes Google Drive is already mounted at /content/drive — confirm a mount step runs first.
%cd /content/drive/MyDrive/catproj

Test the connection, mount Google Drive, and set up the environment.

In [None]:
import torch
import numpy as np
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
torch.cuda.is_available() # True when PyTorch can see a CUDA GPU; if False, enable a GPU runtime / install CUDA first

View sample images and explore the dataset:
50,000 training images of size (32, 32, 3)

Image set: CIFAR-10

In [None]:
# source for this code block: pytorch official documentation cifar10 tutorial
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

# Convert PIL images to tensors in [0, 1], then shift/scale every channel to [-1, 1].
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

batch_size = 4

# Training split: downloaded into ./data on first use and reshuffled every epoch.
trainset = torchvision.datasets.CIFAR10(
    root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(
    trainset, batch_size=batch_size, shuffle=True, num_workers=2)

# Test split: same preprocessing, fixed order.
testset = torchvision.datasets.CIFAR10(
    root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(
    testset, batch_size=batch_size, shuffle=False, num_workers=2)

# Class names in label-index order (index 3 is 'cat').
classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')

In [None]:
# source for this code block: pytorch official documentation cifar10 tutorial
# https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html

# functions to show an image
def imshow(img):
    """Display a normalized channel-first image tensor with matplotlib.

    The tensor is mapped back from [-1, 1] to [0, 1] (the inverse of the
    0.5 / 0.5 normalization) and its channel axis is moved last before plotting.
    """
    restored = img / 2 + 0.5
    channels_last = np.transpose(restored.numpy(), (1, 2, 0))
    plt.imshow(channels_last)
    plt.show()


# get some random training images
# Use the built-in next(): DataLoader iterators in current PyTorch releases
# do not provide a .next() method.
dataiter = iter(trainloader)
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))
# print labels
print(' '.join('%5s' % classes[labels[j]] for j in range(batch_size)))

Below is the model

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class ResBlock(nn.Module):
    """Basic residual block: conv-BN-ReLU-conv-BN plus a shortcut, then ReLU.

    Args:
        in_channel: number of input channels.
        out_channel: number of output channels.
        stride: stride of the first convolution (and of the projection shortcut).
        kernel_size: kernel size of both convolutions. Padding is
            ``kernel_size // 2``, which preserves spatial size only for odd
            kernel sizes.

    The shortcut is the identity when the block changes neither resolution nor
    channel count; otherwise it is a 1x1 convolution followed by BatchNorm so
    that both branches have the same shape when they are added.
    """

    def __init__(self, in_channel, out_channel, stride=1, kernel_size=3):
        super().__init__()
        self.padding = kernel_size // 2  # auto padding
        self.normal = nn.Sequential(
            nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size,
                      stride=stride, padding=self.padding, bias=False),
            nn.BatchNorm2d(out_channel),
            nn.ReLU(inplace=True),
            # Same padding as the first conv; a fixed padding of 1 would shrink
            # the feature map for kernel_size > 3 and break the residual add.
            nn.Conv2d(out_channel, out_channel, kernel_size=kernel_size,
                      stride=1, padding=self.padding, bias=False),
            nn.BatchNorm2d(out_channel),
        )

        # shortcut block
        if stride == 1 and in_channel == out_channel:
            self.shortcut = nn.Identity()
        else:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channel),
            )

    def forward(self, x):
        """Return ReLU(normal(x) + shortcut(x))."""
        out = self.normal(x) + self.shortcut(x)
        return F.relu(out)


In [None]:

class myNet(nn.Module):
    """Binary classifier: conv stem -> one residual block -> conv -> linear + sigmoid.

    Feature-map sizes (channels x height x width) for a 3 x 32 x 32 input,
    assuming the residual block keeps the spatial size:
        conv1   ->  64 x 32 x 32
        resnet  -> 128 x 32 x 32
        pool    -> 128 x 16 x 16
        conv2   -> 256 x 12 x 12   (5x5 kernel, no padding)
        pool    -> 256 x  6 x  6
        fc      -> 1, squashed to (0, 1) by a sigmoid

    Args:
        ResBlock: callable ``(in_channels, out_channels) -> nn.Module`` used to
            build the residual stage.
    """

    def __init__(self, ResBlock):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        self.resnet = ResBlock(64, 128)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=5)
        self.fc = nn.Linear(in_features=256 * 6 * 6, out_features=1)

    def forward(self, x):
        """Return one probability per sample, shape (batch, 1)."""
        features = self.resnet(self.conv1(x))
        features = self.pool(features)
        features = self.pool(F.relu(self.conv2(features)))
        logits = self.fc(torch.flatten(features, 1))
        return torch.sigmoid(logits)

# Build the network, handing it the ResBlock class to construct its residual stage.
net = myNet(ResBlock)

In [None]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs
# (slowly) on a machine without CUDA instead of failing here.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net.to(device)

In [None]:
import torch.optim as optim

# Binary cross-entropy on the sigmoid probabilities produced by the network.
criterion = nn.BCELoss().to(device)
# Plain SGD with momentum over all trainable parameters.
optimizer = optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)
NUM_EPOCHS = 8  # full passes over the training set
CAT = 3         # CIFAR-10 label index of the 'cat' class

In [None]:
def convert_label(labels):
    """Turn multi-class labels into binary cat / not-cat targets.

    Returns a float column of shape (N, 1) holding 1.0 where the label
    equals ``CAT`` and 0.0 elsewhere.
    """
    is_cat = (labels == CAT).float()
    return is_cat.reshape(-1, 1)

In [None]:

# Average loss of every 2000-batch window, kept for later inspection/plotting.
# (Must exist before the loop appends to it.)
err_loss = []

# Make sure BatchNorm uses batch statistics even if this cell is re-run after
# the model was switched to eval mode further down.
net.train()

for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):

        inputs, labels = data
        labels = convert_label(labels)  # 10-class labels -> cat / not-cat column
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = net(inputs)

        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # below code is cited from:
        # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
        # print statistics
        running_loss += loss.item()
        if i % 2000 == 1999:    # print every 2000 mini-batches
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 2000))
            err_loss.append(running_loss / 2000)
            running_loss = 0.0

print('Finished Training')

In [None]:
# Save only the learned parameters; the epoch count is part of the file name.
PATH = 'cifar_resnet_binary{}.pth'.format(NUM_EPOCHS)
torch.save(net.state_dict(), PATH)

Testing

In [None]:

# Rebuild the model on the CPU and restore the trained weights.
PATH = f"cifar_resnet_binary{NUM_EPOCHS}.pth"
# A freshly built module is in training mode; switch to eval mode so BatchNorm
# uses its stored running statistics during testing.
net = myNet(ResBlock).eval()
net.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))

In [None]:
def get_prediction(outputs, threshold=0.5):
    """Threshold probabilities into 0/1 predictions.

    Args:
        outputs: tensor of probabilities.
        threshold: values strictly greater than this count as a positive
            (cat) prediction. Defaults to 0.5.

    Returns:
        int32 tensor of shape (N, 1) containing 1 for positive predictions
        and 0 otherwise.
    """
    return (outputs > threshold).reshape(-1, 1).int()

In [None]:
correct = 0
total = 0
cnt = 0        # images predicted as cat (includes false positives)
label_cnt = 0  # images whose true label is cat

# Evaluation must run in eval mode so BatchNorm uses its running statistics
# instead of the statistics of each 4-image test batch.
net.eval()
# since we're not training, we don't need to calculate the gradients for our outputs
with torch.no_grad():
    for data in testloader:
        images, labels = data
        labels = convert_label(labels).int()  # (N, 1) column of 0/1 targets

        # calculate outputs by running images through the network
        outputs = net(images)
        # threshold the predicted cat probability into a 0/1 decision
        predicted = get_prediction(outputs)

        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        cnt += (predicted == 1).sum().item()
        label_cnt += (labels == 1).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))
# f-strings: concatenating a str with an int raises TypeError.
print(f'Total number of cats: {label_cnt}')
print(f'Number of cats recognized: {cnt}')


Test on custom images

In [None]:
# NOTE(review): with NUM_EPOCHS = 8 the save cell writes "cifar_resnet_binary8.pth";
# this cell loads "cifar_resnet_binary08.pth" — confirm that file exists.
PATH = "cifar_resnet_binary08.pth"
# Eval mode: BatchNorm must use its stored running statistics when
# classifying single images.
net = myNet(ResBlock).eval()
net.load_state_dict(torch.load(PATH, map_location=torch.device('cpu')))

In [None]:
# convert an image to a tensor for model input
def image_to_input(img):
    '''
    Convert an image array into a network input, mirroring the training
    preprocessing (ToTensor followed by Normalize(0.5, 0.5)).

    @param img: numpy array of shape (H, W, 3) with pixel values in 0..255
                (channel-last, e.g. the output of resize_img)
    @return res: float32 torch tensor of shape (1, 3, H, W) scaled to [-1, 1]
    '''
    # H W C -> C H W
    res = np.transpose(img, (2, 0, 1))
    res = torch.from_numpy(res).float()
    # ToTensor divides by 255 (not 256); Normalize then maps [0, 1] -> [-1, 1]
    res = (res / 255 - 0.5) / 0.5
    # add the batch dimension
    res = torch.unsqueeze(res, 0)
    return res

In [None]:
# from path of an image, convert it to numpy array
from PIL import Image
def resize_img(PATH):
    """Load an image file and return it as a 32 x 32 RGB numpy array.

    @param PATH: path of the image file to open
    @return: numpy array of shape (32, 32, 3)
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(PATH) as source:
        # Force three channels: RGBA, palette and grayscale files would
        # otherwise produce an array the network cannot consume.
        resized = source.convert("RGB").resize((32, 32))
        return np.array(resized)

In [None]:
def get_prediction(outputs, threshold=0.2):
    """Threshold probabilities into a column of 0/1 predictions.

    A lower threshold makes it easier to call an image a cat, at the cost of
    labelling more non-cat images as cats.
    """
    above = (outputs > threshold).int()
    return above.reshape(-1, 1)

In [None]:
def test_img(PATH, net):
    """Classify the image file at PATH as cat / not-cat with ``net``.

    @param PATH: path of the image file
    @param net: trained model returning one probability for a (1, 3, 32, 32) input
    @return: a human-readable verdict string
    """
    img_input = image_to_input(resize_img(PATH))

    # Run in eval mode without gradients: in training mode BatchNorm would
    # normalize with this single image's statistics and update its running
    # averages on every call. The caller's mode is restored afterwards.
    was_training = net.training
    net.eval()
    try:
        with torch.no_grad():
            output = net(img_input)
    finally:
        net.train(was_training)

    predicted = get_prediction(output)
    if predicted.item() == 0:
        return "Doesn't look like a cat"
    return "This looks like a cat"

In [None]:
# Classify a sample image; the path is relative to the current working directory.
test_img("wechatimg.jpeg", net)