In [1]:
import os
import random

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet50

import albumentations as A
from albumentations.pytorch import ToTensorV2

import numpy as np
from PIL import Image

In [2]:
def set_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

In [3]:
cat_train_dir = '../data/training_set/training_set/cats'
dog_train_dir = '../data/training_set/training_set/dogs'
cat_valid_dir = '../data/valid_set/valid_set/cats'
dog_valid_dir = '../data/valid_set/valid_set/dogs'

cat_train_filenames = sorted([os.path.join(cat_train_dir, f) for f in os.listdir(cat_train_dir) if not f.endswith('.DS_Store')])
dog_train_filenames = sorted([os.path.join(dog_train_dir, f) for f in os.listdir(dog_train_dir) if not f.endswith('.DS_Store')])
cat_valid_filenames = sorted([os.path.join(cat_valid_dir, f) for f in os.listdir(cat_valid_dir) if not f.endswith('.DS_Store')])
dog_valid_filenames = sorted([os.path.join(dog_valid_dir, f) for f in os.listdir(dog_valid_dir) if not f.endswith('.DS_Store')])
train_images_filenames = [*cat_train_filenames, *dog_train_filenames]
valid_images_filenames = [*cat_valid_filenames, *dog_valid_filenames]
images_filepaths = [*cat_train_filenames, *dog_train_filenames, *cat_valid_filenames, *dog_valid_filenames]

print(len(images_filepaths))
print(train_images_filenames[0])

10028
../data/training_set/training_set/cats/cat.1.jpg


In [4]:
train_transforms = A.Compose([A.Resize(height=256, width=256),
                              A.HorizontalFlip(p=0.5),
                              A.CLAHE(always_apply=False, p=1.0, clip_limit=(4, 4), tile_grid_size=(8, 8)),
                              A.Normalize(mean=(0.5, 0.5, 0.5),
                                          std=(0.5, 0.5, 0.5),
                                          max_pixel_value=255.0,
                                          always_apply=True),
                              ToTensorV2(always_apply=True)])
valid_transforms = A.Compose([A.Resize(height=256, width=256),
                              A.CLAHE(always_apply=False, p=1.0, clip_limit=(4, 4), tile_grid_size=(8, 8)),
                              A.Normalize(mean=(0.5, 0.5, 0.5),
                                          std=(0.5, 0.5, 0.5),
                                          max_pixel_value=255.0,
                                          always_apply=True),
                              ToTensorV2(always_apply=True)])

In [5]:
class CatandDogDataset(Dataset):
    def __init__(self, filenames, transforms):
        self.filenames = filenames
        self.transforms = transforms
    
    def __len__(self):
        return len(self.filenames)
    
    def __getitem__(self, idx):
        img_path = self.filenames[idx]
        img = Image.open(img_path)
        img = np.array(img)
        transformed_img = self.transforms(image=img)['image']
        label = 1 if 'cat' in img_path else 0
        return transformed_img, label

In [6]:
train_dataset = CatandDogDataset(train_images_filenames, transforms=train_transforms)
valid_dataset = CatandDogDataset(valid_images_filenames, transforms=valid_transforms)

train_loader = DataLoader(train_dataset,
                          batch_size=64,
                          num_workers=4,
                          shuffle=True,
                          drop_last=True)
valid_loader = DataLoader(valid_dataset,
                          batch_size=64,
                          num_workers=4,
                          shuffle=False,
                          drop_last=True)

In [7]:
inputs, labels = next(iter(train_loader))
print(inputs.size())
print(labels)

torch.Size([64, 3, 256, 256])
tensor([0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1,
        0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1])


In [8]:
class ResNet50(nn.Module):
    def __init__(self, num_classes):
        super(ResNet50, self).__init__()
        self.model = resnet50(pretrained=False)
        self.num_ftrs = self.model.fc.in_features
        self.model.fc = nn.Linear(self.num_ftrs, num_classes)

    def forward(self, x):
        x = self.model(x)
        return x

In [9]:
model = ResNet50(num_classes=2)

In [10]:
optimizer = optim.Adam(model.parameters(), lr=0.001, betas=(0.9, 0.999), eps=1e-08)
criterion = nn.CrossEntropyLoss()

In [11]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print(device)

model = model.to(device)
criterion = criterion.to(device)

set_seed(42)

cuda:0


In [12]:
def train(model, train_loader, valid_loader, optimizer, criterion, num_epochs):
    best_acc = 0.0
    best_epoch = 1
    for epoch in range(num_epochs):
        print('-'*40)
        print(f'Epoch : {epoch+1}/{num_epochs}')
        epoch_loss = 0.0
        epoch_corrects = 0
        model.train()
        for batch_in, batch_out in train_loader:
            batch_in = batch_in.to(device)
            batch_out = batch_out.to(device)
            
            y_pred = model(batch_in)
            _, preds = torch.max(y_pred, 1)
            
            loss = criterion(y_pred, batch_out)
            
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item() * batch_in.size(0)
            epoch_corrects += torch.sum(preds == batch_out.data)
        
        epoch_loss = epoch_loss / len(train_loader.dataset)
        epoch_acc = epoch_corrects.double() / len(train_loader.dataset)
        
        print(f'train Loss : {epoch_loss:.4f} Acc : {epoch_acc:.4f}')
        
        epoch_loss = 0.0
        epoch_corrects = 0
        model.eval()
        for batch_in, batch_out in valid_loader:
            batch_in = batch_in.to(device)
            batch_out = batch_out.to(device)
            
            optimizer.zero_grad()
            
            with torch.no_grad():
                y_pred = model(batch_in)
                _, preds = torch.max(y_pred, 1)
                
                loss = criterion(y_pred, batch_out)
                
                epoch_loss += loss.item() * batch_in.size(0)
                epoch_corrects += torch.sum(preds == batch_out.data)
                
        epoch_loss = epoch_loss / len(valid_loader.dataset)
        epoch_acc = epoch_corrects.double() / len(valid_loader.dataset)
        
        if epoch_acc >= best_acc:
            best_acc = epoch_acc
            best_epoch = epoch + 1
            torch.save(model, '../checkpoints/catvsdog.pt')
            
        print(f'valid Loss : {epoch_loss:.4f} Acc : {epoch_acc:.4f}')
        print(f'best Acc: {best_acc:.4f}')
        print(f'best Epoch : {best_epoch}')
        print('-'*40)
        print()

train(model, train_loader, valid_loader, optimizer, criterion, num_epochs=1)

----------------------------------------
Epoch : 1/1
train Loss : 0.6839 Acc : 0.6275
valid Loss : 0.5798 Acc : 0.6639
best Acc: 0.6639
best Epoch : 1
----------------------------------------



In [13]:
test_dir = '../data/test_img'

test_images_filenames = [os.path.join(test_dir, f) for f in os.listdir(test_dir)]

print(len(test_images_filenames))
print(test_images_filenames[0])

4
../data/test_img/karina.jpg


In [14]:
test_transforms = A.Compose([A.Resize(height=256, width=256),
                             A.CLAHE(always_apply=False, p=1.0, clip_limit=(4, 4), tile_grid_size=(8, 8)),
                             A.Normalize(mean=(0.5, 0.5, 0.5),
                                          std=(0.5, 0.5, 0.5),
                                          max_pixel_value=255.0,
                                          always_apply=True),
                             ToTensorV2(always_apply=True)])

In [15]:
class CatandDogInferenceDataset(Dataset):
    def __init__(self, filenames, transforms):
        self.filenames = filenames
        self.transforms = transforms

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        img_path = self.filenames[idx]
        img = Image.open(img_path)
        img = np.array(img)
        transformed_img = self.transforms(image=img)["image"]
        img_name = img_path.split("/")[-1]
        return transformed_img, img_name

In [16]:
test_dataset = CatandDogInferenceDataset(test_images_filenames, transforms=test_transforms)

test_loader = DataLoader(test_dataset,
                         batch_size=1,
                         num_workers=4,
                         shuffle=False,
                         drop_last=False)

In [17]:
inputs, filename = next(iter(test_loader))
print(inputs.size())

torch.Size([1, 3, 256, 256])


In [18]:
def inference(model, test_loader):
    model.eval()
    for batch_in, img_name in test_loader:
        batch_in = batch_in.to(device)

        y_pred = model(batch_in)
        y_pred = F.softmax(y_pred, dim=1)
        _, preds = torch.max(y_pred, 1)

        print(
            f'"{img_name[0]}" is ' + "a cat"
            if preds[0] == 1
            else f'"{img_name[0]}" is ' + "a dog"
        )

model = torch.load('../checkpoints/catvsdog.pt')
model.to(device)
inference(model, test_loader)

"karina.jpg" is a dog
"cat.jpeg" is a dog
"dog.jpeg" is a dog
"aroo.jpeg" is a dog
