In [1]:
import torch
import glob
import numpy as np
import torchvision
import glob
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import time
from tqdm import tqdm

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [3]:
device

device(type='cuda')

In [4]:
data = glob.glob('../data/train/*.jpg')

In [5]:
labels = []
for x in data:
    # dog = 1
    # cat = 0
    if 'dog' in x:
        labels.append(1)
    else:
        labels.append(0)

In [6]:
train_data = [(data[x], labels[x]) for x in range(len(data))]

In [7]:
def split_datasets(dataset):
    train, test = train_test_split(dataset, test_size=0.05, random_state=42)
    train, val = train_test_split(train, test_size=0.05, random_state=42)
    return train, val, test

In [8]:
train, val, test = split_datasets(train_data)

In [9]:
print(f'Train: {len(train)}')
print(f'Val: {len(val)}')
print(f'Test: {len(test)}')

Train: 22562
Val: 1188
Test: 1250


In [10]:
class CatDogDataeset(Dataset):
    def __init__(self, data_list):
        self.imgs = [x[0] for x in data_list]
        self.labels = [x[1] for x in data_list]
    def __len__(self):
        return len(self.imgs)
    def __getitem__(self, idx):
        img = torchvision.io.read_image(self.imgs[idx])
        img = torchvision.transforms.Resize(size = (256, 256))(img)
        img = img / 255
        return (img, self.labels[idx])

In [11]:
train_dataset = CatDogDataeset(train)
train_loader = DataLoader(train_dataset, batch_size = 32, shuffle = True)
val_dataset = CatDogDataeset(val)
val_loader = DataLoader(val_dataset, batch_size = 32, shuffle = True)
data_dict = {
    'train': train_loader,
    'val': val_loader
}

In [70]:
res_50_model = torchvision.models.resnet50(weights=None, progress=True)
num_flts = res_50_model.fc.in_features
res_50_model.fc = nn.Linear(num_flts, 2)
res_50_model = res_50_model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(res_50_model.parameters(), lr=0.01)

In [71]:
def train_model(model, criterion, optimizer, num_epochs = 5):
    since = time.time()
    best_acc = 0.0
    for epoch in range(num_epochs):
        print(f'Epoch: {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in ['train', 'val']:
            if phase == 'train': 
                model.train() # Setting model on training mode
            else:
                model.eval() # Setting model on eval mode
            # This monitors mostly batch level metrics.
            running_loss = 0
            running_corrects = 0
            print(f'For: {phase}')
            for imgs, labels in tqdm(data_dict[phase]):
                imgs = imgs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad() # This just zeros the gradients not actaul all weights
                # only forward propogation
                # track hist if it's in train mode
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(imgs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # if phase is train then do backprop and optimize as well
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # Stats
                running_loss += loss.item() * imgs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            epoch_loss = running_loss / len(data_dict[phase])
            epoch_acc = running_corrects.double() / len(data_dict[phase])

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    time_elapsed = time.time() - since
    print(f'Time spent: {time_elapsed}')

In [72]:
x = train_model(res_50_model, criterion, optimizer)

Epoch: 0/4
----------
For: train


100%|██████████| 706/706 [02:22<00:00,  4.94it/s]


train Loss: 31.5450 Acc: 17.2465
For: val


100%|██████████| 38/38 [00:04<00:00,  9.27it/s]


val Loss: 24.2968 Acc: 17.8684
Epoch: 1/4
----------
For: train


100%|██████████| 706/706 [02:23<00:00,  4.91it/s]


train Loss: 22.3363 Acc: 20.0127
For: val


100%|██████████| 38/38 [00:04<00:00,  9.23it/s]


val Loss: 34.8845 Acc: 16.5263
Epoch: 2/4
----------
For: train


100%|██████████| 706/706 [02:23<00:00,  4.91it/s]


train Loss: 20.8006 Acc: 20.9518
For: val


100%|██████████| 38/38 [00:04<00:00,  9.27it/s]


val Loss: 56.1664 Acc: 17.5263
Epoch: 3/4
----------
For: train


100%|██████████| 706/706 [02:23<00:00,  4.91it/s]


train Loss: 19.5992 Acc: 21.9278
For: val


100%|██████████| 38/38 [00:04<00:00,  9.28it/s]


val Loss: 33.2255 Acc: 18.8947
Epoch: 4/4
----------
For: train


100%|██████████| 706/706 [02:23<00:00,  4.92it/s]


train Loss: 18.5638 Acc: 22.6671
For: val


100%|██████████| 38/38 [00:04<00:00,  9.27it/s]

val Loss: 27.4522 Acc: 20.8684
Time spent: 738.1961028575897





Making model with keras type arch

In [12]:
res_50_model = torchvision.models.resnet50(weights=None, progress=True)
num_flts = res_50_model.fc.in_features
res_50_model.fc = nn.Sequential(
    nn.Linear(num_flts, 1), # this way we can use this layer as linear
    nn.Sigmoid() # Apply sigmoid after 
)
res_50_model = res_50_model.to(device)
criterion = nn.BCELoss()
optimizer = optim.SGD(res_50_model.parameters(), lr=0.01)

In [15]:
def train_model(model, criterion, optimizer, num_epochs = 5):
    since = time.time()
    best_acc = 0.0
    for epoch in range(num_epochs):
        print(f'Epoch: {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in ['train', 'val']:
            if phase == 'train': 
                model.train() # Setting model on training mode
            else:
                model.eval() # Setting model on eval mode
            # This monitors mostly batch level metrics.
            running_loss = 0
            running_corrects = 0
            print(f'For: {phase}')
            for imgs, labels in tqdm(data_dict[phase]):
                imgs = imgs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad() # This just zeros the gradients not actaul all weights
                # only forward propogation
                # track hist if it's in train mode
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(imgs)
                    labels = labels.reshape(-1, 1).float()
                    preds = torch.where(outputs < 0.5, 0.0, 1.0)
                    loss = criterion(outputs, labels)
                    # if phase is train then do backprop and optimize as well
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # Stats
                running_loss += loss.item() * imgs.size(0)
                running_corrects += torch.sum(preds == labels.data)
                
            epoch_loss = running_loss / len(data_dict[phase])
            epoch_acc = running_corrects.double() / len(data_dict[phase])

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')
    time_elapsed = time.time() - since
    print(f'Time spent: {time_elapsed}')

In [16]:
trained = train_model(res_50_model, criterion, optimizer)

Epoch: 0/4
----------
For: train


100%|██████████| 706/706 [02:57<00:00,  3.98it/s]


train Loss: 22.0004 Acc: 19.4448
For: val


100%|██████████| 38/38 [00:05<00:00,  6.64it/s]


val Loss: 25.2253 Acc: 19.5000
Epoch: 1/4
----------
For: train


100%|██████████| 706/706 [02:57<00:00,  3.98it/s]


train Loss: 20.5346 Acc: 20.7677
For: val


100%|██████████| 38/38 [00:05<00:00,  6.49it/s]


val Loss: 46.6605 Acc: 15.1579
Epoch: 2/4
----------
For: train


100%|██████████| 706/706 [02:58<00:00,  3.95it/s]


train Loss: 19.2167 Acc: 21.9008
For: val


100%|██████████| 38/38 [00:05<00:00,  6.77it/s]


val Loss: 21.0793 Acc: 20.0526
Epoch: 3/4
----------
For: train


100%|██████████| 706/706 [02:49<00:00,  4.17it/s]


train Loss: 18.0891 Acc: 22.9079
For: val


100%|██████████| 38/38 [00:05<00:00,  6.48it/s]


val Loss: 44.1247 Acc: 19.9737
Epoch: 4/4
----------
For: train


100%|██████████| 706/706 [02:58<00:00,  3.97it/s]


train Loss: 17.1755 Acc: 23.5198
For: val


100%|██████████| 38/38 [00:06<00:00,  6.22it/s]

val Loss: 47.2904 Acc: 16.1316
Time spent: 909.8950083255768



