In [1]:
import torch
import os
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torchvision.io import read_image

import glob
from pandas.core.common import flatten



In [3]:
# Creating a custom Dataset

train_image_paths = []
classes = []

for data_path in glob.glob(training_data_path + '/*'):
    classes.append(data_path.split('/')[-1]) 
    train_image_paths.append(glob.glob(data_path + '/*'))
    # Take the last dir as a class, Cat or Dog, and add to classes.
    # Take the paths for each image to train_image_paths.

train_image_paths = list(flatten(train_image_paths))

print(train_image_paths[5])
# Transforms the list of lists(2 in this case) in to a list.

# Same thing with test data.
test_image_paths = []

for data_path in glob.glob(test_data_path + '/*'):
    test_image_paths.append(glob.glob(data_path + '/*'))
    
test_image_paths = list(flatten(test_image_paths))

print("Train size: ", len(train_image_paths))
print("Test size: ", len(test_image_paths))

/home/vitor/Downloads/PetImages/training_data/Dog/7472.jpg
Train size:  24190
Test size:  812


In [4]:
# Index each class

idx_to_class = {i:j for i, j in enumerate(classes)}
print(idx_to_class)

class_to_idx = {value:key for key,value in idx_to_class.items()}
print(class_to_idx)

{0: 'Dog', 1: 'Cat'}
{'Dog': 0, 'Cat': 1}


In [5]:
class CatOrDogImageDataset(Dataset):
    def __init__(self, train_image_paths, classes, transform=None, target_transform=None):
        self.img_dir = train_image_paths
        self.img_labels = classes
        
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = train_image_paths[idx]
        image = read_image(img_path)
        # Converts to Tensor.
        label = self.img_labels[idx]
        label = class_to_idx.get(label, None)
        #Get the path and label of one data point.

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        #Make any necessary transformation.

        return image, label
        # Return the image and the respective label.

In [6]:
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, ToTensor, Lambda, Normalize, Resize

img_transform = Compose(
    [Resize(400),
    CenterCrop(400),])
# Since the images do not have the same dimensions, we need to do something. I chose cropping.
# ToTensor transforms the image into a FloatTensor and scales the pixel values.

target_transform = Lambda(lambda y: torch.zeros(
    2, dtype=torch.float).scatter_(dim=0, index=torch.tensor(y), value=1))
# Apply a desired lambda function defined by us

In [7]:
# Geting the Datasets

training_data = CatOrDogImageDataset(train_image_paths, classes, img_transform, target_transform)
testing_data = CatOrDogImageDataset(test_image_paths, classes, img_transform, target_transform)

In [8]:
train_dataloader = DataLoader(training_data, batch_size=4, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=4, shuffle=True)

In [9]:
# Let's build
from torch import nn
import torch.nn.functional as F

In [10]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [11]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(150544, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [12]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [18]:
for epoch in range(10):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # Get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        inputs = inputs/255

        labels = torch.argmax(labels, dim=1)

        # Zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        if i % 20 == 19:    # print every 2000 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

Finished Training
