In [1]:
import torch
import os
import cv2 as cv
from torch.utils.data import Dataset
from torchvision import datasets
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torchvision.io import read_image
from PIL import Image
import torchvision.transforms as transforms

import glob
from pandas.core.common import flatten



In [2]:
import torchvision
import numpy as np
# Function to show an image

def imshow(img):
    for i in range(0, 2):
        img_np = img[i].numpy()
        img_np_trans = np.transpose(img_np, (1, 2, 0))
        img_c = img_np_trans / 255
        plt.imshow(img_c)
        plt.show()

def single_imshow(img):
    for i in range(0, 2):
        img_np = img.numpy()
        img_np_trans = np.transpose(img_np, (1, 2, 0))
        img_c = img_np_trans / 255
        plt.imshow(img_c)
        plt.show()

In [3]:
cwd = os.getcwd()

training_data_path = cwd + '/PetImages/training_data'
test_data_path = cwd + '/PetImages/test_data'

In [4]:
train_image_paths = []
classes = []

for data_path in glob.glob(training_data_path + '/*'):
    classes.append(data_path.split('/')[-1]) 
    train_image_paths.append(glob.glob(data_path + '/*'))
    # Take the last dir as a class, Cat or Dog, and add to classes.
    # Take the paths for each image to train_image_paths.

train_image_paths = list(flatten(train_image_paths))

print(train_image_paths[5])
# Transforms the list of lists(2 in this case) in to a list.

# Same thing with test data.
test_image_paths = []

for data_path in glob.glob(test_data_path + '/*'):
    test_image_paths.append(glob.glob(data_path + '/*'))
    
test_image_paths = list(flatten(test_image_paths))

print("Train size: ", len(train_image_paths))
print("Test size: ", len(test_image_paths))

/home/vitor/Documents/ProgramasPessoais/machine_learning/PetImages/training_data/Dog/7472.jpg
Train size:  25000
Test size:  812


### Funtion for exception reading image

Sometimes read_image can't read the image, this function fixes those cases

In [5]:
def load_image_cv(image_path):
    try:
        # Load the image using OpenCV
        img = cv.imread(image_path)
        if img is None:
            raise ValueError("The file is not a recognized image format or could not be read.")

        # Convert the image from BGR to RGB
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
        
        # Convert the image to a PIL Image
        img = Image.fromarray(img)
        
        # Convert the image to a tensor
        transform = transforms.ToTensor()
        img_tensor = transform(img)
        
        return img_tensor
    except Exception as e:
        print(f"An error occurred: {e}")

def load_image_PIL(img_path_PIL):
    img = Image.open(img_path_PIL).convert('RGB')
    
    # Convert the image to a tensor
    transform = transforms.ToTensor()
    img_tensor = transform(img)

    print("Last Resort")

    return img_tensor

### Indexes from class to a number respresenting the class and vice-versa

In [6]:
# Index each class

idx_to_class = {i:j for i, j in enumerate(classes)}
print(idx_to_class)

class_to_idx = {value:key for key,value in idx_to_class.items()}
print(class_to_idx)

{0: 'Dog', 1: 'Cat'}
{'Dog': 0, 'Cat': 1}


In [7]:
path = cwd + '/PetImages/training_data/Cat/1.jpg'

image_for_error = load_image_cv(path)
label_error = 1


In [8]:
class CatOrDogImageDataset(Dataset):
    def __init__(self, train_image_paths, labels, transform=None, target_transform=None):
        self.img_dir = train_image_paths
        self.img_labels = labels
    
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = train_image_paths[idx] 

        try: 
            image = read_image(img_path)
        except:
            try:
                image = load_image_cv(img_path)
            except:
                try:
                    image = load_image_PIL(img_path)
                except:
                    return image_for_error, label_error
        # Reads image as Tensor.

        if image is None:
            
            print("WTF error here", img_path)
        
        label = self.img_labels[idx]
        label = class_to_idx.get(label, None)
        # Get the path and label of one data point.

        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        # Make any necessary transformation.

        return image, label
        # Return the image and the respective label.

In [9]:
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, CenterCrop, ToTensor, Lambda, Normalize, Resize

img_transform = Compose(
    [Resize(400),
     CenterCrop(400),])
# Since the images do not have the same dimensions, we need to do something. I chose cropping.
# ToTensor transforms the image into a FloatTensor and scales the pixel values.

In [10]:
# Geting image labels list

training_labels = []
testing_labels = []

# For each image folder with a class images, we iterate through them and
# take the folder class name and append to the list of labels

for class_path in glob.glob(training_data_path + '/*'):
    label = (class_path.split('/')[-1])
    for img_path in glob.glob(class_path + '/*'):
        training_labels.append(label)    

for class_path in glob.glob(test_data_path + '/*'):
    label = (class_path.split('/')[-1])
    for img_path in glob.glob(class_path + '/*'):
        testing_labels.append(label)   

In [11]:
#Geting the Datasets

training_data = CatOrDogImageDataset(train_image_paths, training_labels, img_transform)
testing_data = CatOrDogImageDataset(test_image_paths, testing_labels, img_transform)

In [12]:
train_dataloader = DataLoader(training_data, batch_size=4, shuffle=True)
test_dataloader = DataLoader(testing_data, batch_size=4, shuffle=True)

In [13]:
# Let's build
from torch import nn
import torch.nn.functional as F

In [14]:
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")

Using cuda device


In [15]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(150544, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x


net = Net()

In [16]:
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [17]:
for epoch in range(10):  # Loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(train_dataloader, 0):
        # Get the inputs; data is a list of [inputs, labels]
        inputs, labels = data

        inputs = inputs/255

        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Print statistics
        running_loss += loss.item()
        if i % 100 == 0:    # Print every 5 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 2000:.3f}')
            running_loss = 0.0

print('Finished Training')

[1,     1] loss: 0.001


Corrupt JPEG data: 228 extraneous bytes before marker 0xd9


[1,   101] loss: 0.059
An error occurred: The file is not a recognized image format or could not be read.
WTF error here /home/vitor/Documents/ProgramasPessoais/machine_learning/PetImages/training_data/Dog/11675.jpg


TypeError: Unexpected type <class 'NoneType'>

In [None]:
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

imshow(images)

In [None]:
dataiter = iter(test_dataloader)
images, labels = next(dataiter)

images = images/255


outputs = net(images)