In [1]:
# Enable CPU fallback for MPS by setting the variable for the whole kernel process.
%env PYTORCH_ENABLE_MPS_FALLBACK=1

env: PYTORCH_ENABLE_MPS_FALLBACK=1


In [8]:
# Unpack the ImageNette archive from the Kaggle input mount into the current working directory.
!tar -xf /kaggle/input/imagenette2-160/imagenette2-160.tgz

In [9]:
# Show the working directory; the relative dataset paths used later are resolved against it.
!pwd

/kaggle/working


In [2]:
import os

# Set the environment variable to enable CPU fallback for MPS.
# NOTE(review): this repeats the `%env` magic in the first cell — presumably kept so the
# setting also applies outside IPython; confirm, otherwise one of the two can go.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

# AlexNet - The paper which started the deep learning revolution!!!

AlexNet is a seminal paper in the field of deep learning: it was one of the first deep neural networks trained in the wild on a real
problem. The model was trained on ImageNet, an image-classification dataset with 1.2 million images and 1000 classes.

When AlexNet was released it changed the computer vision game, shifting the focus from handcrafted, task-specific methods to more general CV
methods, i.e. neural networks. It was 10.3% ahead in top-1 against the next best competitor (top-1 accuracy is the fraction of images for
which the model's single most confident prediction is the correct class).

In this session I will provide you with an AlexNet cookbook to help you implement the model.

Link to the paper: https://proceedings.neurips.cc/paper_files/paper/2012/file/c399862d3b9d6b76c8436e924a68c45b-Paper.pdf

## Boiler plate code blocks

In [3]:
import torch
import torchvision

import torch.nn as nn
from torch.utils.data import DataLoader

import torchvision.transforms as transforms

from torch.optim.lr_scheduler import ReduceLROnPlateau

In [4]:
# Pick the best available accelerator: Apple MPS first, then CUDA, otherwise the CPU.
# `torch.cuda.is_available()` checks for a usable GPU at runtime, whereas
# `torch.backends.cuda.is_built()` only reports that the binary was compiled with CUDA.
if torch.backends.mps.is_available():
	device = torch.device("mps")
elif torch.cuda.is_available():
	device = torch.device("cuda")
else:
	# Fall back to a real CPU device (not None) so later `.to(device)` calls keep working.
	device = torch.device("cpu")
	print("No MPS or CUDA device found; falling back to CPU.")

# Allocate a tiny tensor on the chosen device as a sanity check that it is usable.
x = torch.ones(1, device=device)
print(x)

tensor([1.], device='cuda:0')


We will be using the ImageNette dataset, a small subset of ImageNet: the full ImageNet is around 125 GB, so training on it is not feasible without a GPU and a lot of time.

To download ImageNette, run the following commands:

mkdir Data && cd Data
wget https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
tar -xvf imagenette2-160.tgz


# The AlexNet CookBook
<h2><b> For today's session I will provide you with a list of ingredients, which will be all the layers you need </b></h2>

Beware some things are missing from this cookbook and you will need to utilise the paper to figure out the exact details.

AlexNet Implementation Ingredients list:

**n represents the total number of layers of that kind, so if n = 3 there are 3 of those layers.**

- Convolutional layers:
	- conv1 - 11x11 @ 96 
	- conv2 - 5x5 @ 256
	- conv3 - 3x3 @ 384
	- conv4 - 3x3 @ 384
	- conv5 - 3x3 @ 256

- Fully connected layers:
	- fc1 - in_features = ?, out_features = 4096
	- fc2 - in_features = 4096, out_features = 4096
	- fc3 - in_features = 4096, out_features = 1000

- Non-Linear activation layers:
	- relu_n - ReLU layers follow every layer

- Norm layers: 
	- LRN_n - Follows specific layers, check the paper for more details or ask me :) - size=?, alpha=?, beta=?, k=?

- maxpool layers:
	- mp_n - Once again follows specific layers, check the paper for more details or ask me :)

And that is all you need to construct AlexNet!!!

In [20]:
class AlexNet(nn.Module):
	"""AlexNet image classifier (Krizhevsky et al., 2012) as a single-tower network.

	Five convolutional stages (the first two followed by local response
	normalisation and max pooling, the last one by max pooling) feed three fully
	connected layers with dropout.

	The forward pass returns raw class scores (logits), which is what
	``nn.CrossEntropyLoss`` expects. ``self.softmax`` is kept as an attribute for
	callers that want probabilities, e.g. ``net.softmax(net(batch))``.

	``fc1`` is sized for a 256 x 6 x 6 feature map, so inputs must be 3 x 227 x 227.

	Args:
		num_classes: Number of output scores. Defaults to 1000 (ImageNet); pass 10
			for ImageNette.
	"""

	def __init__(self, num_classes: int = 1000):
		super().__init__()

		# Stage 1: 3x227x227 -> conv 96x55x55 -> pool 96x27x27
		self.conv1 = nn.Conv2d(
			in_channels=3,
			out_channels=96,
			kernel_size=(11, 11),
			stride=4,
		)
		self.relu1 = nn.ReLU()
		self.LRN1 = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2)
		self.maxpool1 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

		# Stage 2: 96x27x27 -> conv 256x27x27 -> pool 256x13x13
		self.conv2 = nn.Conv2d(
			in_channels=96,
			out_channels=256,
			kernel_size=(5, 5),
			padding=2,
		)
		self.relu2 = nn.ReLU()
		self.LRN2 = nn.LocalResponseNorm(size=5, alpha=1e-4, beta=0.75, k=2)
		self.maxpool2 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

		# Stages 3-5: padded 3x3 convolutions keep the 13x13 spatial size.
		self.conv3 = nn.Conv2d(
			in_channels=256,
			out_channels=384,
			kernel_size=(3, 3),
			padding=1,
		)
		self.relu3 = nn.ReLU()

		self.conv4 = nn.Conv2d(
			in_channels=384,
			out_channels=384,
			kernel_size=(3, 3),
			padding=1,
		)
		self.relu4 = nn.ReLU()

		self.conv5 = nn.Conv2d(
			in_channels=384,
			out_channels=256,
			kernel_size=(3, 3),
			padding=1,
		)
		self.relu5 = nn.ReLU()
		# 256x13x13 -> pool 256x6x6
		self.maxpool3 = nn.MaxPool2d(kernel_size=(3, 3), stride=(2, 2))

		# Classifier head: 256 * 6 * 6 = 9216 flattened features.
		self.fc1 = nn.Linear(9216, 4096)
		self.relu6 = nn.ReLU()
		self.dropout1 = nn.Dropout(p=0.5)

		self.fc2 = nn.Linear(4096, 4096)
		self.relu7 = nn.ReLU()
		self.dropout2 = nn.Dropout(p=0.5)

		# No ReLU after the last layer: its outputs are the class scores.
		self.fc3 = nn.Linear(4096, num_classes)
		# Explicit class dimension; the implicit-dim form is deprecated.
		# Not applied in forward() because CrossEntropyLoss works on logits.
		self.softmax = nn.Softmax(dim=1)

	def forward(self, x):
		"""Return class logits of shape (batch, num_classes) for a (batch, 3, 227, 227) input."""
		# First convolution stage
		x = self.maxpool1(self.LRN1(self.relu1(self.conv1(x))))

		# Second convolution stage
		x = self.maxpool2(self.LRN2(self.relu2(self.conv2(x))))

		# Third, fourth and fifth convolutions
		x = self.relu3(self.conv3(x))
		x = self.relu4(self.conv4(x))
		x = self.maxpool3(self.relu5(self.conv5(x)))

		# nn.Linear expects a (batch, features) matrix, so flatten everything but the batch dim.
		x = torch.flatten(x, start_dim=1)

		# Fully connected layers with dropout
		x = self.dropout1(self.relu6(self.fc1(x)))
		x = self.dropout2(self.relu7(self.fc2(x)))

		# Final layer: raw logits, no activation.
		return self.fc3(x)

# Build the network and, when a device was selected, move its parameters onto it.
# `nn.Module.to` works in place and returns the module itself, so rebinding is equivalent.
model = AlexNet()
if device is not None:
	model = model.to(device)

In [14]:
# Dataset locations, relative to the working directory
# (use './Data/imagenette2-160/...' instead when following the local download steps).
TRAIN_DATA_DIR = 'imagenette2-160/train'
TEST_DATA_DIR = 'imagenette2-160/val'

# Preprocessing shared by both splits: resize to 227x227, convert to a tensor,
# then normalise each channel with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.Resize((227, 227)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Training split: reshuffled every epoch.
trainset = torchvision.datasets.ImageFolder(root=TRAIN_DATA_DIR, transform=transform)
train_loader = DataLoader(trainset, batch_size=64, shuffle=True, num_workers=4)

# Validation split: fixed order.
testset = torchvision.datasets.ImageFolder(root=TEST_DATA_DIR, transform=transform)
test_loader = DataLoader(testset, batch_size=64, shuffle=False, num_workers=4)

In [19]:
import time

# Single source of truth for the epoch count used by the loop and its log lines.
NUM_EPOCHS = 35

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.005, weight_decay=0.0005, momentum=0.9)

# Multiply the learning rate by 0.1 once the validation loss has stopped improving
# for longer than `patience` epochs.
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3)

for epoch in range(NUM_EPOCHS):
    print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}] starts...')
    start_time = time.perf_counter()

    # ---- Training phase ----
    model.train()  # enables dropout
    running_loss = 0.0
    train_total = 0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # `criterion` returns the batch mean; weight it by the batch size so the
        # (possibly smaller) last batch does not skew the epoch average.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        train_total += batch_size

    train_loss = running_loss / train_total
    print(f'Epoch [{epoch + 1}] training loss: {train_loss:.3f}')

    # ---- Validation phase (test_loader doubles as the validation set) ----
    model.eval()  # disables dropout
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            batch_size = labels.size(0)
            val_running_loss += loss.item() * batch_size
            predicted = outputs.argmax(dim=1)  # index of the highest logit per sample
            val_total += batch_size
            val_correct += (predicted == labels).sum().item()

    val_loss = val_running_loss / val_total
    val_accuracy = 100 * val_correct / val_total
    print(f'Epoch [{epoch + 1}] validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')

    # The plateau scheduler decides on LR changes from the validation loss.
    scheduler.step(val_loss)

    end_time = time.perf_counter()
    print(f'Epoch [{epoch + 1}/{NUM_EPOCHS}] ends. Time taken: {end_time - start_time:.2f} seconds\n')

Epoch [1/35] starts...
Epoch [1] training loss: 2.303
Epoch [1] validation loss: 2.303, accuracy: 9.10%
Epoch [1/35] ends. Time taken: 18.65 seconds

Epoch [2/35] starts...
Epoch [2] training loss: 2.302
Epoch [2] validation loss: 2.303, accuracy: 9.10%
Epoch [2/35] ends. Time taken: 18.24 seconds

Epoch [3/35] starts...
Epoch [3] training loss: 2.301
Epoch [3] validation loss: 2.301, accuracy: 9.10%
Epoch [3/35] ends. Time taken: 18.41 seconds

Epoch [4/35] starts...
Epoch [4] training loss: 2.293
Epoch [4] validation loss: 2.280, accuracy: 11.69%
Epoch [4/35] ends. Time taken: 18.52 seconds

Epoch [5/35] starts...
Epoch [5] training loss: 2.209
Epoch [5] validation loss: 2.108, accuracy: 22.88%
Epoch [5/35] ends. Time taken: 18.25 seconds

Epoch [6/35] starts...
Epoch [6] training loss: 2.008
Epoch [6] validation loss: 1.957, accuracy: 31.49%
Epoch [6/35] ends. Time taken: 18.41 seconds

Epoch [7/35] starts...
Epoch [7] training loss: 1.928
Epoch [7] validation loss: 1.892, accuracy:

In [32]:
# Example code using PyTorch and torchvision
import torch
from torchvision import models, transforms
from PIL import Image
import cv2

def test_predict():
    # # Load pre-trained ResNet model
    # resnet_model = models.resnet50(pretrained=True)
    # resnet_model.eval()

#     # Preprocess input image
#     transform = transforms.Compose([
#         transforms.Resize((224, 224)),
#         transforms.ToTensor(),
#         transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
#     ])

#     # image_path = 'path_to_dog_image.jpg'
#     image_path = 'imagenette2-160/val/n01440764/ILSVRC2012_val_00009111.JPEG'
#     image = Image.open(image_path)
#     input_tensor = transform(image)
#     input_batch = input_tensor.unsqueeze(0)

#     # Make prediction
#     with torch.no_grad():
#     #     output = resnet_model(input_batch)
#         output = model(input_batch)


#     # Get predicted class
#     _, predicted_class = torch.max(output, 1)
#     print(f"Predicted class index: {predicted_class.item()}")

    model.eval()  # Set model to evaluation mode
    val_running_loss = 0.0
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for data in test_loader:  # Assuming test_loader is used as a validation loader
            inputs, labels = data
            
            print(type(data))
            print(type(data[0]))
            
            tensor = data[0]
            tensor  = tensor.cpu().numpy() # make sure tensor is on cpu
            cv2.imwrite("image.png", tensor)
            
            break
            
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            val_running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

#     val_loss = val_running_loss / len(test_loader)
#     val_accuracy = 100 * val_correct / val_total
#     print(f'Validation loss: {val_loss:.3f}, accuracy: {val_accuracy:.2f}%')

# Run the check; it only looks at the first batch of test_loader.
test_predict()

<class 'list'>
<class 'torch.Tensor'>


libpng error: Invalid IHDR data


In [28]:
# Debugging aid: look up which image depths and channel layouts cv2.imwrite can save.
help(cv2.imwrite)

Help on built-in function imwrite:

imwrite(...)
    imwrite(filename, img[, params]) -> retval
    .   @brief Saves an image to a specified file.
    .   
    .   The function imwrite saves the image to the specified file. The image format is chosen based on the
    .   filename extension (see cv::imread for the list of extensions). In general, only 8-bit unsigned (CV_8U)
    .   single-channel or 3-channel (with 'BGR' channel order) images
    .   can be saved using this function, with these exceptions:
    .   
    .   - With OpenEXR encoder, only 32-bit float (CV_32F) images can be saved.
    .     - 8-bit unsigned (CV_8U) images are not supported.
    .   - With Radiance HDR encoder, non 64-bit float (CV_64F) images can be saved.
    .     - All images will be converted to 32-bit float (CV_32F).
    .   - With JPEG 2000 encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) images can be saved.
    .   - With PAM encoder, 8-bit unsigned (CV_8U) and 16-bit unsigned (CV_16U) i

In [30]:
# List the working directory to see which files exist after the cells above have run.
!ls

imagenette2-160
