<a href="https://colab.research.google.com/github/zherenz/PyTorch-Basics-22/blob/main/torch_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dataset & DataLoader<br>
torch.utils.data.Dataset<br>
torch.utils.data.Dataset (Segmentation)<br>
torchvision.datasets<br>
torchvision.datasets.ImageFolder<br>

## torch.utils.data.Dataset

In [1]:
# a binary classification
# txt file: [0]height [1]weight [2]gender

import torch

class GenderDataset(torch.utils.data.Dataset):
    """Binary-classification samples read from a whitespace-separated text file.

    Every non-blank line holds three fields: ``height weight gender``.
    Height is divided by 2.0 and weight by 80.0 before being stored; gender is
    kept as an ``int`` label.
    """

    def __init__(self, txt):
        """Parse ``txt`` into a list of ``(height / 2.0, weight / 80.0, label)`` tuples.

        Args:
            txt: Path of the text file to read.

        Raises:
            ValueError: If a field cannot be converted to ``float`` / ``int``.
            IndexError: If a non-blank line has fewer than three fields.
        """
        samples = []
        with open(txt) as f:
            for line in f:
                words = line.split()
                # Blank lines (e.g. an empty line at the end of the file) carry
                # no sample; skipping them avoids an IndexError on words[0].
                if not words:
                    continue
                samples.append((float(words[0]) / 2.0, float(words[1]) / 80.0, int(words[2])))
        self.data = samples

    def __getitem__(self, index):
        """Return ``(features, label)`` where features is a 2-element float tensor."""
        height, weight, label = self.data[index]
        return torch.tensor([height, weight]), label

    def __len__(self):
        """Number of parsed samples."""
        return len(self.data)

# Build the dataset from the training text file and wrap it in a DataLoader
# that yields shuffled mini-batches of 5 samples.
train_data = GenderDataset(txt='data_gender_train.txt')
train_loader = torch.utils.data.DataLoader(train_data, batch_size=5, shuffle=True)

# Indexing the dataset directly returns a single (features, label) pair.
print(train_data[0])

(tensor([0.8207, 1.0084]), 1)


### Load image type data from CSV

datafile = pandas.read_csv("xxx.csv") <br>
labels = datafile.iloc[:, 0].values &emsp; shape: (10000,)<br>
images = datafile.iloc[:, 1:].values &emsp; shape: (10000, 784)<br>
<br>
ToTensor: Converts a PIL Image or numpy.ndarray (H x W x C) in the range [0, 255] <br>
to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 1.0] <br>

**DataLoader: (batch_size, channel, H, W) --> torch.Size([2, 1, 28, 28])**

In [7]:
# FashionMNIST
# https://www.kaggle.com/zalando-research/fashionmnist
import torch, pandas
import numpy as np
import torchvision.transforms as ttf

class FMDataset(torch.utils.data.Dataset):
    """FashionMNIST samples backed by a DataFrame.

    Column 0 of ``datafile`` holds the label; the remaining columns hold the
    pixel values of one image per row (reshaped to 28 x 28 on access).
    """

    def __init__(self, datafile, transform=None):
        self.transform = transform
        # Column 0 -> labels, all other columns -> pixels stored as uint8 (0-255).
        self.labels = datafile.iloc[:, 0].values
        self.images = datafile.iloc[:, 1:].values.astype(np.uint8)

    def __len__(self):
        return len(self.images)

    def __getitem__(self, idx):
        """Return ``(image, label)``; the label is a ``torch.long`` scalar tensor."""
        pixels = self.images[idx]
        target = torch.tensor(int(self.labels[idx]), dtype=torch.long)

        if self.transform is None:
            # No transform supplied: build the channel-first (1, 28, 28) float
            # tensor by hand, scaling the pixel values by 1/255.
            scaled = pixels.reshape(1, 28, 28) / 255.
            return torch.tensor(scaled, dtype=torch.float), target

        # The transform receives a channel-last (28, 28, 1) uint8 array; with
        # ToTensor this becomes a (1, 28, 28) float tensor in [0.0, 1.0].
        return self.transform(pixels.reshape(28, 28, 1)), target

# CSV layout: one row per image -> {label: 1, pixel1: 0, pixel2: 0, ...}
test_df = pandas.read_csv("fashion-mnist_test.csv")
test_data = FMDataset(test_df, ttf.ToTensor())
test_loader = torch.utils.data.DataLoader(test_data, batch_size=2, shuffle=False)

# Fetch the first batch and show the shape of its image tensor.
print(next(iter(test_loader))[0].shape)
# Expected: torch.Size([2, 1, 28, 28])
# i.e. (batch_size, channel, H, W)

torch.Size([2, 1, 28, 28])


## Dataset (Segmentation)

In [2]:
import torch, cv2

class SegmentationDataset(torch.utils.data.Dataset):
    """Pairs of (RGB image, grayscale mask) loaded lazily from disk with OpenCV.

    Args:
        imagePaths: Sequence of image file paths.
        maskPaths: Sequence of mask file paths; ``maskPaths[i]`` is read
            together with ``imagePaths[i]``.
        transforms: Optional callable applied to the image and, in a separate
            call, to the mask. ``None`` (the default) returns the raw arrays.
    """

    def __init__(self, imagePaths, maskPaths, transforms=None):
        # store the image and mask filepaths, and augmentation transforms
        self.imagePaths = imagePaths
        self.maskPaths = maskPaths
        self.transforms = transforms

    def __len__(self):
        """Number of samples, taken from the number of image paths."""
        return len(self.imagePaths)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, mask)``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image or the mask.
        """
        image_path = self.imagePaths[idx]
        mask_path = self.maskPaths[idx]

        # cv2.imread reports failure by returning None instead of raising, so
        # check explicitly to get an error that names the offending file.
        image = cv2.imread(image_path)
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_path}")
        # OpenCV loads colour images as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        # Flag 0 (cv2.IMREAD_GRAYSCALE) reads the mask as a single channel.
        mask = cv2.imread(mask_path, 0)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")

        if self.transforms is not None:
            # NOTE: the transform is invoked twice, so a transform with random
            # behaviour is not guaranteed to treat image and mask identically.
            image = self.transforms(image)
            mask = self.transforms(mask)

        return (image, mask)

## torchvision.datasets

In [None]:
from torchvision import datasets
import torchvision
from torch.utils.data import DataLoader
import torchvision.transforms as ttf

# torchvision.datasets.FashionMNIST
train_data = datasets.FashionMNIST(root="data", train=True, download=False, transform=ttf.ToTensor())
test_data = datasets.FashionMNIST(root="data", train=False, download=False, transform=ttf.ToTensor())

train_loader = DataLoader(train_data, batch_size=128, shuffle=True, num_workers=2, drop_last=True)
test_loader = DataLoader(test_data, batch_size=128, shuffle=False, num_workers=2)

# torchvision.datasets.CIFAR10
# NOTE: this rebinds `train_loader`; after the cell runs it refers to the
# CIFAR10 loader, not the FashionMNIST one created above.
train_set = torchvision.datasets.CIFAR10(root='/content/drive/MyDrive/deep_learning/data', train=True, download=False, transform=ttf.ToTensor())
# Use the `DataLoader` name imported by this cell: `torch` itself is not
# imported here, so `torch.utils.data.DataLoader` only resolves if an earlier
# cell happened to import it.
train_loader = DataLoader(train_set, batch_size=128, shuffle=True, num_workers=2)


# torchvision.datasets.ImageNet(root: str, split: str = 'train', **kwargs: Any)
imagenet_data = torchvision.datasets.ImageNet('path/to/imagenet_root/')
data_loader = DataLoader(imagenet_data,
                         batch_size=4,
                         shuffle=True,
                         num_workers=2)

# Each dataset is now wrapped in a DataLoader and can be iterated as needed.
# Every iteration yields one batch of (features, labels) holding `batch_size`
# samples (128 for the FashionMNIST / CIFAR10 loaders, 4 for the ImageNet one).

## torchvision.datasets.ImageFolder

In [None]:
import torch
import torchvision
import torchvision.transforms as ttf

# ImageFolder reads a directory tree with one sub-directory per class
# (e.g. root/dog/xxx.png, root/cat/123.png).
# `train_path` and `val_path` must be defined before this cell is run.
train_data = torchvision.datasets.ImageFolder(train_path, transform=ttf.ToTensor())
val_data = torchvision.datasets.ImageFolder(val_path, transform=ttf.ToTensor())

train_loader = torch.utils.data.DataLoader(train_data, batch_size=128, num_workers=4, shuffle=True)
# The keyword is `num_workers`; the previous `num_worker` spelling is rejected
# by DataLoader with a TypeError.
val_loader = torch.utils.data.DataLoader(val_data, batch_size=128, num_workers=4, shuffle=False)

In [None]:
# Training pipeline: tensor conversion, random flip / affine augmentation,
# then per-channel normalisation.
# NOTE(review): these are plain lists; a dataset's `transform` argument needs a
# callable, so they presumably get wrapped in ttf.Compose(...) before use.
train_transforms = [
    ttf.ToTensor(),
    ttf.RandomHorizontalFlip(),
    ttf.RandomAffine(degrees=(-15, 15), scale=(0.98, 1.03)),
    ttf.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]
# Validation pipeline: same conversion and normalisation, no augmentation.
val_transforms = [
    ttf.ToTensor(),
    ttf.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),
]

# root/dog/xxx.png
# root/dog/xxy.png
# root/dog/[...]/xxz.png

# root/cat/123.png
# root/cat/nsdf3.png
# root/cat/[...]/asd932_.png

# Model<br>
torch.nn.Module<br>
torch.nn.Sequential<br>
torchvision.models.resnet18<br>

## torch.nn.Module (LeNet) <br>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Model(nn.Module):
    """LeNet-style CNN: two 5x5 conv + 2x2 max-pool stages and three linear layers.

    ``fc1`` expects 16 * 5 * 5 flattened features, which is what the conv stack
    produces for a (N, 1, 32, 32) input (32 -> 28 -> 14 -> 10 -> 5). A 28 x 28
    input would yield 16 * 4 * 4 features and not fit ``fc1``.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Map a (N, 1, 32, 32) batch to (N, 10) unnormalised class scores."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
        # Flatten the feature maps themselves, keeping the batch dimension.
        # (Previously the integer returned by num_flat_features was passed to
        # torch.flatten, which cannot work because it is not a tensor.)
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample, i.e. the product of all
        dimensions of ``x`` except the first (batch) one."""
        size = x.size()[1:]
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


# Instantiate the network defined above.
model = Model()
# Move the model to the GPU; this requires a CUDA-capable runtime.
model = model.cuda()
# Printing an nn.Module lists its registered layers.
print(model)

## torch.nn.Sequential

In [None]:
import torch
import torch.nn as nn

# Small CNN built declaratively: two Conv -> BatchNorm -> ReLU -> MaxPool
# stages followed by a two-layer classifier with dropout.
model = nn.Sequential(
    nn.Conv2d(3, 32, 3),
    nn.BatchNorm2d(32),
    nn.ReLU(),
    nn.MaxPool2d(2,2),
    
    nn.Conv2d(32, 64, 3),
    nn.BatchNorm2d(64),
    nn.ReLU(),
    nn.MaxPool2d(2,2),
    # To find the input size of the first fully connected layer, print the
    # size of the output at this point --> output.size() / output.shape
    
    # Flatten everything after the first (batch) dimension.
    nn.Flatten(),
    # 64 * 6 * 6 matches a (N, 3, 32, 32) input: 32 -> 30 -> 15 -> 13 -> 6.
    nn.Linear(64 * 6 * 6, 4096),
    nn.ReLU(),
    nn.Dropout(0.2),
    nn.Linear(4096, 10)
)

# Move the model to the GPU; this requires a CUDA-capable runtime.
model = model.cuda()
print(model)

## torchvision.models.resnet18 <br>
model = torchvision.models.resnet18() <br>
model.fc = nn.Linear(512, out_feats) <br>

In [10]:
from torchvision import models
import torch.nn as nn

# Randomly initialised ResNet-18. No weights are loaded by default, so the
# deprecated `pretrained=False` flag is omitted; this avoids the deprecation
# warning that newer torchvision releases emit for it.
model = models.resnet18()
# print(model)

# modify input channels: replace the 3-channel stem with a 1-channel one
model.conv1 = nn.Conv2d(1, 64, kernel_size=(7,7), stride=(2,2), padding=(3,3), bias=False)

# Original head: (fc): Linear(in_features=512, out_features=1000, bias=True)
# Replace it with a 2-class head. Reading in_features from the existing layer
# (512 for resnet18) keeps this line valid if the backbone is swapped.
model.fc = nn.Linear(model.fc.in_features, 2)

# Loss Function

In [None]:
# Catalogue of commonly used criteria. Each assignment rebinds `criterion`,
# so only the last one is left bound once the cell has run.
# The legacy `size_average` / `reduce` arguments default to None and are left
# out; `reduction` alone controls how the per-element losses are combined.
criterion = torch.nn.CrossEntropyLoss()

criterion = torch.nn.BCELoss(weight=None, reduction='mean')
criterion = torch.nn.L1Loss(reduction='mean')
criterion = torch.nn.MSELoss(reduction='mean')
criterion = torch.nn.SmoothL1Loss(reduction='mean', beta=1.0)

## BCE-Dice Loss

In [None]:
class DiceBCELoss(nn.Module):
    """Sum of binary cross-entropy and soft Dice loss, computed from raw logits.

    ``weight`` and ``size_average`` are accepted by the constructor but not
    used by the loss computation.
    """

    def __init__(self, weight=None, size_average=True):
        super().__init__()

    def forward(self, inputs, targets, smooth=1):
        """Compute ``BCE + (1 - Dice)``.

        Args:
            inputs: Raw logits; a sigmoid is applied here.
            targets: Binary ground truth with the same number of elements as
                ``inputs``. Integer or boolean masks are cast to the dtype of
                the probabilities.
            smooth: Constant added to the Dice numerator and denominator.

        Returns:
            Scalar tensor holding the combined loss.
        """
        # torch.sigmoid is used instead of F.sigmoid; reshape (unlike view)
        # also accepts non-contiguous tensors, e.g. after a transpose/permute.
        probs = torch.sigmoid(inputs).reshape(-1)
        # binary_cross_entropy needs floating-point targets of matching dtype.
        targets = targets.reshape(-1).to(probs.dtype)

        intersection = (probs * targets).sum()
        dice_loss = 1 - (2. * intersection + smooth) / (probs.sum() + targets.sum() + smooth)
        bce = F.binary_cross_entropy(probs, targets, reduction='mean')

        return bce + dice_loss

## CrossEntropy Loss

The CrossEntropy function, in PyTorch, expects the output from your model to be of the shape - **[batch, num_classes, H, W]** (pass this directly to your loss function)<br>
and the ground truth to be of shape **[batch, H, W]** where H, W in your case is 256, 256.<br>
Also please make sure the ground truth is of type long by calling .long() on the tensor<br>

# Train & Eval

In [None]:
def train():
    """Train the notebook-global ``model`` on the global ``train_loader`` for 5 epochs.

    Uses Adam (lr=0.001), cross-entropy loss and an exponential learning-rate
    decay of 0.98 applied once per epoch. Each batch is moved to the GPU with
    ``.cuda()``. The mean batch loss is printed after every epoch.

    Expected setup before calling, e.g.:
        model = Model().cuda()
        train_loader = DataLoader(MyDataset(), 128)
    """
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()
    scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.98)
    epochs = 5

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        for data, label in train_loader:
            data, label = data.cuda(), label.cuda()
            optimizer.zero_grad()
            output = model(data)
            # CrossEntropyLoss takes (input, target): model output first,
            # ground-truth labels second. The arguments used to be swapped.
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            # .item() yields a plain Python float, detached from the graph/GPU.
            total_loss += loss.item()

        # Decay the learning rate once per epoch.
        scheduler.step()
        train_loss = total_loss / len(train_loader)
        print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))

In [None]:
def val(epoch):
    """Evaluate the notebook-global ``model`` on the global ``val_loader``.

    Uses the global ``criterion`` for the loss. Prints the mean batch loss and
    the accuracy in percent.

    Args:
        epoch: Epoch number, used only in the printed message.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for data, label in val_loader:
            data, label = data.cuda(), label.cuda()
            output = model(data)
            loss = criterion(output, label)
            total_loss += loss.item()
            # Predicted class = index of the largest score along dim 1.
            preds = torch.argmax(output, 1)
            num_correct += (preds == label).sum().item()

    val_loss = total_loss / len(val_loader)
    # Sample count comes from the loader's own dataset; the previously used
    # `val_set` name is not defined anywhere in this notebook.
    acc = 100 * num_correct / len(val_loader.dataset)
    print('Epoch: {}  Val Loss: {}  Acc: {}'.format(epoch, val_loss, acc))

# Optimizer & Scheduler


In [None]:
# torch.optim.ASGD
# torch.optim.Adadelta
# torch.optim.Adagrad
# torch.optim.Adam
# torch.optim.AdamW
# torch.optim.Adamax
# torch.optim.LBFGS
# torch.optim.RMSprop
# torch.optim.Rprop
# torch.optim.SGD
# torch.optim.SparseAdam

# Option 1: Adam with a fixed multiplicative learning-rate decay.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer=optimizer, gamma=0.98)
# Advance with: scheduler.step()

# Option 2: Adam with weight decay; the learning rate is scaled by `factor`
# when the monitored metric stops improving for `patience` epochs.
optimizer = torch.optim.Adam(params=model.parameters(), lr=2e-3, weight_decay=5e-6)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, factor=0.7, patience=2)
# Advance with the monitored metric: scheduler.step(val_loss)