In [4]:
# necessary imports 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from time import time
from matplotlib import style
from PIL import Image

# imports for making models
import torch
import torch.nn as nn
from torchsummary import summary

# taking images as input 
import os
import cv2
# Pick the GPU automatically when PyTorch can see one, otherwise stay on the CPU.
# Kept as a plain string ('cuda' / 'cpu') so the same value can be handed to `.to(...)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

### Loading the input images
1. Specify the image and mask directories.
2. Build the dataset as a simple list of (image, mask) pairs.
3. Wrap the dataset in train and test dataloaders.

In [None]:
def dataset(images_dir, mask_dir, size=(256, 256)):
    """Load every image in `images_dir` together with its mask from `mask_dir`.

    The mask file name is derived from the image file name by replacing
    ".jpg" with "_mask.jpg". Image and mask are both resized to `size` so
    that they share one pixel grid.

    Args:
        images_dir: directory holding the input images.
        mask_dir: directory holding the matching mask files.
        size: (width, height) passed to PIL's `resize`; defaults to (256, 256).

    Returns:
        A list of `(img, mask)` tuples of numpy arrays, where `img` comes from
        the RGB-converted image and `mask` from the 'L' (single channel)
        converted mask. The list is empty when the directory has no images.
    """
    image_exts = ('.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tif', '.tiff')
    # Skip non-image entries (e.g. Thumbs.db) instead of crashing on them, and
    # sort so the result does not depend on the OS directory listing order.
    img_list = sorted(
        name for name in os.listdir(images_dir)
        if name.lower().endswith(image_exts)
    )

    data = []
    for i, img_name in enumerate(img_list):
        if i % 100 == 0:
            print(f"{i} images processed")
        img_path = os.path.join(images_dir, img_name)
        mask_path = os.path.join(mask_dir, img_name.replace(".jpg", "_mask.jpg"))

        # Context managers close the underlying files right away instead of
        # leaving thousands of handles open until garbage collection.
        with Image.open(img_path) as img_file:
            img = np.array(img_file.convert('RGB').resize(size))
        with Image.open(mask_path) as mask_file:
            # NEAREST keeps the mask's own pixel values; an interpolating
            # filter would blend in-between gray levels along the edges.
            mask = np.array(mask_file.convert('L').resize(size, resample=Image.NEAREST))

        data.append((img, mask))

    return data


In [None]:
# Root folder of the Carvana data. Set the CARVANA_DIR environment variable to
# point somewhere else; without it the original local path is used unchanged.
carvana_dir = os.environ.get(
    'CARVANA_DIR',
    'C:\\Users\\Ayush\\Desktop\\Let_us_start_once_again\\ComputerVision\\ImageSegmentation\\CARVANA',
)
images_dir = os.path.join(carvana_dir, 'train')
mask_dir = os.path.join(carvana_dir, 'train_masks')
data = dataset(images_dir, mask_dir)

In [None]:
def make_dataloaders(dataset, batch_size, test_size=88):
    """Randomly split `dataset` into train/test parts and wrap both in DataLoaders.

    Class imbalance (if any) between the randomly generated train and test
    sets is ignored.

    Args:
        dataset: any sized, indexable collection of samples.
        batch_size: batch size used by both loaders.
        test_size: number of samples held out for testing. The default of 88
            reproduces the 5000/88 split on a 5088-sample dataset.

    Returns:
        `(train_loader, test_loader)`.

    Raises:
        ValueError: if `test_size` is negative or leaves no training samples.
    """
    total = len(dataset)
    if not 0 <= test_size < total:
        raise ValueError(
            f"test_size must be in [0, {total}) for a dataset of {total} samples, got {test_size}"
        )
    # Derive the train size from the data instead of hard-coding it, so the
    # split also works for datasets that are not exactly 5088 samples long.
    train_data, test_data = torch.utils.data.random_split(dataset, [total - test_size, test_size])
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    # Evaluation does not benefit from shuffling; a fixed order keeps it repeatable.
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

In [None]:
def plot_batch(dataloader, batch_size):
    """Show up to `batch_size` image/mask pairs taken from the first batch of `dataloader`.

    Each pair is drawn as its own two-panel figure: the image on the left and
    the mask (gray colormap) on the right.

    Args:
        dataloader: iterable whose items are indexable as (images, masks).
        batch_size: maximum number of pairs to draw.
    """
    first_batch = next(iter(dataloader), None)
    if first_batch is None:  # empty loader: nothing to draw
        return

    images, masks = first_batch[0], first_batch[1]
    # The batch can be shorter than batch_size (small dataset / last batch),
    # so never index past what it actually contains.
    for i in range(min(batch_size, len(images))):
        #style.use('ggplot')
        fig, (ax_img, ax_mask) = plt.subplots(1, 2)
        ax_img.imshow(images[i])
        ax_img.axis('off')
        ax_mask.imshow(masks[i], cmap='gray')
        ax_mask.axis('off')
        plt.show()
        plt.close(fig)  # avoid piling up open figures when many pairs are drawn

In [None]:
# Mini-batch size shared by the train and the test loader.
batch_size = 32
train_loader, test_loader = make_dataloaders(dataset=data, batch_size=batch_size)

In [None]:
# Visual sanity check: draw the image/mask pairs of one training batch.
plot_batch(dataloader=train_loader, batch_size=batch_size)

In [None]:
# Reminder (easy to forget): cast the tensor to float32 first, e.g.
# a[0] = a[0].type(torch.float32)

### Building the UNET model
* First, we define the reusable convolutional block.
* Then we assemble the full network from these blocks.

In [76]:
class block(nn.Module):
    """Two stacked `3x3 conv -> batch norm -> ReLU` stages.

    The convolutions use padding=1 with a 3x3 kernel, so the spatial size is
    preserved and only the channel count changes from `in_channels` to
    `out_channels`.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Layers are created in execution order and stored under `self.model`.
        self.model = nn.Sequential(
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
            nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        )

    def forward(self, x):
        """Apply both conv stages to `x` and return the result."""
        stages = self.model
        return stages(x)
            

In [77]:
# Sanity-check a single block on a 3-channel 256x256 input.
temp_block = block(in_channels=3, out_channels=64).to(device)
# torchsummary's `device` argument defaults to "cuda"; pass the notebook's
# device explicitly so the dummy input lands on the same device as the model.
summary(temp_block, (3, 256, 256), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
Total params: 38,976
Trainable params: 38,976
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.75
Forward/backward pass size (MB): 192.00
Params size (MB): 0.15
Estimated Total Size (MB): 192.90
----------------------------------------------------------------


In [84]:
class UNET(nn.Module):
    """U-Net built from `block` stages.

    The encoder runs five `block`s (64 -> 1024 channels) with a 2x2 max-pool
    between them. The decoder up-samples four times with stride-2 transposed
    convolutions and, after each one, concatenates the encoder output of the
    same resolution (skip connection) before another `block`. A final 1x1
    convolution maps the 64 decoder channels to `out_channels`; no activation
    is applied to that output.

    Args:
        in_channels: channels of the input image (default 3).
        out_channels: channels of the returned map (default 1).
    """

    def __init__(self, in_channels=3, out_channels=1):
        super(UNET, self).__init__()
        self.pool = nn.MaxPool2d(kernel_size=2)
        # downsampling part
        self.block1 = block(in_channels=in_channels, out_channels=64)
        self.block2 = block(in_channels=64, out_channels=128)
        self.block3 = block(in_channels=128, out_channels=256)
        self.block4 = block(in_channels=256, out_channels=512)
        self.block5 = block(in_channels=512, out_channels=1024)
        # upsampling part: each decoder block receives the up-sampled features
        # concatenated with the skip connection, hence twice the channels.
        self.convt1 = self._up_conv(1024, 512)
        self.block6 = block(in_channels=1024, out_channels=512)

        self.convt2 = self._up_conv(512, 256)
        self.block7 = block(in_channels=512, out_channels=256)

        self.convt3 = self._up_conv(256, 128)
        self.block8 = block(in_channels=256, out_channels=128)

        self.convt4 = self._up_conv(128, 64)
        self.block9 = block(in_channels=128, out_channels=64)
        self.final_layer = nn.Conv2d(in_channels=64, out_channels=out_channels, kernel_size=1)

    @staticmethod
    def _up_conv(in_channels, out_channels):
        """Stride-2 transposed conv (doubles height and width) + batch norm + ReLU."""
        return nn.Sequential(
            nn.ConvTranspose2d(in_channels=in_channels, out_channels=out_channels, kernel_size=2, stride=2),
            nn.BatchNorm2d(out_channels),
            nn.ReLU()
        )

    def forward(self, x):
        """Return the `out_channels`-channel map for a batch `x` of shape (N, in_channels, H, W).

        Shape comments below are for the default 3-channel 256x256 input.

        Raises:
            ValueError: if H or W is not a multiple of 16. Four 2x poolings
                followed by four 2x up-samplings only line up with the skip
                connections for such sizes.
        """
        height, width = x.shape[-2:]
        if height % 16 != 0 or width % 16 != 0:
            raise ValueError(
                f"UNET needs height and width divisible by 16, got {height}x{width}"
            )

        # x - (3, 256, 256)
        # downsampling first
        skip1 = self.block1(x) # (64, 256, 256)
        skip2 = self.block2(self.pool(skip1)) # (128, 128, 128)
        skip3 = self.block3(self.pool(skip2)) # (256, 64, 64)
        skip4 = self.block4(self.pool(skip3)) # (512, 32, 32)
        op = self.block5(self.pool(skip4)) # (1024, 16, 16)
        # upsampling
        op = self.convt1(op) # (512, 32, 32)
        op = self.block6(torch.cat([skip4, op], 1)) # (512, 32, 32)

        op = self.convt2(op) # (256, 64, 64)
        op = self.block7(torch.cat([skip3, op], 1)) # (256, 64, 64)

        op = self.convt3(op) # (128, 128, 128)
        op = self.block8(torch.cat([skip2, op], 1)) # (128, 128, 128)

        op = self.convt4(op) # (64, 256, 256)
        op = self.block9(torch.cat([skip1, op], 1)) # (64, 256, 256)

        return self.final_layer(op) # (1, 256, 256)

In [85]:
# Build the full network and print its layer-by-layer summary.
model = UNET().to(device)
# torchsummary's `device` argument defaults to "cuda"; pass the notebook's
# device explicitly so the dummy input lands on the same device as the model.
summary(model, (3, 256, 256), device=device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 256, 256]           1,792
       BatchNorm2d-2         [-1, 64, 256, 256]             128
              ReLU-3         [-1, 64, 256, 256]               0
            Conv2d-4         [-1, 64, 256, 256]          36,928
       BatchNorm2d-5         [-1, 64, 256, 256]             128
              ReLU-6         [-1, 64, 256, 256]               0
             block-7         [-1, 64, 256, 256]               0
         MaxPool2d-8         [-1, 64, 128, 128]               0
            Conv2d-9        [-1, 128, 128, 128]          73,856
      BatchNorm2d-10        [-1, 128, 128, 128]             256
             ReLU-11        [-1, 128, 128, 128]               0
           Conv2d-12        [-1, 128, 128, 128]         147,584
      BatchNorm2d-13        [-1, 128, 128, 128]             256
             ReLU-14        [-1, 128, 1

In [None]:
# Training hyper-parameters.
lr = 1e-3
# NOTE: train_loader / test_loader were already created above, so changing
# this value does not change the batch size they deliver.
batch_size = 16
epochs = 10
# The network defined in this notebook is UNET; no `Segment` class exists here.
segment = UNET().to(device)
# `optim` is never imported on its own, so reach it through the `torch` namespace.
optimizer = torch.optim.Adam(segment.parameters(), lr=lr)
# UNET returns one raw logit per pixel (binary foreground/background), which is
# what BCEWithLogitsLoss expects; CrossEntropyLoss needs one channel per class.
criterion = nn.BCEWithLogitsLoss()
# Summarise on the 256x256 resolution the dataset is resized to.
summary(segment, (3, 256, 256), device=device)

In [None]:
def train(epochs):
    """Train the notebook-level `segment` model for `epochs` epochs.

    Uses the globals `segment`, `optimizer`, `criterion`, `train_loader` and
    `device`. `criterion` is called as `criterion(logits, target)` with both
    tensors shaped (N, H, W) and float dtype, i.e. the contract of a per-pixel
    binary loss such as `nn.BCEWithLogitsLoss`.

    Args:
        epochs: number of passes over `train_loader`.

    Returns:
        A list with the mean batch loss of every epoch.
    """
    tic = time()
    LOSS = []
    segment.train()  # make sure batch norm is in training mode
    num_batches = len(train_loader)
    for epoch in range(1, epochs + 1):
        total_loss = 0
        # A DataLoader can only be iterated, not indexed.
        for i, (images, masks) in enumerate(train_loader):
            # batches arrive as (N, H, W, 3) uint8 -> (N, 3, H, W) float in [0, 1]
            images = (images.permute(0, 3, 1, 2).float() / 255.0).to(device)
            # (N, H, W) uint8 -> float {0, 1}: every non-zero pixel is foreground
            # NOTE(review): if the masks are lossy JPEGs a mid-range threshold may
            # be more robust than "> 0" - confirm against the data.
            masks = (masks > 0).float().to(device)
            # forward propagation
            model_output = segment(images).squeeze(1)  # (N, 1, H, W) -> (N, H, W)
            # prediction first, target second
            loss = criterion(model_output, masks)
            total_loss += loss.item()
            # backward propagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if i % 100 == 0:
                print('Epoch [{}] ({}/{}), train_loss = {:.4f}, time = {:.2f} sec'.format(epoch, i, num_batches, loss.item(), time() - tic ))
        print("\n")
        LOSS.append(total_loss / num_batches)
    return LOSS
            

In [None]:
# Run the training loop and keep the per-epoch mean losses for plotting.
LOSS = train(epochs=epochs)

In [None]:
# Training-loss curve: one point per epoch, epochs numbered from 1.
plt.plot(range(1, epochs+1), LOSS)
plt.gca().set(
    xlabel='Number of epochs',
    ylabel='Loss',
    title="Loss vs NumEpochs",
)
plt.show()

In [None]:
# predictions
# A DataLoader cannot be indexed; take a fixed sample from the dataset behind it
# so this cell shows the same image every time it is run.
sample_idx = 1
img_np, mask_np = train_loader.dataset[sample_idx]  # (H, W, 3) uint8 image, (H, W) uint8 mask

# (H, W, 3) uint8 -> (1, 3, H, W) float in [0, 1]
image = torch.from_numpy(img_np).permute(2, 0, 1).float().div(255.0).unsqueeze(0).to(device)

# Inference: batch norm must use its running statistics and no gradients are needed.
# Call segment.train() again before any further training.
segment.eval()
with torch.no_grad():
    logits = segment(image)  # shape is (1, 1, H, W)
# The model returns raw logits, so squash them to probabilities before the 0.5 cut.
model_output = (torch.sigmoid(logits) >= 0.5).float().squeeze(1)  # shape is (1, H, W)
print("model_output", model_output.shape)

mask = (torch.from_numpy(mask_np) > 0).float().unsqueeze(0).to(device)  # shape is (1, H, W)
print("mask",mask.shape)

# number of pixels where prediction and ground truth disagree
print(torch.abs(mask-model_output).sum().item())

image = img_np  # already in the (H, W, 3) layout imshow expects
model_output = model_output.squeeze(0).cpu().numpy()
mask = mask.squeeze(0).cpu().numpy()

f, axarr = plt.subplots(nrows=1,ncols=3)
axarr[0].imshow(image); axarr[0].set_title('title 1')
axarr[1].imshow(mask,cmap='gray'); axarr[1].set_title('title 2')
axarr[2].imshow(model_output,cmap='gray'); axarr[2].set_title('title 3')
plt.show()

In [None]:
# Show the raw training image at the sample position used by the prediction cells.
# A DataLoader cannot be indexed, so read the sample from the dataset behind it.
image = train_loader.dataset[1][0]  # (H, W, 3) uint8 array, the layout imshow expects
plt.imshow(image)
plt.show()

In [None]:
#predictions
# A DataLoader cannot be indexed; read the sample from the dataset behind it and
# convert (H, W, 3) uint8 -> (1, 3, H, W) float in [0, 1].
image = torch.from_numpy(train_loader.dataset[1][0]).permute(2, 0, 1).float().div(255.0).unsqueeze(0).to(device)

# Inference mode; call segment.train() again before any further training.
segment.eval()
with torch.no_grad():
    # The model returns raw logits, so squash them to probabilities before the 0.5 cut.
    probabilities = torch.sigmoid(segment(image))
model_output = (probabilities > 0.5).float().squeeze().cpu().numpy()  # (H, W) array of 0.0 / 1.0
# number of pixels predicted as foreground
model_output.sum()
