In [14]:
import torch
import torchvision
from PIL import Image
import torchvision.transforms as T
import torch.nn as nn
import math
import torchvision.transforms.functional as TF
from torchvision.utils import save_image

In [15]:
def crop_sc(tensor, target_tensor): 
#crop the skip connection to size of upsample target 
#tensor -> upsample
#tensor -> skip connection
    tensor = TF.resize(tensor, size=target_tensor.shape[2:]) 
    return tensor

'''
The double convolution function gets as input the number of 
input channels and output channels. For the first layer this 
will be 1 and 64 because the input image is black and white.
Note that if you had an RGB image the input channels would be 3.
'''

'''
ReLU is an activation function, F(x) = max(0,x) 
if x =< 0 then f(x) = 0 
if x > 0 then f(x) = x
negatives are mapped to 0 therefore It is not a linear function ofc, 
so introduces non linearity which provides better understanding of the 
complex features

sigmoid maps activations between 0,1 where when the network is large 
causes vanishing gradients, ReLU solves the vanishing gradients problem
'''

def double_conv(in_ch, out_ch):
#apply 2 convolution operations sequantially        
    conv = nn.Sequential(
        nn.Conv2d(in_ch, out_ch, 3),
        nn.ReLU(inplace = True), 
            
        nn.Conv2d(out_ch, out_ch, 3),
        nn.ReLU(inplace = True)
    )
    return conv

'''
Defining a U-Net class that implements the nn.Module which is 
used as a base class for all neural network modules in PyTorch. 
It provides a standard interface for creating and managing the 
parameters of a module, as well as methods for forwarding input 
through the module.
'''

class UNet(nn.Module):
    
    '''
Input is an RGB image, output is a segmentation mask consisting of 3 colors
Background: black, Obj1: green, Obj2: blue 
out_channels depends on the number of classes you have in your output mask. 
Features are the number of filters to be applied at each layer. 
Number of filters are up to you and to the need of your application. 
It is common that deeper layers have higher amount of filters bc they extract 
more meaningful features. 
    '''

    def __init__(self, in_channels = 3, out_channels = 3, features = [64,128,256,512,1024]):
        
        super(UNet, self).__init__()
        self.max_pool = nn.MaxPool2d(kernel_size = 2, stride = 2)
        
        #Using ModuleList to store Convolutions to be applied at each layer.
        
        self.downs = nn.ModuleList()
        self.ups = nn.ModuleList()
        
        '''
        Save double convolution operations in the downs ModuleList
        downs[0]: double_conv(3,64) 
        downs[1]: double_conv(64,128)
        downs[2]: double_conv(128,256) 
        downs[3]: double_conv(256,512)
        downs[4]: double_conv(512,1024)
        '''
        for feature in features:
            self.downs.append(double_conv(in_channels,feature))
            in_channels = feature
  
            
        '''
        Save transpose convolution operations in the ups ModuleList
        after each transpose convolution there is a double convolution applied
        ups[0]: ConvTranspose2d(1024,512) 
        ups[1]: double_conv(1024,512)
        ups[2]: ConvTranspose2d(512,256) 
        ups[3]: double_conv(512,256)
        ups[4]: ConvTranspose2d(256,128)
        ups[5]: double_conv(256,128) 
        ups[6]: ConvTranspose2d(128,64)
        ups[7]: double_conv(128,64) 
        '''               
        for feature in reversed(features[1:]):
            self.ups.append(nn.ConvTranspose2d(feature,feature//2,2,2))
            self.ups.append(double_conv(feature,feature//2))
            
        
        #Bottleneck is the last layer before starting the expanding path
        #no max pooling is applied after bottleneck, 
        #so store the operations that will be applied seperately

        self.bottleneck = double_conv(features[-1]//2,features[-1]) #512, 1024
        self.out = nn.Conv2d(features[0],2,3) #64,2,3

    def forward(self, x):
        skip_connections = []  
        print("x",x.size())
        for down in self.downs[:-1]:
            x = down(x)
            print("before max pool",x.size())
            skip_connections.append(x) #we will use this in concatination with up parts
            x = self.max_pool(x)
            
            
        x = self.bottleneck(x) #apply the doubleconv at the bottleneck step, bottleneck ile basla
        print("convolved bottleneck: ",x.size())
        
        skip_connections = skip_connections[::-1] #to reverse the list
        
        for idx in range(0, len(self.ups) , 2):
            print(idx)
            print(self.ups[idx])
            x = self.ups[idx](x) # apply transpose conv, start with bottleneck 
            s_con = skip_connections[idx//2] #bc idx = 0,2,4,6,8.. get skip connection, skip connection is the last layer form contracting path
            print("sizes", x.size(),s_con.size())
            #s_con is 64x64x512, 136x136x256, 
            y = crop_sc(s_con,x) #crop the skip connectio 64 needs to be resized to 56
            
                #concat
            concat = torch.cat([x,y] ,1) #concat bottleneck 512x56x56 with cropped 64 from contracting layer (56x56x512)
            ##element wise addition output is 56x56x512
            print('doubleconv id',idx+1)
            x = self.ups[idx+1](concat) #apply double conv
            #double convs are stored in indexes 1,3,5
                   
        return self.out(x)


In [16]:
import torch
import torchvision
import torchvision.transforms as T
from PIL import Image

#transform = T.ToPILImage()

In [17]:
#model gets as input a single sample -sample image converted to torch tensor- outputs another image 

if __name__ == "__main__":
    image = torch.rand((3,3,572,572))
    model = UNet()
    print(model(image))
    output = model(image)
    model.forward


x torch.Size([3, 3, 572, 572])
before max pool torch.Size([3, 64, 568, 568])
before max pool torch.Size([3, 128, 280, 280])
before max pool torch.Size([3, 256, 136, 136])
before max pool torch.Size([3, 512, 64, 64])
convolved bottleneck:  torch.Size([3, 1024, 28, 28])
0
ConvTranspose2d(1024, 512, kernel_size=(2, 2), stride=(2, 2))
sizes torch.Size([3, 512, 56, 56]) torch.Size([3, 512, 64, 64])
doubleconv id 1
2
ConvTranspose2d(512, 256, kernel_size=(2, 2), stride=(2, 2))
sizes torch.Size([3, 256, 104, 104]) torch.Size([3, 256, 136, 136])
doubleconv id 3
4
ConvTranspose2d(256, 128, kernel_size=(2, 2), stride=(2, 2))
sizes torch.Size([3, 128, 200, 200]) torch.Size([3, 128, 280, 280])
doubleconv id 5
6
ConvTranspose2d(128, 64, kernel_size=(2, 2), stride=(2, 2))
sizes torch.Size([3, 64, 392, 392]) torch.Size([3, 64, 568, 568])
doubleconv id 7
tensor([[[[-1.9879e-03, -2.7690e-03, -1.5430e-03,  ..., -1.0176e-03,
           -1.2969e-03, -3.3652e-03],
          [-3.9479e-04, -6.4619e-04, -3.64

## Data Set 


In [12]:
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as transforms
from imageio import imread
import os

class DT_DataSet(Dataset):
    #when an object of class DT_DataSet is created the attributes given would be masks and images folder names
    def __init__(self,masks_path,images_path,transform = None):
        self.masks_path = masks_path
        self.images_path = images_path
        self.transform = transform
        
        #load the whole dataset, holds name of each single image in the dataset
        self.masks = sorted(os.listdir(self.masks_path))
        self.images = sorted(os.listdir(self.images_path))
    
    def __len__(self):
        return len(self.masks)
        
    def __getitem__(self,index):
        #loads single data sample in the dataset, dataloader uses __getitem__ to get a single pair of (image,mask)
        #combines multiple of these together to form a batch
    
        #combine image name with the folder name to create a path for the image(onesingle image in the dataset not the whole set)
        mask_filename,image_filename = os.path.join(self.masks_path, self.masks[index]),os.path.join(self.images_path, self.images[index])
        
        #read the image as an array
        mask, image = imread(mask_filename), imread(image_filename)
     
        if self.transform != None:
            mask, image = self.transform(mask),self.transform(image)
            
        return mask, image
#give folder name iterate through each file in the folder
#dataset object holds the entire dataset
#train data will be augmented images, validation data will be training images
val_mask_path = r"C:\Users\derbent.z\Desktop\Training attempts\training_0302\mask train set"
val_image_path = r"C:\Users\derbent.z\Desktop\Training attempts\training_0302\train set"
dataset = DT_DataSet(val_mask_path, val_image_path,transforms.ToTensor())
#dataloader loads this dataset in batched for the training of the network
dataloader = DataLoader(dataset, batch_size=32, shuffle=True)