In [1]:
%matplotlib inline
import numpy as np 
import pandas as pd 
import pickle
import torch
from matplotlib import pyplot as plt 
import torchvision.models as models
from torchvision import transforms
import torch.nn as nn 
from tqdm import tqdm, tqdm_notebook
import matplotlib.pyplot as plt
import cv2
import os
from IPython.display import clear_output
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from pathlib import Path
from torch.utils.data import Dataset, DataLoader
# Print the full path of every file found under the Kaggle input directory.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

In [2]:
# Read train.csv into a DataFrame; the columns ImageId, ClassId and
# EncodedPixels are used by the pivot in the next cell.
df = pd.read_csv('../input/severstal-steel-defect-detection/train.csv')
# Last expression of the cell: preview the first rows.
df.head()

In [3]:
# Reshape the long annotation table into one row per image with one
# EncodedPixels column per ClassId (1..4).
df1 = df.pivot(index='ImageId', columns='ClassId', values='EncodedPixels').reset_index()

# Re-key the per-class columns with the string names '1'..'4'.
d = {'ImageId': df1['ImageId']}
d.update({str(class_id): df1[class_id] for class_id in (1, 2, 3, 4)})
train_df = pd.DataFrame(d).fillna('')  # a missing class becomes an empty string

# ClassesList is a 0/1 flag per class: 1 where the image has an encoding for it.
train_df['ClassesList'] = train_df.iloc[:, 1:].ne('').astype(np.int8).values.tolist()
train_df.head()

In [4]:
# Build the table consumed by the dataset: for every image, its 0/1
# ClassesList and the list of its non-empty run-length encodings, kept in
# class order ('1'..'4') so that the k-th encoding matches the k-th set flag.
#
# The list is built for every row of train_df (no hard-coded row count) and
# assigned as a whole column, which avoids chained-indexing assignment.
rle_columns = ['1', '2', '3', '4']
train_df1 = train_df[['ImageId', 'ClassesList']].copy()
train_df1['EncodedPixels'] = [
    [rle for rle in row if rle != '']
    for row in train_df[rle_columns].itertuples(index=False, name=None)
]
train_df1.head(13)

In [5]:
def make_mask(encoded, shape=(1600, 256)):
    """Decode a run-length encoding into a single-channel PIL mask image.

    The encoding is a flat sequence ``start_1 length_1 start_2 length_2 ...``
    where starts are 1-based positions in the column-major flattened image
    (pixels are numbered down each column first).

    Args:
        encoded: Either a whitespace-separated string of integers or an
            already-parsed sequence of integers. An empty string yields an
            all-zero mask.
        shape: ``(width, height)`` of the decoded image. Defaults to
            ``(1600, 256)``.

    Returns:
        A PIL image whose array form has ``height`` rows and ``width``
        columns, with 255 on encoded pixels and 0 elsewhere.

    Note:
        Runs are written with slice assignment, so a run that extends past
        the end of the image is clipped instead of raising.
    """
    width, height = shape

    if isinstance(encoded, str):
        encoded = [int(token) for token in encoded.split()]

    # Even positions hold run starts, odd positions hold run lengths.
    starts = encoded[0::2]
    lengths = encoded[1::2]

    # Paint each run in the flattened (column-major) image.
    mask = np.zeros(width * height, dtype=np.uint8)
    for start, length in zip(starts, lengths):
        begin = start - 1  # encoding is 1-based
        mask[begin:begin + length] = 255

    # Each consecutive chunk of `height` values is one image column, so
    # reshape to (width, height) and transpose to get (height, width).
    res = np.ascontiguousarray(mask.reshape(width, height).T)
    return Image.fromarray(res)

In [6]:
# Record whether PyTorch can see a CUDA device and report which one will be used.
train_on_gpu = torch.cuda.is_available()

print(
    'CUDA is available!  Training on GPU ...'
    if train_on_gpu
    else 'CUDA is not available.  Training on CPU ...'
)

In [7]:
# Supported dataset modes.
DATA_MODES = ['train', 'val', 'test']
# Every image and mask is resized to RESCALE_SIZE_1 x RESCALE_SIZE_2
# (width x height) pixels.
RESCALE_SIZE_1 = 800
RESCALE_SIZE_2 = 128
# Run on the GPU when one is available, otherwise fall back to the CPU so the
# notebook still runs on a machine without CUDA.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [8]:
class SteelDataset(Dataset):
    """Dataset of steel images with optional instance-segmentation targets.

    In 'train' and 'val' modes each item is ``(image, target)`` where
    ``target`` is a dict with the keys ``boxes``, ``labels``, ``image_id``,
    ``area``, ``iscrowd`` and ``masks``. In 'test' mode each item is just the
    image tensor.

    Args:
        names: Sequence of image file names.
        df: DataFrame with the columns ``ImageId``, ``EncodedPixels`` (list of
            run-length strings, one per defect class present) and
            ``ClassesList`` (0/1 flag per class). It is stored only when
            ``mode`` is not 'test'.
        mode: One of ``DATA_MODES``.

    Raises:
        NameError: If ``mode`` is not one of ``DATA_MODES``.
    """

    def __init__(self, names, df, mode):
        super().__init__()
        # Validate before storing anything so a bad mode fails immediately
        # and the error carries the explanation.
        if mode not in DATA_MODES:
            raise NameError(f"{mode} is not correct; correct modes: {DATA_MODES}")

        self.names = names
        self.mode = mode
        if self.mode != 'test':
            self.df = df

        self.len_ = len(self.names)

    def __len__(self):
        """Return the number of image names in the dataset."""
        return self.len_

    def load_sample(self, file, mode):
        """Open ``file`` from the test or train image folder as an RGB PIL image."""
        if mode == 'test':
            image = Image.open('../input/severstal-steel-defect-detection/test_images/'+ file).convert("RGB")
        else:
            image = Image.open('../input/severstal-steel-defect-detection/train_images/'+ file).convert("RGB")
        image.load()
        return image

    def __getitem__(self, index):
        """Return the image tensor, plus the target dict outside 'test' mode."""
        name = self.names[index]

        img = np.array(self._prepare_sample(self.load_sample(name, self.mode)))
        # Scale to [0, 1]: 255 for 8-bit data, 65535 for 16-bit data.
        max_value = 256 ** ((img.dtype == np.uint16) + 1) - 1
        img = (img / max_value).astype(np.float32)
        # ToTensor converts the HxWxC float array into a CxHxW tensor.
        img = transforms.ToTensor()(img)

        if self.mode == 'test':
            return img

        # Single lookup of the annotation row (first match for this image).
        row = self.df.loc[self.df['ImageId'] == name].iloc[0]
        resized_masks = [
            self._prepare_sample(make_mask(rle)) for rle in row['EncodedPixels']
        ]
        return img, self._build_target(index, resized_masks, row['ClassesList'])

    def _build_target(self, index, resized_masks, class_flags):
        """Assemble the detection target dict from resized masks.

        Args:
            index: Dataset index, stored as ``image_id``.
            resized_masks: One 2-D array per annotated defect, in class order.
            class_flags: 0/1 flag per class; class ids are 1-based positions
                of the non-zero flags.

        Returns:
            Dict with ``boxes`` (float32, N x 4, ``[xmin, ymin, xmax, ymax]``),
            ``labels`` (int64, N), ``image_id``, ``area``, ``iscrowd``
            (uint8, N) and ``masks`` (uint8, N x H x W, values 0/1).
        """
        class_ids = [i + 1 for i, flag in enumerate(class_flags) if flag != 0]

        boxes, labels, binary_masks = [], [], []
        for class_id, resized in zip(class_ids, resized_masks):
            # Resizing may interpolate, leaving values between 0 and 255 on
            # the defect border. Binarize once and derive BOTH the mask and
            # the box from the same pixels so they always agree.
            binary = np.asarray(resized) > 0
            ys, xs = np.where(binary)
            if ys.size == 0:
                # Nothing left of this defect after resizing: skip it
                # together with its label to keep the target consistent.
                continue
            # Exclusive upper bounds (+1) so that a defect one pixel wide or
            # tall still yields a box with positive width and height.
            boxes.append([xs.min(), ys.min(), xs.max() + 1, ys.max() + 1])
            labels.append(class_id)
            binary_masks.append(binary)

        num_objs = len(boxes)
        boxes = torch.as_tensor(np.array(boxes, dtype=np.float32).reshape(-1, 4))
        if binary_masks:
            mask = torch.as_tensor(np.stack(binary_masks), dtype=torch.uint8)
        else:
            mask = torch.zeros((0, RESCALE_SIZE_2, RESCALE_SIZE_1), dtype=torch.uint8)

        target = {}
        target["boxes"] = boxes
        # Explicit dtype so an empty label list is still an integer tensor.
        target["labels"] = torch.as_tensor(labels, dtype=torch.int64)
        target["image_id"] = torch.tensor([index])
        target["area"] = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        target["iscrowd"] = torch.zeros((num_objs,), dtype=torch.uint8)
        target["masks"] = mask
        return target

    def _prepare_sample(self, image):
        """Resize a PIL image to RESCALE_SIZE_1 x RESCALE_SIZE_2 and return it as an array."""
        image = image.resize((RESCALE_SIZE_1, RESCALE_SIZE_2))
        return np.array(image)

In [9]:
from sklearn.model_selection import train_test_split

# Split the labelled image names 75% / 25% into train and validation sets.
# random_state is fixed so a kernel restart reproduces the same split and
# training runs stay comparable.
train_val_names = train_df['ImageId']
train_files, val_files = train_test_split(
    train_val_names, train_size=0.75, random_state=42
)
val_dataset = SteelDataset(list(val_files), train_df1, mode='val')
train_dataset = SteelDataset(list(train_files), train_df1, mode='train')

In [10]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

In [11]:
def load_model(num_classes=5, hidden_layer=256):
    """Build a pretrained Mask R-CNN (ResNet-50 FPN) with new prediction heads.

    Args:
        num_classes: Number of output classes for both heads. The default of 5
            matches the four defect labels (1..4) produced by the dataset plus
            one extra class for label 0 (presumably background — confirm
            against the torchvision docs).
        hidden_layer: Number of hidden channels in the replacement mask head.

    Returns:
        The torchvision Mask R-CNN model with its box and mask predictors
        replaced by freshly initialised ones.
    """
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    # Replace the pretrained box classifier with one sized for our classes.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Do the same for the mask predictor.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model

In [12]:
def fit_epoch(model, train_loader, criterion, optimizer, batch_size):
    """Run one training pass over ``train_loader`` and return the mean batch loss.

    The model is called with images and targets and is expected to return a
    dict of loss terms, which are summed into the value that is optimised.
    ``criterion`` and ``batch_size`` are accepted but not used.

    Returns:
        Sum of the per-batch losses divided by the number of batches.
    """
    model.train()
    total_loss = 0.0

    for images, targets in train_loader:
        images = [image.float().to(DEVICE) for image in images]
        targets = [
            {key: value.to(DEVICE) for key, value in target.items()}
            for target in targets
        ]

        optimizer.zero_grad()
        loss_terms = model(images, targets)
        batch_loss = sum(loss_terms.values())
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()

    return total_loss / len(train_loader)

In [13]:
def eval_epoch(model, val_loader, criterion, batch_size):
    """Compute the mean validation loss and plot predictions for the last batch.

    The previous implementation called the model in eval mode, where it
    returns detections rather than losses, summed the detection confidence
    scores, and never reset the accumulator between batches. The value
    returned here is the same quantity ``fit_epoch`` reports: the sum of the
    model's loss terms, averaged over batches.

    Args:
        model: Detection model that returns a dict of losses when called with
            targets in training mode and a list of predictions in eval mode.
        val_loader: Loader yielding ``(images, targets)`` batches.
        criterion: Unused; kept for interface compatibility.
        batch_size: Unused; kept for interface compatibility.

    Returns:
        Mean per-batch validation loss as a float (0.0 for an empty loader).
        The model is left in eval mode.
    """
    running_loss = 0.0
    num_batches = 0
    inputs, labels = [], []

    # The loss dict is only produced in training mode, so switch to it but
    # disable gradients; no optimizer step is taken, so weights are untouched.
    # NOTE(review): this assumes train mode has no side effects such as
    # BatchNorm running-stat updates — confirm for the model in use.
    model.train()
    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs = [image.float().to(DEVICE) for image in inputs]
            labels = [{k: v.to(DEVICE) for k, v in t.items()} for t in labels]
            loss_terms = model(inputs, labels)
            running_loss += sum(loss_terms.values()).item()
            num_batches += 1
    val_loss = running_loss / num_batches if num_batches else 0.0

    model.eval()

    # Visualisation: up to two samples of the last batch, shown as input,
    # ground-truth masks and predicted masks.
    clear_output(wait=True)
    if not inputs:
        return val_loss

    with torch.no_grad():
        outputs = model(inputs)

    plt.figure(figsize=(13, 9))
    # The last batch may hold fewer than two images.
    for k in range(min(2, len(inputs))):
        plt.subplot(3, 2, k+1)
        plt.imshow(np.rollaxis(inputs[k].detach().cpu().numpy(), 0, 3), cmap='gray')
        plt.title('Real')
        plt.axis('off')

        plt.subplot(3, 2, k+3)
        plt.imshow(np.sum(labels[k]["masks"].detach().cpu().numpy(), 0), cmap='gray')
        plt.title('True' +'\n' + str(labels[k]["labels"]))
        plt.axis('off')

        # Skip the prediction panel when the model detected nothing.
        if outputs[k]["masks"].detach().cpu().numpy().size != 0:
            plt.subplot(3, 2, k+5)
            plt.imshow(np.sum(outputs[k]["masks"].detach().cpu().numpy(), 0).squeeze(), cmap='gray')
            plt.title('Output'+'\n'+str(outputs[k]["labels"]))
            plt.axis('off')
    plt.show()
    return val_loss

In [14]:
def collate_fn(batch):
    """Transpose a batch of samples into one tuple per sample field.

    A batch ``[(img_a, tgt_a), (img_b, tgt_b)]`` becomes
    ``((img_a, img_b), (tgt_a, tgt_b))``.
    """
    fields = zip(*batch)
    return tuple(field for field in fields)

In [15]:
def train(train_files, val_files, model, epochs, batch_size):
    """Train ``model`` for ``epochs`` epochs and return the per-epoch history.

    Args:
        train_files: Training dataset wrapped in a shuffling DataLoader.
            (The parameter name is kept for backward compatibility; the
            notebook passes Dataset objects here.)
        val_files: Validation dataset wrapped in a non-shuffling DataLoader.
        model: Detection model to optimise.
        epochs: Number of epochs to run.
        batch_size: Batch size for both loaders.

    Returns:
        List with one ``(train_loss, 0, val_loss, 0)`` tuple per epoch; the
        zeros are placeholders for accuracy values that are not computed.
    """
    # Build the loaders from the arguments rather than from notebook globals,
    # so the function trains on whatever the caller passes in.
    train_loader = DataLoader(train_files, batch_size=batch_size, num_workers=2,
                              shuffle=True, collate_fn=collate_fn)
    val_loader = DataLoader(val_files, batch_size=batch_size, num_workers=2,
                            shuffle=False, collate_fn=collate_fn)

    history = []
    log_template = "\nEpoch {ep:03d} train_loss: {t_loss:0.4f} \
    val_loss {v_loss:0.4f} train_acc {t_acc:0.4f} val_acc {v_acc:0.4f}"
    # Placeholder forwarded to fit_epoch / eval_epoch, which do not use it.
    criterion = 0

    # Only optimise parameters that require gradients.
    params = [p for p in model.parameters() if p.requires_grad]
    opt = torch.optim.SGD(params, lr=0.0005,
                          momentum=0.9, weight_decay=0.0005)

    with tqdm(desc="epoch", total=epochs) as pbar_outer:
        for epoch in range(epochs):
            train_loss = fit_epoch(model, train_loader, criterion, opt, batch_size)
            val_loss = eval_epoch(model, val_loader, criterion, batch_size)
            history.append((train_loss, 0, val_loss, 0))

            pbar_outer.update(1)
            tqdm.write(log_template.format(ep=epoch+1, t_loss=train_loss,
                                           v_loss=val_loss, t_acc=0, v_acc=0))

    return history

In [16]:
# Build the model and move its parameters to DEVICE.
model = load_model()
# Last expression of the cell: the returned model is displayed as cell output.
model.to(DEVICE)

In [17]:
# Train for 30 epochs with batches of 15 images; `history` receives one
# tuple per epoch from train().
history = train(train_dataset, val_dataset, model, epochs = 30, batch_size = 15)

In [18]:
# Split the per-epoch 4-tuples in `history` into four parallel sequences.
loss, acc, val_loss, val_acc = zip(*history)

In [19]:
# Plot the training loss recorded for each epoch.
fig, ax = plt.subplots(figsize=(15, 9))
ax.plot(loss, label="train_loss")
ax.set_xlabel("epochs")
ax.set_ylabel("loss")
plt.show()

In [20]:
# CPU device handle; predict() moves model outputs onto it before calling .numpy().
cpu_device = torch.device("cpu")

In [21]:
def predict(model, test_loader, df):
    """Run the model over ``test_loader`` and store per-class masks in ``df``.

    Rows of ``df`` are filled in loader order, starting at row label 0, so the
    loader must not shuffle and ``df`` must have one row per image with the
    default 0..N-1 index. The row offset is tracked from the actual batch
    sizes, so any ``batch_size`` works.

    For every image:

    * ``df['EncodedPixels']`` must already hold a list of four entries (one
      per class). Each class that is detected gets a float array of shape
      ``(1, 1, H, W)``: the element-wise maximum of all predicted soft masks
      of that class. The entry always has this one shape, however many
      detections were merged, so ``entry[0][0]`` is always the 2-D mask.
      Classes without detections keep their existing (empty) entry.
    * ``df['ClassId']`` receives the sorted list of distinct predicted labels.

    Args:
        model: Detection model returning, per image, a dict with at least the
            keys ``labels`` and ``masks``.
        test_loader: Loader yielding batches of image tensors.
        df: DataFrame with ``EncodedPixels`` and ``ClassId`` columns, updated
            in place.

    Returns:
        The same ``df`` object.
    """
    model.eval()
    row = 0  # label of the df row that belongs to the next image

    with torch.no_grad():
        for j, inputs in enumerate(test_loader):
            inputs = [image.float().to(DEVICE) for image in inputs]
            outputs = model(inputs)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]

            for output in outputs:
                labels = output['labels'].numpy()
                masks = output['masks'].numpy()
                # The cell holds a Python list; mutating it in place updates
                # the frame without any chained-indexing assignment.
                per_class = df.at[row, 'EncodedPixels']

                for label, mask in zip(labels, masks):
                    n = int(label) - 1
                    if not 0 <= n < 4:
                        continue
                    mask = mask[np.newaxis]  # add a leading axis: (1, 1, H, W)
                    if isinstance(per_class[n], np.ndarray):
                        # Union of soft masks; unlike summation this keeps
                        # values within the range of a single mask.
                        per_class[n] = np.maximum(per_class[n], mask)
                    else:
                        per_class[n] = mask

                found = np.unique(labels).tolist()
                class_cell = df.at[row, 'ClassId']
                if isinstance(class_cell, list):
                    class_cell[:] = found
                else:
                    df.at[row, 'ClassId'] = found
                row += 1

            # Sparse progress report.
            if j in (0, 100, 500, 1000):
                print(j)

    return df

In [22]:
def predict_one_random_sample(model, dataset):
    """Show a random image from ``dataset`` and the masks the model predicts for it.

    The sample is drawn from the ``dataset`` argument (previously a global
    test dataset was used regardless of the argument) and the index is drawn
    uniformly from the whole dataset instead of a fixed 0..499 range.

    Args:
        model: Detection model returning, per image, a dict with ``masks``
            and ``labels``.
        dataset: Dataset whose items are image tensors (channels first).
    """
    i = np.random.randint(len(dataset))
    inputs = dataset[i]

    plt.figure(figsize=(13, 9))
    # Move channels last for imshow.
    plt.imshow(np.rollaxis(inputs.numpy(), 0, 3), cmap='gray')
    plt.title('input')
    plt.axis('off')
    plt.show()

    model.eval()
    with torch.no_grad():
        outputs = model([inputs.to(DEVICE)])

    plt.figure(figsize=(13, 9))
    # Sum all predicted instance masks into one picture.
    plt.imshow(np.sum(outputs[0]['masks'].detach().cpu().numpy(), 0).squeeze(), cmap='gray')
    plt.title('Output'+'\n'+str(outputs[0]['labels']))
    plt.axis('off')
    plt.show()

In [23]:
# Collect the test image names (path prefix stripped) in sorted path order
# and wrap them in a dataset in 'test' mode.
TEST_DIR = Path('../input/severstal-steel-defect-detection/test_images')
test_files = [
    str(path).replace('../input/severstal-steel-defect-detection/test_images/', '')
    for path in sorted(TEST_DIR.rglob('*.jpg'))
]

test_dataset = SteelDataset(test_files,df, mode="test")
len(test_files)

In [24]:
# Visual sanity check: plot one sample image and the model's predicted masks.
predict_one_random_sample(model, test_dataset)

In [25]:
# Iterate the test set in batches of 5 without shuffling, so batches follow
# the order of test_files.
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=5)

In [28]:
# Result table for predict(): one row per test image, with four empty
# per-class slots in EncodedPixels and an empty ClassId list.
d = dict(ImageId=test_files)
test_df = pd.DataFrame(d)
test_df['EncodedPixels'] = [[[] for _ in range(4)] for _ in test_files]
test_df['ClassId'] = [[] for _ in test_files]

In [29]:
# Fill test_df with the model's predictions; predict() returns the same frame.
probs = predict(model, test_loader,test_df)

In [33]:
# Preview the first ten prediction rows.
probs.head(10)

In [None]:
# Inspect the first per-class entry stored for the image in row 2.
probs['EncodedPixels'][2][0]

In [None]:
# Show the test image from row 2, resized to 800x128 ...
img = Image.open('../input/severstal-steel-defect-detection/test_images/'+ probs['ImageId'][2]).convert("RGB")
img.load()
img = img.resize((800, 128))
plt.figure(figsize=(13, 9))
plt.imshow(img, cmap='gray')
plt.axis('off')
plt.show()
# ... followed by the sum of the stored entries at positions 0 and 2 for the
# same row.
# NOTE(review): `.squeeze()` needs the sum to be an array, so this presumably
# relies on both entries being arrays for this particular image — confirm.
plt.figure(figsize=(13, 9))
plt.imshow((probs['EncodedPixels'][2][0]+probs['EncodedPixels'][2][2]).squeeze(), cmap='gray')
plt.axis('off')
plt.show()


In [95]:
def from_mask_to_encoding(mask, threshold=0.5):
    """Run-length encode a mask as ``"start length start length ..."``.

    Pixels are numbered from 1 in column-major order (down each column
    first). A pixel counts as foreground when its value is greater than
    ``threshold``, so soft masks, 0/1 masks and 0/255 masks are all accepted.

    Args:
        mask: Array-like mask. Singleton axes are dropped; a 2-D mask of any
            size is flattened column by column, and a 1-D mask is treated as
            already flattened.
        threshold: Values strictly above this are foreground. Defaults to 0.5.

    Returns:
        The encoding as a space-separated string; an empty string when no
        pixel is foreground.

    Raises:
        ValueError: If the mask has more than two non-singleton dimensions.
    """
    binary = np.squeeze(np.asarray(mask) > threshold)
    if binary.ndim > 2:
        raise ValueError(f"expected a 1-D or 2-D mask, got shape {binary.shape}")

    # Transposing before flattening yields column-major pixel order.
    pixels = binary.T.flatten().astype(np.uint8)

    # Pad with background on both sides so every run has a start and an end.
    pixels = np.concatenate([[0], pixels, [0]])
    # 1-based positions where the value changes: run starts at even indices,
    # run ends (exclusive) at odd indices.
    runs = np.where(pixels[1:] != pixels[:-1])[0] + 1
    # Turn each end position into a run length.
    runs[1::2] -= runs[::2]
    return ' '.join(str(x) for x in runs)

In [None]:
# def from_mask_to_encoding(mask):

#     flat = mask.flatten()

#     padded = np.concatenate([[0], flat, [0]])
    
#     runs = np.where(padded[1:] != padded[:-1])[0] 
#     runs += 1
#     runs[1::2] -= runs[0::2]
#     return ' '.join(str(x) for x in runs)

In [100]:
# Submission skeleton: four consecutive rows per test image, each starting
# with an empty encoding and ClassId 0.
test_files_result = [name for name in test_files for _ in range(4)]
d1 = {'ImageId': test_files_result}
res_df = pd.DataFrame(d1)
res_df['EncodedPixels'] = [''] * len(test_files_result)
res_df['ClassId'] = [0] * len(test_files_result)
res_df.head(8)

In [101]:
# Copy the predictions into the submission skeleton. Row 4*i + j of res_df
# belongs to image i and class j+1. For every class predicted for an image,
# store the class id and the run-length encoding of its mask.
# Cells are written with .at so the assignment reaches res_df itself;
# chained indexing (res_df[col][row] = ...) may write to a temporary copy.
for i in range(len(test_files)):
    predicted_classes = probs.at[i, 'ClassId']
    class_masks = probs.at[i, 'EncodedPixels']
    for j in range(4):
        class_id = j + 1
        if class_id in predicted_classes:
            row = 4 * i + j
            res_df.at[row, 'ClassId'] = class_id
            res_df.at[row, 'EncodedPixels'] = from_mask_to_encoding(class_masks[j][0][0])
res_df.head(8)

In [103]:
# Replace the placeholder ClassId 0 with an empty string in a single
# vectorised step. The column is converted to object dtype first so it can
# hold both integers and strings, and it is reassigned as a whole instead of
# through chained indexing.
res_df['ClassId'] = res_df['ClassId'].astype(object).where(res_df['ClassId'] != 0, '')

In [105]:
# Write the result table to a CSV file without the index column.
res_df.to_csv('./simple_cnn_baseline.csv', index=False)