In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import tifffile as tiff
import cv2
import os
from tqdm.notebook import tqdm
import zipfile
import rasterio
from rasterio.windows import Window
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
import gc
import glob
import segmentation_models_pytorch as smp

from albumentations import (Compose, Normalize)
from albumentations.pytorch import ToTensorV2

import warnings
warnings.filterwarnings("ignore")

# Side length (pixels) of a tile after downscaling; default `sz` of HubMAPDataset.
sz = 512
# Downscale factor; HubMAPDataset reads tiles of `reduce * sz` pixels and resizes
# them down by this factor. Default `reduce` of HubMAPDataset.
reduce = 2
# Directory holding the test images; each one is opened as DATA/<id>.tiff.
DATA = 'input/test/'
# Sample submission table; its `id` column drives the per-image inference loop.
df_sample = pd.read_csv('input/sample_submission.csv')

def enc2mask(encs, shape):
    """Decode run-length encodings into a single label mask.

    Args:
        encs: iterable of RLE strings of the form "start length start length ...",
            where `start` is a 1-based index into the flattened buffer. Entries
            that are a float NaN (missing encoding) are skipped.
        shape: two-element sequence; the flat buffer holds shape[0]*shape[1]
            values, is reshaped to `shape` and then transposed.

    Returns:
        uint8 array of shape (shape[1], shape[0]). Pixels covered by the m-th
        encoding (0-based) hold the value m + 1; uncovered pixels hold 0.
    """
    img = np.zeros(shape[0] * shape[1], dtype=np.uint8)
    for m, enc in enumerate(encs):
        # `np.float` was only an alias of the builtin `float` and was removed in
        # NumPy 1.24; test against `float` (and NumPy float scalars) instead.
        if isinstance(enc, (float, np.floating)) and np.isnan(enc):
            continue
        tokens = enc.split()
        # Tokens come in (start, length) pairs; a dangling odd token is ignored.
        for start, length in zip(tokens[0::2], tokens[1::2]):
            begin = int(start) - 1  # RLE starts are 1-based
            img[begin:begin + int(length)] = 1 + m
    return img.reshape(shape).T

def mask2enc(mask, n=1):
    """Run-length encode each label 1..n of a label mask.

    The mask is transposed and flattened before encoding. For every label the
    result is a string "start length start length ..." with 1-based starts,
    or NaN when the label does not occur in the mask.

    Returns:
        list with one entry per label, in label order.
    """
    flat = mask.T.flatten()
    encodings = []
    for label in range(1, n + 1):
        hits = (flat == label).astype(np.int8)
        if not hits.any():
            encodings.append(np.nan)
            continue
        # Pad with zeros so runs touching either end still produce two edges.
        padded = np.concatenate([[0], hits, [0]])
        edges = np.flatnonzero(padded[1:] != padded[:-1]) + 1
        # Odd positions hold run ends; turn them into run lengths.
        edges[1::2] -= edges[::2]
        encodings.append(' '.join(map(str, edges)))
    return encodings

def rle_encode_less_memory(img):
    """Run-length encode a binary mask as "start length ..." with 1-based starts.

    The mask is flattened in column-major order into a fresh copy (the input is
    not modified). The first and last flattened pixels are forced to 0 so that
    no padding is required to detect run boundaries.
    """
    flat = img.flatten(order='F')
    flat[[0, -1]] = 0
    # +2: one for the shifted comparison, one for 1-based indexing.
    edges = np.flatnonzero(flat[1:] != flat[:-1]) + 2
    edges[1::2] -= edges[::2]
    return ' '.join(map(str, edges))

# Saturation threshold: HubMAPDataset counts the pixels whose HSV S value exceeds it.
s_th = 40
# A tile is flagged as empty (index -1) when it has at most this many saturated
# pixels, or when the sum of its pixel values is at most this number.
p_th = 1000 * (sz//256) ** 2
# Identity affine transform handed to rasterio.open when opening the test TIFFs.
identity = rasterio.Affine(1, 0, 0, 0, 1, 0)

# Device the input batches are moved to before being fed to the networks.
device = 'cuda'

# Normalisation statistics, applied as (img / 255 - mean) / std.
# Three values each, presumably one per colour channel — TODO confirm their origin.
mean = np.array([0.63759809, 0.4716141, 0.68231112])
std = np.array([0.16475244, 0.22850685, 0.14593643])

def img2tensor(img, dtype:np.dtype=np.float32):
    """Convert an HxW or HxWxC NumPy image into a channel-first torch tensor.

    A 2-D image gets a trailing channel axis first. The array is cast to
    `dtype` without copying when it already has that dtype.
    """
    hwc = img[:, :, np.newaxis] if img.ndim == 2 else img
    chw = hwc.transpose(2, 0, 1)
    return torch.from_numpy(chw.astype(dtype, copy=False))

class HubMAPDataset(Dataset):
    """Tile-wise reader for one test TIFF.

    The image is virtually zero-padded so that both sides become multiples of
    the read-tile size (`reduce * sz`) and is then split into an
    `n0max x n1max` grid of tiles. Each tile is optionally downscaled by
    `reduce`, normalised with the module-level `mean`/`std` and returned as a
    channel-first tensor.

    Args:
        idx: image id; the file opened is ``DATA/<idx>.tiff``.
        sz: side length of a tile after downscaling.
        reduce: downscale factor; tiles of ``reduce * sz`` pixels are read.
    """
    def __init__(self, idx, sz=sz, reduce=reduce):
        self.data = rasterio.open(os.path.join(DATA, idx+'.tiff'), transform=identity, num_threads='all_cpus')
        # Always define `layers` so the attribute exists for 3-band files too.
        self.layers = []
        if self.data.count != 3:
            # Channels are stored as separate subdatasets; open one reader each.
            for subdataset in self.data.subdatasets:
                self.layers.append(rasterio.open(subdataset))
        self.shape = self.data.shape
        self.reduce = reduce
        self.sz = reduce * sz  # side length of the tile read from the file
        self.pad0 = (self.sz - self.shape[0] % self.sz) % self.sz  # total padding along axis 0
        self.pad1 = (self.sz - self.shape[1] % self.sz) % self.sz  # total padding along axis 1
        self.n0max = (self.shape[0] + self.pad0) // self.sz  # number of tiles along axis 0
        self.n1max = (self.shape[1] + self.pad1) // self.sz  # number of tiles along axis 1

    def __len__(self):
        """Total number of tiles in the padded grid."""
        return self.n0max * self.n1max

    def __getitem__(self, idx):
        """Return ``(tile_tensor, index)`` for the tile with flat grid index `idx`.

        `index` equals `idx` for tiles considered to contain tissue and -1 for
        tiles flagged as empty by the saturation / pixel-sum thresholds.
        """
        n0, n1 = divmod(idx, self.n1max)
        # The leading pad is pad // 2 (floor), matching the crop applied when the
        # tiles are stitched back together. `-self.pad0//2` parses as
        # `(-self.pad0)//2`, which rounds the other way and shifts the grid by one
        # pixel whenever the pad is odd.
        x0 = n0 * self.sz - self.pad0 // 2
        y0 = n1 * self.sz - self.pad1 // 2
        # Clip the tile window to the part that lies inside the image.
        p00, p01 = max(0, x0), min(x0 + self.sz, self.shape[0])
        p10, p11 = max(0, y0), min(y0 + self.sz, self.shape[1])
        img = np.zeros((self.sz, self.sz, 3), np.uint8)  # padding stays zero

        window = Window.from_slices((p00, p01), (p10, p11))
        if self.data.count == 3:
            img[(p00-x0):(p01-x0), (p10-y0):(p11-y0)] = np.moveaxis(
                self.data.read([1, 2, 3], window=window), 0, -1)
        else:
            for i, layer in enumerate(self.layers):
                img[(p00-x0):(p01-x0), (p10-y0):(p11-y0), i] = layer.read(1, window=window)

        if self.reduce != 1:
            # Use the instance's factor, not the module-level `reduce`.
            side = self.sz // self.reduce
            img = cv2.resize(img, (side, side), interpolation=cv2.INTER_AREA)

        # NOTE(review): the conversion code is BGR2HSV although bands are read in
        # file order; only the S channel is used, which should not depend on the
        # channel order — confirm.
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)
        _, s, _ = cv2.split(hsv)

        tile = img2tensor((img / 255.0 - mean) / std)
        is_empty = (s > s_th).sum() <= p_th or img.sum() <= p_th
        return tile, (-1 if is_empty else idx)

class Model_pred:
    """Iterate over ensemble predictions for the non-empty tiles of a DataLoader.

    Iterating yields ``(prediction, tile_index)`` pairs, one per tile whose
    index in the batch is >= 0. `prediction` is the sigmoid output averaged
    over all models in `net512s`, upsampled by `scale_factor`, laid out
    channel-last on the CPU.

    Args:
        net512s: sequence of models to average.
        dl: iterable of ``(x, y)`` batches; `y` holds tile indices, with
            negative values marking tiles to skip.
        tta: stored on the instance; not used by this class as written.
        half: stored on the instance; not used by this class as written.
        net256s: optional second model list, stored as `self.net256s`. When
            omitted, the module-level `net256s` is used if it exists,
            otherwise an empty list.
        scale_factor: spatial upsampling factor applied to the averaged
            prediction (default 2, the value that was previously hard-coded).
    """
    def __init__(self, net512s, dl, tta:bool=False, half:bool=False,
                 net256s=None, scale_factor=2):
        self.net512s = net512s
        # Previously this silently read the global `net256s` and raised
        # NameError when that global had not been defined yet.
        self.net256s = net256s if net256s is not None else globals().get('net256s', [])
        self.dl = dl
        self.tta = tta
        self.half = half
        self.scale_factor = scale_factor

    def _predict_batch(self, x):
        """Return averaged, upsampled, channel-last CPU predictions for batch `x`."""
        if len(self.net512s) == 0:
            raise ValueError("Model_pred needs at least one model in net512s")
        # no_grad is scoped to the computation only, so it is never held across
        # a `yield` (which would leave grad disabled in the consumer's code).
        with torch.no_grad():
            x = x.to(device)
            py = None
            for model in self.net512s:
                p = torch.sigmoid(model(x)).detach()
                if py is None:
                    py = p
                else:
                    py += p
            py /= len(self.net512s)
            # F.upsample is deprecated; interpolate with align_corners=False is
            # what it dispatched to for bilinear mode.
            py = F.interpolate(py, scale_factor=self.scale_factor,
                               mode="bilinear", align_corners=False)
            return py.permute(0, 2, 3, 1).float().cpu()

    def __iter__(self):
        for x, y in self.dl:
            keep = y >= 0
            if not keep.any():  # batch contains only empty tiles
                continue
            preds = self._predict_batch(x[keep])
            for pred, tile_idx in zip(preds, y[keep]):
                yield pred, tile_idx

    def __len__(self):
        """Number of tiles in the underlying dataset (including empty ones)."""
        return len(self.dl.dataset)

def inference(ds, test_loader, nets, threshold=0.39, scale_factor=2):
    """Predict a binary mask for every tile of `ds`.

    Args:
        ds: dataset providing ``len(ds)`` (number of tiles) and ``ds.sz``
            (side length of one full-resolution tile).
        test_loader: iterable of ``(x, y)`` batches; `y` holds tile indices,
            with negative values marking tiles to skip.
        nets: sequence of models whose sigmoid outputs are averaged.
        threshold: probability at or above which a pixel is set to 1.
        scale_factor: spatial upsampling factor applied to the averaged
            prediction before thresholding.

    Returns:
        int8 tensor of shape ``(len(ds), ds.sz, ds.sz)``; skipped tiles stay 0.

    Raises:
        ValueError: if a non-empty tile has to be predicted but `nets` is empty.
    """
    mask = torch.zeros(len(ds), ds.sz, ds.sz, dtype=torch.int8)
    with torch.no_grad():
        for x, y in test_loader:
            keep = y >= 0
            if not keep.any():  # batch contains only empty tiles
                continue
            if len(nets) == 0:
                raise ValueError("inference needs at least one model in nets")
            x = x[keep].to(device)
            y = y[keep]

            py = None
            for model in nets:
                p = torch.sigmoid(model(x)).detach()
                if py is None:
                    py = p
                else:
                    py += p
            py /= len(nets)

            # F.upsample is deprecated; interpolate with align_corners=False is
            # what it dispatched to for bilinear mode.
            py = F.interpolate(py, scale_factor=scale_factor,
                               mode="bilinear", align_corners=False)
            py = py.permute(0, 2, 3, 1).float().cpu()

            for pred, tile_idx in zip(py, y):
                mask[int(tile_idx)] = pred.squeeze(-1) >= threshold
    return mask

# U-Net ensemble: one model per checkpoint whose file name contains "512".
# The weights are loaded after wrapping in DataParallel, so the saved state
# dicts presumably carry the wrapper's key prefix — verify against the training code.
net512s = []
for model in glob.glob('checkpoint/*512*.pth'):
    net = torch.nn.DataParallel(
        smp.Unet(
            encoder_name='timm-efficientnet-b4',
            encoder_weights=None,
            classes=1,
            activation=None,
        )
    ).cuda()
    checkpoint = torch.load(model)
    net.load_state_dict(checkpoint['net'])
    net.eval()
    net512s.append(net)

# FPN ensemble: one model per checkpoint whose file name contains "fpn".
# Here the weights are loaded into the bare model before it is moved to the GPU.
net256s = []
for model in glob.glob('checkpoint/*fpn*.pth'):
    net = smp.FPN(
        encoder_name='timm-efficientnet-b4',
        encoder_weights=None,
        classes=1,
        activation=None,
    )
    checkpoint = torch.load(model)
    net.load_state_dict(checkpoint['net'])
    net = net.cuda()
    net.eval()
    net256s.append(net)

def submission_generator():
    """Predict every image listed in `df_sample` through `Model_pred`.

    For each image the per-tile predictions are thresholded at 0.35, stitched
    into one canvas, cropped back to the image extent and run-length encoded.
    The result is written to 'submission.csv' and returned as a DataFrame with
    the columns 'id' and 'predicted'.
    """
    names, preds = [], []
    for _, sample in tqdm(df_sample.iterrows(), total=len(df_sample)):
        ds = HubMAPDataset(idx=sample['id'], sz=512, reduce=2)
        test_loader = DataLoader(ds, batch_size=16, shuffle=False, pin_memory=True, num_workers=0)
        mp = Model_pred(net512s, dl=test_loader)

        # One binary slot per tile; tiles skipped by the predictor stay zero.
        mask = torch.zeros(len(ds), ds.sz, ds.sz, dtype=torch.int8)
        for tile_pred, tile_idx in iter(mp):
            mask[tile_idx.item()] = tile_pred.squeeze(-1) >= 0.35

        # Tile grid -> single padded canvas.
        full_h = ds.n0max * ds.sz
        full_w = ds.n1max * ds.sz
        mask = mask.view(ds.n0max, ds.n1max, ds.sz, ds.sz)
        mask = mask.permute(0, 2, 1, 3).reshape(full_h, full_w)

        # Strip the padding: pad // 2 in front, the remainder behind.
        row_start = ds.pad0 // 2
        row_stop = -(ds.pad0 - ds.pad0 // 2) if ds.pad0 > 0 else full_h
        col_start = ds.pad1 // 2
        col_stop = -(ds.pad1 - ds.pad1 // 2) if ds.pad1 > 0 else full_w
        mask = mask[row_start:row_stop, col_start:col_stop]

        rle = rle_encode_less_memory(mask.numpy())

        names.append(sample['id'])
        preds.append(rle)
        del mask, ds, test_loader
        gc.collect()

    df = pd.DataFrame({'id': names, 'predicted': preds})
    df.to_csv('submission.csv', index=False)
    return df

def submission():
    """Predict every image listed in `df_sample` through `inference`.

    For each image the per-tile binary masks are stitched into one canvas,
    cropped back to the image extent and run-length encoded. The result is
    written to 'submission.csv' and returned as a DataFrame with the columns
    'id' and 'predicted'.
    """
    names, preds = [], []
    for _, sample in tqdm(df_sample.iterrows(), total=len(df_sample)):
        ds = HubMAPDataset(idx=sample['id'], sz=512, reduce=2)
        test_loader = DataLoader(ds, batch_size=16, shuffle=False, pin_memory=True, num_workers=0)

        mask = inference(ds, test_loader, net512s)

        # Tile grid -> single padded canvas.
        full_h = ds.n0max * ds.sz
        full_w = ds.n1max * ds.sz
        mask = mask.view(ds.n0max, ds.n1max, ds.sz, ds.sz)
        mask = mask.permute(0, 2, 1, 3).reshape(full_h, full_w)

        # Strip the padding: pad // 2 in front, the remainder behind.
        row_start = ds.pad0 // 2
        row_stop = -(ds.pad0 - ds.pad0 // 2) if ds.pad0 > 0 else full_h
        col_start = ds.pad1 // 2
        col_stop = -(ds.pad1 - ds.pad1 // 2) if ds.pad1 > 0 else full_w
        mask = mask[row_start:row_stop, col_start:col_stop]

        rle = rle_encode_less_memory(mask.numpy())

        names.append(sample['id'])
        preds.append(rle)
        del mask, ds, test_loader
        gc.collect()

    df = pd.DataFrame({'id': names, 'predicted': preds})
    df.to_csv('submission.csv', index=False)
    return df

# Alternative entry point (generator-based Model_pred path); currently disabled.
# df = submission_generator()
# Run inference for every image in df_sample; this also writes submission.csv.
df = submission()
# Display the resulting table as the cell output.
df

HBox(children=(FloatProgress(value=0.0, max=5.0), HTML(value='')))




RuntimeError: The size of tensor a (1024) must match the size of tensor b (2048) at non-singleton dimension 3

In [1]:
import pandas as pd

# Reload a saved submission for a quick visual check.
# NOTE(review): this reads '../submission.csv', while the inference cell writes
# 'submission.csv' into the working directory — confirm both refer to the same file.
df = pd.read_csv('../submission.csv')
df

Unnamed: 0,id,predicted
0,2ec3f1bb9,60738309 24 60762290 39 60786274 48 60810258 5...
1,3589adb90,68600092 16 68600120 34 68629517 75 68658947 8...
2,d488c759a,535323817 19 535370474 26 535417130 36 5354637...
3,aa05346ff,52764540 19 52795254 32 52825970 41 52856687 4...
4,57512b7f1,328886086 2 328919318 29 328952554 40 32898579...
