# class probe

We compare the classification results of the unet and hc models.


In [35]:
import multiprocessing as mp
import sys

import pandas as pd
import torch
import PIL
from PIL import Image
import numpy as np

sys.path.insert(0, '../../data/siim-pneumothorax')
from mask_functions import *

In [2]:
# Load the classification-probe CSVs written for the hc and unet models.
hc_df, unet_df = (
    pd.read_csv(csv_path)
    for csv_path in ('hc_0_4folds_cls.csv', 'unet_20190716-171659_4folds_cls.csv')
)


In [13]:
def translate(rle):
    """Map a classification-probe value to a readable label.

    Returns 'pos' when `rle` equals the string '-1' and 'neg' for every
    other value.
    """
    return 'pos' if rle == '-1' else 'neg'

# The cls-probe CSV marks a positive prediction with '-1'; any other value
# is counted as negative.
def cls_count(df):
    """Count positive and negative entries in df['EncodedPixels'].

    Returns a `(pos_count, neg_count)` tuple, where an entry is positive
    when it equals the string '-1' and negative otherwise.
    """
    labels = list(df['EncodedPixels'])

    pos_count = sum(1 for label in labels if label == '-1')
    neg_count = len(labels) - pos_count

    return pos_count, neg_count

In [8]:
# Positive/negative totals for each model's classification CSV.
(hc_pos, hc_neg), (u_pos, u_neg) = map(cls_count, (hc_df, unet_df))

print('hc - pos: {}, neg: {}'.format(hc_pos, hc_neg))
print('unet - pos: {}, neg: {}'.format(u_pos, u_neg))

hc - pos: 202, neg: 1175
unet - pos: 202, neg: 1175


In [16]:
id_list = list(hc_df['ImageId'])
hc_cls_list = list(hc_df['EncodedPixels'])
u_cls_list = list(unet_df['EncodedPixels'])

diff_id_list = []

# Rows are compared by position, so both CSVs are assumed to list the images
# in the same order (TODO confirm).
# Only one kind of disagreement is reported: hc has '1 2' (shown as 'neg')
# while unet has '-1' (shown as 'pos').
for i, (image_id, hc_cls) in enumerate(zip(id_list, hc_cls_list)):
    u_cls = u_cls_list[i]
    if hc_cls == '1 2' and u_cls == '-1':
        print('id: {}, hc: {}, u: {}'.format(
            image_id, translate(hc_cls), translate(u_cls)))


id: 1.2.276.0.7230010.3.1.4.8323329.6967.1517875202.154226, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6540.1517875198.820092, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6789.1517875201.161293, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6473.1517875198.462294, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6703.1517875199.611760, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.615.1517875163.703375, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6874.1517875201.664174, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6109.1517875196.574624, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6721.1517875199.820400, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6159.1517875196.803533, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6338.1517875197.693438, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.6602.1517875199.139989, hc: neg, u: pos
id: 1.2.276.0.7230010.3.1.4.8323329.5840.1517875191.649829, hc: neg, u: pos
id: 1.2.276.0

## Let's try merging the masks directly

In [18]:
unet_mask_df = pd.read_csv('unet_20190716-171659_4folds.csv')
unet_mask_list = list(unet_mask_df['EncodedPixels'])

# Where the hc classification is '1 2', write '-1' in place of the unet mask;
# everywhere else keep the unet mask from the same row position.
merge_mask_list = [
    '-1' if hc_cls == '1 2' else unet_mask_list[i]
    for i, hc_cls in enumerate(hc_cls_list)
]


In [19]:
# Assemble the merged-mask table, rewrite empty encodings to '-1',
# and save it as merge.csv.
sub_df = pd.DataFrame({'ImageId': id_list, 'EncodedPixels': merge_mask_list})
sub_df.loc[sub_df['EncodedPixels'].eq(''), 'EncodedPixels'] = '-1'
sub_df.to_csv('merge.csv', index=False)
sub_df.head()

Unnamed: 0,ImageId,EncodedPixels
0,1.2.276.0.7230010.3.1.4.8323329.6106.151787519...,-1
1,1.2.276.0.7230010.3.1.4.8323329.6588.151787519...,-1
2,1.2.276.0.7230010.3.1.4.8323329.6014.151787519...,-1
3,1.2.276.0.7230010.3.1.4.8323329.6813.151787520...,-1
4,1.2.276.0.7230010.3.1.4.8323329.699.1517875164...,-1


## Let's try merging the ptt tensors

In [22]:
# Per-model thresholds; the merged threshold is their mean.
hc_best_thr, u_best_thr = 0.245, 0.2625
best_thr = (hc_best_thr + u_best_thr) / 2

# Saved tensors for each model (presumably per-pixel predictions - TODO
# confirm); the merged tensor is their element-wise mean.
hc_ptt, u_ptt = torch.load('hc_ptt.pth'), torch.load('unet_ptt.pth')
ptt = (hc_ptt + u_ptt) / 2


In [23]:
# Despite the name this is not a majority vote: the averaged tensor is
# binarised, 1 where it exceeds best_thr and 0 elsewhere.
pt_vote = torch.where(
    ptt > best_thr,
    torch.ones_like(ptt),
    torch.zeros_like(ptt),
)


In [25]:
# Noise removal: blank out any mask with fewer than noise_th predicted pixels.
sz = 256
noise_th = 75.0*(sz/128.0)**2  # 300.0 for sz = 256

# Sum each entry along everything but the first dimension to get its pixel count.
pixels_per_mask = pt_vote.view(pt_vote.shape[0], -1).sum(-1)
pt_vote[pixels_per_mask < noise_th, ...] = 0.0

# Hand plain numpy arrays to the later steps.
pt_vote = pt_vote.numpy()


In [36]:
# RLE encoding helper for the parallel step below (each mask is first resized
# to mask_size x mask_size).
mask_size = 1024

def mask_worker(mask):
    """Resize one binary mask to mask_size x mask_size and return its RLE.

    The mask is transposed and scaled to 0/255 uint8 before resizing
    (presumably the orientation mask2rle expects - confirm against
    mask_functions).
    """
    pixels = (mask.T*255).astype(np.uint8)
    resized = PIL.Image.fromarray(pixels).resize((mask_size, mask_size))
    return mask2rle(np.asarray(resized), mask_size, mask_size)

# Encode every mask in a worker pool. The context manager shuts the workers
# down once map() has returned, so rerunning this cell no longer leaves
# orphaned processes behind (the original pool was never closed).
with mp.Pool() as pool:
    rles = pool.map(mask_worker, pt_vote)


In [38]:
# Assemble the table from the RLE-encoded masks, rewrite empty encodings
# to '-1', and save it as merge_feature.csv.
sub_df = pd.DataFrame({'ImageId': id_list, 'EncodedPixels': rles})
sub_df.loc[sub_df['EncodedPixels'].eq(''), 'EncodedPixels'] = '-1'
sub_df.to_csv('merge_feature.csv', index=False)
sub_df.head()


Unnamed: 0,ImageId,EncodedPixels
0,1.2.276.0.7230010.3.1.4.8323329.6106.151787519...,-1
1,1.2.276.0.7230010.3.1.4.8323329.6588.151787519...,-1
2,1.2.276.0.7230010.3.1.4.8323329.6014.151787519...,-1
3,1.2.276.0.7230010.3.1.4.8323329.6813.151787520...,-1
4,1.2.276.0.7230010.3.1.4.8323329.699.1517875164...,-1
