In [1]:
import numpy as np
import pandas as pd
import pydicom
from matplotlib import pyplot as plt
import os
from mask_functions import rle2mask, mask2rle
from google.cloud import storage

ModuleNotFoundError: No module named 'mask_functions'

In [None]:
# Handle to the GCS bucket holding the chest X-ray data. `storage.Client()` is called
# without arguments, so project and credentials are taken from the environment.
bucket = storage.Client().get_bucket("pneumothorax_chest_x-rays")

In [None]:
# Materialise the listing of everything under the 'test/' prefix as a list, so it can be
# sized with len() and iterated by the dataset below.
blobs = list(bucket.list_blobs(prefix='test/'))

In [None]:
# Sample submission: its `ImageId` column and column names are reused later to filter,
# order and label the predictions.
df = pd.read_csv('./output/sample_submission.csv')
df.head()

In [None]:
import torch
import torch.utils.data
import collections
from tqdm import tqdm
from PIL import Image
import torchvision

class TorchDataset(torch.utils.data.Dataset):
    """Test-set dataset that mirrors DICOM blobs locally as PNG images.

    For every blob whose image id appears in ``df.ImageId`` the constructor
    makes sure ``./testset/<id>.dcm`` and ``./testset_png/<id>.png`` exist,
    downloading / converting them when they are missing.

    Args:
        blobs: sized iterable of storage blobs exposing ``name`` and
            ``download_to_file(file_obj)``.
        df: DataFrame with an ``ImageId`` column listing the ids to keep.

    Indexing returns ``(img, tsr, image_id)`` where ``img`` is the PIL image
    converted to RGB and ``tsr`` is ``ToTensor()`` applied to it.
    """

    def __init__(self, blobs, df):
        self.blobs = blobs
        self.df = df

        self.image_info = collections.defaultdict(dict)

        # The local cache directories may not exist on a fresh checkout.
        os.makedirs('./testset', exist_ok=True)
        os.makedirs('./testset_png', exist_ok=True)

        # One set lookup per blob instead of a full DataFrame scan per blob.
        known_ids = set(self.df.ImageId)

        i = 0
        for blob in tqdm(self.blobs, total=len(self.blobs)):
            img_id = os.path.splitext(os.path.basename(blob.name))[0]

            # Skip anything that is not a requested image (e.g. a folder
            # placeholder blob); indexing an empty selection used to raise.
            if img_id not in known_ids:
                continue

            dcm_path = './testset/{}.dcm'.format(img_id)
            png_path = './testset_png/{}.png'.format(img_id)

            # Download the DICOM if needed. Write to a temporary name first so
            # an interrupted download is never mistaken for a complete file.
            if not os.path.exists(dcm_path):
                tmp_dcm = dcm_path + '.part'
                with open(tmp_dcm, 'wb') as file_obj:
                    blob.download_to_file(file_obj)
                os.replace(tmp_dcm, dcm_path)

            # Convert to PNG if needed. Read the DICOM before creating any
            # output so a decoding error cannot leave an empty PNG behind.
            if not os.path.exists(png_path):
                pixels = pydicom.dcmread(dcm_path).pixel_array
                tmp_png = png_path + '.part'
                # format must be explicit: the temporary name has no .png suffix
                Image.fromarray(pixels).save(tmp_png, format='PNG')
                os.replace(tmp_png, png_path)

            self.image_info[i]["image_id"] = img_id
            self.image_info[i]["image_path"] = png_path
            i += 1

    def __getitem__(self, idx):
        """Return ``(PIL image, tensor, image_id)`` for position ``idx``.

        Raises:
            IndexError: if ``idx`` is outside ``[0, len(self))``.
        """
        # image_info is a defaultdict: looking up an unknown key would silently
        # insert an empty record and inflate len(self), so check bounds first.
        if not 0 <= idx < len(self.image_info):
            raise IndexError('index {} is out of range for a dataset of size {}'.format(
                idx, len(self.image_info)))

        record = self.image_info[idx]
        img = Image.open(record["image_path"]).convert("RGB")
        tsr = torchvision.transforms.ToTensor()(img)

        return img, tsr, record["image_id"]

    def __len__(self):
        """Number of images registered by the constructor."""
        return len(self.image_info)

In [None]:
# Building the dataset walks every blob and downloads/converts missing files as it goes
# (see TorchDataset.__init__).
testset = TorchDataset(blobs, df)

In [None]:
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor

      
def get_instance_segmentation_model(num_classes):
    """Return a COCO-pretrained Mask R-CNN with heads resized for ``num_classes``.

    Both the box classifier and the mask predictor of the pretrained network
    are swapped for freshly initialised ones that output ``num_classes``
    classes; the rest of the network keeps its pretrained weights.
    """
    mask_hidden_channels = 256

    net = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)
    heads = net.roi_heads

    # box head: reuse the input width of the existing classifier
    box_in = heads.box_predictor.cls_score.in_features
    heads.box_predictor = FastRCNNPredictor(box_in, num_classes)

    # mask head: reuse the channel count feeding the existing mask predictor
    mask_in = heads.mask_predictor.conv5_mask.in_channels
    heads.mask_predictor = MaskRCNNPredictor(mask_in, mask_hidden_channels, num_classes)

    return net

In [None]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# two classes: background and the single foreground class
# (presumably pneumothorax, going by the bucket name)
num_classes = 2

# get the model using our helper function
model = get_instance_segmentation_model(num_classes)

# load the trained parameters; map_location lets a checkpoint that was saved
# on a GPU load on a CPU-only machine as well
model.load_state_dict(torch.load('./model/model_para.pt', map_location=device))
# model.load_state_dict(torch.load('./model/model.bin'))

# move model to the right device
model.to(device)

In [None]:
# Smoke test: run the network once on the first test image.
# Evaluation mode first, then fetch the sample (the two steps are independent).
model.eval()
img, tsr, img_id = testset[0]

# gradients are not needed for inference
with torch.no_grad():
    prediction = model(
        [tsr.to(device)]
    )

In [None]:
# Inspect the raw model output; entry 0 holds the 'scores' and 'masks' used in the cells below.
prediction

In [None]:
# Confidence score of each detection in the single-image prediction.
scores = prediction[0]['scores']
scores

In [None]:
# Flag the detections that clear the 0.3 confidence threshold. `>=` matches the
# comparison used by the post-processing cells further down.
scores_binary = (scores >= 0.3)
scores_binary

In [None]:
# True when no detection clears the threshold. The previous `scores_binary.all == 0`
# compared the bound method itself with 0, which is always False.
not scores_binary.any()

In [None]:
# Convert the single-image prediction into a submission RLE string.
scores = prediction[0]['scores']
scores_binary = (scores >= 0.3)

# Masks scaled to 0..255. Sum the masks of the kept detections in uint64 (uint8 would
# overflow) and binarise the sum to 0/255. `mask_pred` is always defined, all zeros
# when nothing is kept, so the inspection and plotting cells below keep working.
masks = prediction[0]['masks'].mul(255).byte().cpu().numpy()
keep = scores_binary.cpu().numpy()
mask_sum = masks[keep, 0].astype(np.uint64).sum(axis=0)
mask_pred = np.where(mask_sum > 127, 255, 0)

if not scores_binary.any():
    # no detections at all, or none with score >= 0.3
    # (`scores_binary.all == 0` compared a bound method with 0 and never fired)
    rle = '-1'
else:
    rle = mask2rle(mask_pred.T, 1024, 1024)
    # an empty encoding is also reported as "no mask"
    if rle == '':
        rle = '-1'

In [None]:
# Show the run-length encoding produced for the single image ('-1' stands for "no mask").
rle

In [None]:
# Sanity check: after binarisation the mask should only contain the values 0 and/or 255.
np.unique(mask_pred)

In [None]:
# Overlay the predicted mask (translucent red) on top of the X-ray.
fig, ax = plt.subplots(figsize=(10, 10))
ax.set_ylabel('MASKED')

ax.imshow(img, cmap=plt.cm.bone)
ax.imshow(mask_pred, alpha=0.3, cmap="Reds")


In [None]:
# predict on the test set
def prediction_to_rle(prediction, score_threshold=0.3, mask_threshold=127):
    """Turn the model output for one image into a submission RLE string.

    Args:
        prediction: one element of the list returned by the model; its
            'scores' and 'masks' entries are read.
        score_threshold: detections scoring below this are ignored.
        mask_threshold: cut-off applied to the summed 0..255 masks.

    Returns:
        The RLE string, or '-1' when no detection is kept or the encoded
        mask is empty.
    """
    keep = prediction['scores'] >= score_threshold
    # Covers both "no detections" and "no detection above the threshold".
    # (`keep.all == 0` compared a bound method with 0 and was always False.)
    if not keep.any():
        return '-1'

    masks = prediction['masks'].mul(255).byte().cpu().numpy()
    # sum in uint64 so overlapping uint8 masks cannot overflow
    mask_sum = masks[keep.cpu().numpy(), 0].astype(np.uint64).sum(axis=0)
    mask = np.where(mask_sum > mask_threshold, 255, 0)

    rle = mask2rle(mask.T, 1024, 1024)
    return rle if rle != '' else '-1'


# put the model in evaluation mode
model.eval()

test_list = list()
with torch.no_grad():
    # loop-local names are distinct from the single-image cells above, so
    # `img`, `mask_pred`, `rle`, ... keep referring to that example
    for sample_idx in tqdm(range(len(testset))):
        test_img, test_tsr, test_id = testset[sample_idx]
        test_output = model([test_tsr.to(device)])
        test_list.append([test_id, prediction_to_rle(test_output[0])])

In [None]:
# Row order of the sample submission; the predictions are re-ordered to match it below.
sample_order = list(df['ImageId'])

In [None]:
# Re-order the predictions to follow `sample_order`. Group the rows by image id once,
# rather than rescanning `test_list` for every id; output order and contents are the
# same as the nested scan, and the builtin name `tuple` is no longer reused.
rows_by_id = {}
for row in test_list:
    rows_by_id.setdefault(row[0], []).append(row)

pred_list = [row for image_id in sample_order for row in rows_by_id.get(image_id, ())]

In [None]:
# Build the submission frame, reusing the sample submission's column names.
df_pred = pd.DataFrame(pred_list, columns = df.columns.values) 

In [None]:
# Write the submission file; index=False keeps the DataFrame index out of the CSV.
df_pred.to_csv('./output/submission.csv', index=False)