In [3]:
# Based on kernel
# https://www.kaggle.com/ekhtiar/finding-pneumo-part-1-eda-and-unet 

import torch # base
import numpy as np
from pathlib import Path # look in folders
import pandas as pd
import pydicom # open dicom images
from glob import glob
from tqdm import tqdm_notebook as tqdm

from skimage import exposure
import pydicom
from pytorch_tools import utils

ModuleNotFoundError: No module named 'skimage'

In [None]:
# import pytorch_tools as pt # core functionality
import utils # competition specific functions
from utils import rle_decode
# Some notebooks magic
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from pytorch_tools import models

In [None]:
# Architecture is selected by name so it can be swapped by editing a single string.
arch = "vgg19_bn"
# Look the constructor up on the `models` module and build it without pretrained weights.
model = getattr(models, arch)(pretrained=None, encoder=True)
# Keep the object returned by .eval() (inference mode, assuming a torch nn.Module — TODO confirm).
model = model.eval()

In [None]:
# Dummy input batch of shape (1, 3, 224, 224) used to probe the model.
# torch.Tensor(*sizes) only allocates memory without initialising it, so the values were
# arbitrary (possibly NaN/inf) and differed between runs; zeros keep the probe deterministic.
tensor = torch.zeros(1, 3, 224, 224)

In [None]:
# Push the dummy batch through the feature extractor one layer at a time and keep every
# intermediate activation, so a layer's result can be inspected by position (e.g. output[4]).
# NOTE(review): the original loop had no body (a SyntaxError); collecting the activations
# is the presumed intent — confirm.
output = []
activation = tensor
with torch.no_grad():  # inspection only, gradients are not needed
    for name, module in model.features.named_children():
        activation = module(activation)
        output.append(activation)

In [None]:
# Display the entry at index 4 of `output`.
# NOTE(review): this relies on `output` having been filled by an earlier cell — verify that
# still holds after Restart Kernel -> Run All.
output[4]

In [None]:
# Show the repr of the model's `features` attribute.
model.features

In [None]:
# Run the dummy batch through the whole feature extractor and look at the result's shape.
# NOTE(review): the original cell was an unfinished attribute access on an undefined name
# (`tensor_features.`); computing it from `model.features(tensor)` is an assumption — confirm.
with torch.no_grad():
    tensor_features = model.features(tensor)
tensor_features.shape

In [None]:
# List the (name, module) pairs registered under `vgg2.features`.
# NOTE(review): `vgg2` is not assigned in any cell visible here — presumably left over from
# an earlier experiment; confirm it exists on a fresh kernel or switch to `model`.
vgg2.features._modules.items()

In [None]:
## Initial setup
# !pip install kaggle
# !kaggle datasets download -d jesperdramsch/siim-acr-pneumothorax-segmentation-data

In [None]:
# Data
# Every location is derived from the dataset root so only this one path needs editing.
data = Path("./siim-acr-pneumothorax-segmentation-data")
train_rle_path = data / "train-rle.csv"
train_data_folder = data / "dicom-images-train"
test_data_folder = data / "dicom-images-test"
# Files are matched three directory levels below each image folder; sorting gives a stable order.
train_files_path = [x.as_posix() for x in sorted(train_data_folder.glob("./*/*/*"))]
test_files_path = [x.as_posix() for x in sorted(test_data_folder.glob("./*/*/*"))]
# Fixed: the test count used the undefined name `test_data_path`, which raised NameError.
print(f"Train dataset length {len(train_files_path)}, test dataset length {len(test_files_path)}")

In [None]:
# load rles
# The header of the second CSV column starts with a space, so both column names are
# replaced explicitly right after reading instead of trusting the file's header.
rles_df = pd.read_csv(train_rle_path).set_axis(['ImageId', 'EncodedPixels'], axis="columns")

In [None]:
# parse train DICOM dataset
# One metadata record is collected per DICOM file and the DataFrame is built once from the
# list (the previous empty `pd.DataFrame()` placeholder was overwritten and has been dropped).
# NOTE(review): the helper module is imported as `utils` in this notebook; the `my_utils`
# alias used before is never defined — confirm `utils.dicom_to_dict` is the intended function.
train_metadata_list = []
for file_path in tqdm(train_files_path):
    dicom_data = pydicom.dcmread(file_path)
    train_metadata = utils.dicom_to_dict(dicom_data, file_path, rles_df)
    train_metadata_list.append(train_metadata)
train_metadata_df = pd.DataFrame(train_metadata_list)

# parse test DICOM dataset; encoded_pixels=False is passed through to the helper unchanged
test_metadata_list = []
for file_path in tqdm(test_files_path):
    dicom_data = pydicom.dcmread(file_path)
    test_metadata = utils.dicom_to_dict(dicom_data, file_path, rles_df, encoded_pixels=False)
    test_metadata_list.append(test_metadata)
test_metadata_df = pd.DataFrame(test_metadata_list)

In [None]:
import matplotlib.pyplot as plt
from matplotlib import patches as patches

In [None]:
# Show a few randomly sampled training x-rays side by side, each captioned with patient info.
num_img = 3
fig, ax = plt.subplots(nrows=1, sharey=True, ncols=num_img, figsize=(num_img*12,12))
# Pair each sampled row with its own axis instead of maintaining a manual subplot counter.
for axis, (index, row) in zip(ax, train_metadata_df.sample(n=num_img).iterrows()):
    dataset = pydicom.dcmread(row['file_path'])
    axis.imshow(dataset.pixel_array, cmap=plt.cm.bone)
    # label the x-ray with information about the patient
    caption = f"Age:{row['patient_age']}, Sex: {row['patient_sex']}, Pneumothorax: {row['has_pneumothorax']}"
    axis.text(0, 0, caption, size=26, color='white', backgroundcolor='black')

In [None]:
def rle_decode(rle_str, shape=(1024, 1024), fill_value=1, dtype=np.float32):
    """Decode a relative run-length-encoded string into a 2-D mask.

    The string is read as whitespace-separated integer pairs ``offset length``.
    Each offset is counted from the end of the previous run (from position 0 for
    the first run); ``length`` consecutive positions of a flat buffer are then set
    to ``fill_value``.

    Args:
        rle_str (str): The encoded runs. An empty/blank string or the single token
            ``"-1"`` (presumably the dataset's "no mask" marker) yields an all-zero
            mask. A trailing offset without a matching length is ignored.
        shape (tuple): The flat buffer holds ``prod(shape)`` elements and is
            reshaped to ``shape[::-1]`` before being returned.
        fill_value: Value written into the positions covered by a run.
        dtype: NumPy dtype of the returned array.

    Returns:
        numpy.ndarray: Array of shape ``shape[::-1]`` with ``fill_value`` inside the
        runs and zeros elsewhere.

    Raises:
        ValueError: If a token cannot be parsed as an integer.
    """
    mask = np.zeros(np.prod(shape), dtype=dtype)
    # split() without an argument tolerates repeated/leading whitespace and "" -> [].
    tokens = rle_str.split()
    if tokens and tokens != ["-1"]:
        values = [int(token) for token in tokens]
        cursor = 0
        # Pair up plain lists: no ragged NumPy array is built, so odd token counts
        # cannot trigger NumPy's inhomogeneous-shape ValueError.
        for offset, length in zip(values[0::2], values[1::2]):
            cursor += offset
            mask[cursor:cursor + length] = fill_value
            cursor += length
    return mask.reshape(shape[::-1])

In [None]:
def bounding_box(img):
    """Return the extent of the truthy region of a mask.

    Args:
        img: Array-like mask; any truthy entry counts as part of the region.

    Returns:
        tuple: ``(rmin, rmax, cmin, cmax)`` — the first and last index along axis 0
        that contain a truthy value, followed by the first and last index along axis 1.

    Raises:
        IndexError: If the mask contains no truthy entry at all.
    """
    # Indices of the rows / columns that contain at least one truthy value.
    occupied_rows = np.where(np.any(img, axis=1))[0]
    occupied_cols = np.where(np.any(img, axis=0))[0]

    top, bottom = occupied_rows[0], occupied_rows[-1]
    left, right = occupied_cols[0], occupied_cols[-1]
    return top, bottom, left, right

def _add_bounding_boxes(axis, boxes):
    """Outline each ``(rmin, rmax, cmin, cmax)`` box on ``axis`` with a thin red rectangle."""
    for rmin, rmax, cmin, cmax in boxes:
        # A patch can belong to one axes only, so a fresh Rectangle is created per call.
        rectangle = patches.Rectangle((cmin, rmin), cmax - cmin, rmax - rmin,
                                      linewidth=1, edgecolor='r', facecolor='none')
        axis.add_patch(rectangle)


def plot_with_mask_and_bbox(file_path, mask_encoded_list, figsize=(20,10)):
    """Plot a chest x-ray three ways, each with the bounding boxes of its masks.

    Panels, left to right: the raw image with the masks overlaid, the image after
    OpenCV CLAHE, and the image after scikit-image adaptive histogram equalisation.

    Args:
        file_path (str): Path of the DICOM file to read.
        mask_encoded_list (list of str): RLE strings, one per annotated region.
            Each is decoded with ``rle_decode`` on a 1024x1024 grid and transposed.
        figsize (tuple): Size of the whole figure, passed to ``plt.subplots``.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    # Kept as a function-local import, as in the original cell: only this function needs OpenCV.
    import cv2

    pixel_array = pydicom.dcmread(file_path).pixel_array

    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(16, 16))
    clahe_pixel_array = clahe.apply(pixel_array)
    adapteq_pixel_array = exposure.equalize_adapthist(pixel_array, clip_limit=0.03)

    # use the masking function to decode RLE
    mask_decoded_list = [rle_decode(mask_encoded, (1024, 1024)).T for mask_encoded in mask_encoded_list]
    # Boxes are computed once and reused for all panels; all-zero masks have no extent
    # (bounding_box would raise IndexError on them), so they are skipped.
    boxes = [bounding_box(mask_decoded) for mask_decoded in mask_decoded_list if mask_decoded.any()]

    fig, ax = plt.subplots(nrows=1, ncols=3, sharey=True, figsize=figsize)

    # raw x-ray with the annotated areas overlaid
    ax[0].imshow(pixel_array, cmap=plt.cm.bone)
    for mask_decoded in mask_decoded_list:
        ax[0].imshow(mask_decoded, alpha=0.3, cmap="Reds")
    _add_bounding_boxes(ax[0], boxes)
    ax[0].set_title('With Mask')

    # CLAHE-processed image with just the bounding boxes and no mask
    ax[1].imshow(clahe_pixel_array, cmap=plt.cm.bone)
    _add_bounding_boxes(ax[1], boxes)
    ax[1].set_title('Without Mask - Clahe')

    # adaptively equalised image with just the bounding boxes and no mask
    ax[2].imshow(adapteq_pixel_array, cmap=plt.cm.bone)
    _add_bounding_boxes(ax[2], boxes)
    ax[2].set_title('Without Mask - Adapteq')
    plt.show()

In [None]:
# Draw one random x-ray that has a pneumothorax annotation ...
train_metadata_sample = train_metadata_df.loc[train_metadata_df['has_pneumothorax'] == 1].sample(n=1)
# ... and plot it with its masks and bounding boxes.
for index, row in train_metadata_sample.iterrows():
    file_path, mask_encoded_list = row['file_path'], row['encoded_pixels_list']
    print('image id: ' + row['id'])
    plot_with_mask_and_bbox(file_path, mask_encoded_list)

In [None]:
# Pick one random annotated case and keep its row and encoded masks for ad-hoc inspection.
train_metadata_sample = train_metadata_df.loc[train_metadata_df['has_pneumothorax'] == 1].sample(n=1)
row = train_metadata_sample.iloc[0]
index = row.name  # index label of the sampled row
mask_encoded_list = row['encoded_pixels_list']

In [None]:
def show_dcm_info(dataset, file_path=None):
    """Print a short, human-readable summary of a DICOM dataset.

    Args:
        dataset: Object exposing the DICOM attributes printed below (e.g. the result
            of ``pydicom.dcmread``) and supporting ``'Tag' in dataset`` checks.
        file_path (str, optional): Path to show on the "Filename" line. When omitted,
            ``dataset.filename`` is used if present, otherwise ``None`` is printed.
            Previously the function printed whatever the notebook-global ``file_path``
            happened to hold, which need not belong to ``dataset``.

    Returns:
        None. Everything is written to stdout.
    """
    if file_path is None:
        # pydicom keeps the source path of a dataset read from disk in `.filename`.
        file_path = getattr(dataset, "filename", None)
    print("Filename.........:", file_path)
    print("Storage type.....:", dataset.SOPClassUID)
    print()

    pat_name = dataset.PatientName
    display_name = pat_name.family_name + ", " + pat_name.given_name
    print("Patient's name......:", display_name)
    print("Patient id..........:", dataset.PatientID)
    print("Patient's Age.......:", dataset.PatientAge)
    print("Patient's Sex.......:", dataset.PatientSex)
    print("Modality............:", dataset.Modality)
    print("Body Part Examined..:", dataset.BodyPartExamined)
    print("View Position.......:", dataset.ViewPosition)

    # Image details are printed only when the dataset actually carries pixel data.
    if 'PixelData' in dataset:
        rows = int(dataset.Rows)
        cols = int(dataset.Columns)
        print("Image size.......: {rows:d} x {cols:d}, {size:d} bytes".format(
            rows=rows, cols=cols, size=len(dataset.PixelData)))
        if 'PixelSpacing' in dataset:
            print("Pixel spacing....:", dataset.PixelSpacing)

def plot_pixel_array(dataset, figsize=(10,10)):
    """Show the pixel data of ``dataset`` on a new figure using the bone colormap.

    Args:
        dataset: Object exposing a ``pixel_array`` attribute (e.g. a pydicom dataset).
        figsize (tuple): Size passed to ``plt.figure``.
    """
    plt.figure(figsize=figsize)
    image = dataset.pixel_array
    plt.imshow(image, cmap=plt.cm.bone)
    plt.show()