In [None]:
import os

### Please specify your input / output paths here.
PROJECT_FOLDER = "YOUR_PROJECT_FOLDER"  # parent folder of the input images
# os.path.join inserts the path separator when PROJECT_FOLDER has no trailing
# slash; plain string concatenation produced "YOUR_PROJECT_FOLDERimages/".
IMAGE_DATA_FOLDER = os.path.join(PROJECT_FOLDER, "images/")  # folder of the input images
# CSV listing the test cases; the inference loop reads its 'StudyInstanceUID'
# and 'image_folder' (folder of DICOM files) columns.
INPUT_TEST_CSV_FILE = "YOUR_TEST_FILE"
OUTPUT_FILE = "YOUR_OUTPUT_FILE"  # output (submission) CSV file name

In [1]:
# !pip install -q ./for-pydicom/pylibjpeg-1.4.0-py3-none-any.whl
# !pip install -q ./for-pydicom/python_gdcm-3.0.14-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
# !pip install -q ./for-pydicom/pylibjpeg_libjpeg-1.3.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl

In [2]:
import os
# Restrict this process to a single GPU. These variables are set before torch
# is imported further down (presumably so CUDA picks them up at initialisation).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
# IPython autoreload in mode 2, so edits to imported local modules
# (e.g. datasets.dicomDatasets) are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
import sys

# Make the vendored (offline) copies of the model libraries importable.
# Order is preserved: each directory is appended after the existing entries.
sys.path.extend([
    "./segmentation-models-pytorch/segmentation_models.pytorch-master",
    './timm-pytorch-image-models/pytorch-image-models-master',
    "./pretrainedmodels/pretrainedmodels-0.7.4",
    "./efficientnet-pytorch/EfficientNet-PyTorch-master",
])

In [4]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import os
from glob import glob
import time
import sys
import ast
import math
import gc
gc.enable()
import copy

import warnings
warnings.filterwarnings("ignore")

import pydicom
import cv2
from PIL import Image, ImageEnhance

import matplotlib as mpl
import matplotlib.pyplot as plt

mpl.rcParams['figure.figsize'] = 12, 8

import albumentations as A
from albumentations.pytorch import ToTensorV2
from skimage import img_as_ubyte

from sklearn.metrics import *
from sklearn.model_selection import *

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
import timm
import segmentation_models_pytorch as smp

from skimage.measure import label, regionprops

# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [5]:
from datasets.dicomDatasets import LoadDicoms

In [6]:
class SagSegModel(pl.LightningModule):
    """U-Net used on the sagittal view.

    Wraps ``smp.Unet`` with the ``tu-tf_efficientnet_b1_ns`` encoder, a
    single input channel and 8 output channels. No pretrained encoder weights
    are requested (``encoder_weights=None``); weights come from a checkpoint.
    ``forward`` returns the U-Net output as-is; callers apply ``self.sigmoid``.
    """

    def __init__(self):
        super().__init__()
        # Encoder names left by the author (presumably alternatives that were tried):
        # tf_efficientnet_b0_ns resnest50d_4s2x40d seresnext50_32x4d tf_efficientnetv2_m_in21ft1k
        self.feature_extractor = smp.Unet(
            'tu-tf_efficientnet_b1_ns',
            in_channels=1,
            classes=8,
            encoder_weights=None,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, inp):
        """Return the segmentation output of the wrapped U-Net for ``inp``."""
        return self.feature_extractor(inp)
    
def SagInference(models, image, L):
    """Label every row of a 2-D sagittal image with the dominant mask channel.

    The image is resized to 256x256, scaled by its maximum and passed through
    every model in ``models``. Each sigmoid output is resized back to
    ``(L, image.shape[1])``, the outputs are averaged across models and
    thresholded at 0.3.

    Args:
        models: iterable of models exposing ``__call__`` and ``.sigmoid``.
        image: 2-D numpy array (indexed as ``image.shape[1]`` below).
        L: number of rows of the resized output, i.e. the length of the result.

    Returns:
        A list of ``L`` ints. Entry ``i`` is ``argmax + 1`` over the per-channel
        positive-pixel counts of row ``i`` (1..number of channels), or the
        sentinel ``100`` when no channel has a positive pixel in that row.
    """
    with torch.no_grad():
        peak = np.max(image)
        # An all-zero image would otherwise divide by zero and feed NaNs to the models.
        scale = peak if peak != 0 else 1
        img = cv2.resize(image, (256, 256)).astype(np.float32) / scale

        # The input is identical for every model: build and upload it once.
        batch = torch.as_tensor(img).unsqueeze(0).unsqueeze(0).to(device)

        outputs = []
        for model in models:
            output = model.sigmoid(model(batch))[0].detach().cpu().numpy().transpose(1, 2, 0)
            # cv2.resize takes dsize as (width, height): width = image.shape[1], height = L.
            # NOTE(review): the original author flagged this resize as needing a more
            # careful rewrite (odd/even sizes combined with // elsewhere) - TODO confirm.
            output = cv2.resize(output, (image.shape[1], L))
            outputs.append(output)

        mean_output = np.mean(outputs, 0)

        # Single strict threshold. The previous pair of in-place assignments
        # (> 0.3 -> 1, < 0.3 -> 0) left values exactly equal to 0.3 untouched.
        binary = mean_output > 0.3

        preds = []
        for row_mask in binary:
            # Positive-pixel count per channel for this row.
            classes = np.sum(row_mask, 0)
            if np.any(classes):
                preds.append(np.argmax(classes)+1)
            else:
                preds.append(100)

    return preds

In [7]:
# Checkpoint folders and the model class used for each folder (paired by position).
folders = ['./try2-seg-b1v10-sagview-full/',]
model_funcs = [SagSegModel,]
sag_models = []
for model_func, folder in zip(model_funcs, folders):
    # Sort by file name so the model order is deterministic.
    for file in sorted(glob(folder+"/*"), key=lambda x: x.split('/')[-1]):
        # Build the class paired with this folder (it used to be hard-coded,
        # which silently ignored model_funcs).
        sag_model = model_func()
        sag_model.eval()
        sag_model.to(device)
        st = torch.load(f"{file}", map_location=device)['state_dict']
        # strict=False tolerates extra keys in the checkpoint, but it also hides
        # missing ones, which would leave layers randomly initialised: report them.
        load_result = sag_model.load_state_dict(st, strict=False)
        if load_result.missing_keys:
            print(f"WARNING: {file} does not provide {len(load_result.missing_keys)} model weights, e.g. {load_result.missing_keys[:5]}")
        # A fresh instance is created on every iteration, so no deepcopy is needed.
        sag_models.append(sag_model)

len(sag_models)

5

In [8]:
class BoneSegModel(pl.LightningModule):
    """U-Net used on individual slices to produce 8-channel masks.

    Wraps ``smp.Unet`` with the ``tu-tf_efficientnet_b1_ns`` encoder, three
    input channels and 8 output channels. No pretrained encoder weights are
    requested (``encoder_weights=None``); weights come from a checkpoint.
    ``forward`` returns the U-Net output as-is; callers apply ``self.sigmoid``.
    """

    def __init__(self):
        super().__init__()
        # Encoder names left by the author (presumably alternatives that were tried):
        # tf_efficientnet_b0_ns resnest50d_4s2x40d seresnext50_32x4d tf_efficientnetv2_m_in21ft1k
        self.feature_extractor = smp.Unet(
            'tu-tf_efficientnet_b1_ns',
            in_channels=3,
            classes=8,
            encoder_weights=None,
        )
        self.sigmoid = nn.Sigmoid()

    def forward(self, inp):
        """Return the segmentation output of the wrapped U-Net for ``inp``."""
        return self.feature_extractor(inp)
    
def BoneInference(models, images, sz, bs):
    """Run the segmentation ensemble over a stack of slices and binarise it.

    Args:
        models: iterable of models exposing ``__call__`` and ``.sigmoid``.
        images: array-like of shape (N, H, W); a channel axis is inserted and
            the single channel is repeated three times. Values are divided by
            255 before inference (assumes a 0-255 intensity range - TODO confirm).
        sz: side length the slices are resized to before inference.
        bs: batch size.

    Returns:
        ``np.uint8`` array of shape (N, sz, sz, C): the model-averaged sigmoid
        output thresholded at 0.5, channels last.

    Raises:
        ValueError: from ``np.stack`` when ``images`` is empty (unchanged
            behaviour; the caller treats it as a failed study).
    """
    with torch.no_grad():
        images = nn.functional.interpolate(torch.as_tensor(images).unsqueeze(1), (sz, sz))
        images = torch.cat([images]*3, 1)

        # Integer ceiling division; replaces the string check on a float quotient.
        n_batches = -(-images.shape[0] // bs)

        OUTS = []
        for i in range(n_batches):
            # Every model sees the same batch: upload and normalise it once.
            inputs = images[i*bs:(i+1)*bs].to(device).float() / 255

            outs = [
                model.sigmoid(model(inputs)).detach().cpu().numpy().transpose(0, 2, 3, 1)
                for model in models
            ]

            # Average over models, then binarise.
            outs = np.mean(np.stack(outs), 0)
            OUTS.extend((outs > 0.5).astype(np.uint8))

    OUTS = np.stack(OUTS)

    return OUTS

In [9]:
# Checkpoint folders and the model class used for each folder (paired by position).
folders = ['./try2-seg-b1v1-full/',]
model_funcs = [BoneSegModel,]
bone_models = []
for model_func, folder in zip(model_funcs, folders):
    # Sort by file name so the model order is deterministic.
    for file in sorted(glob(folder+"/*"), key=lambda x: x.split('/')[-1]):
        # Build the class paired with this folder (it used to be hard-coded,
        # which silently ignored model_funcs).
        bone_model = model_func()
        bone_model.eval()
        bone_model.to(device)
        st = torch.load(f"{file}", map_location=device)['state_dict']
        # strict=False tolerates extra keys in the checkpoint, but it also hides
        # missing ones, which would leave layers randomly initialised: report them.
        load_result = bone_model.load_state_dict(st, strict=False)
        if load_result.missing_keys:
            print(f"WARNING: {file} does not provide {len(load_result.missing_keys)} model weights, e.g. {load_result.missing_keys[:5]}")
        # A fresh instance is created on every iteration, so no deepcopy is needed.
        bone_models.append(bone_model)

len(bone_models)

5

In [10]:
class _PooledBinaryClassifier(pl.LightningModule):
    """Shared implementation of the slice classifiers below.

    A backbone that returns an unpooled feature map is followed by global
    average pooling and a single-logit linear layer. Attribute names
    (``feature_extractor``, ``avgpool``, ``classifier``, ``flatten``,
    ``sigmoid``) are the same as in the former stand-alone classes, so
    checkpoint keys are unchanged.
    """

    def __init__(self, backbone):
        super().__init__()
        self.feature_extractor = backbone

        f = self.feature_extractor.num_features

        self.avgpool = nn.AdaptiveAvgPool2d(1)
        self.classifier = nn.Linear(f, 1)
        self.flatten = nn.Flatten()
        self.sigmoid = nn.Sigmoid()

    def forward(self, inp):
        """Return ``(logits, features)``.

        ``features`` is the average-pooled, flattened backbone output and
        ``logits`` is ``self.classifier(features)``; ``self.sigmoid`` is NOT
        applied here.
        """
        features = self.feature_extractor(inp)
        features = self.avgpool(features)
        features = self.flatten(features)
        logits = self.classifier(features)

        return logits, features


class Model(_PooledBinaryClassifier):
    """Slice classifier on a ``tf_efficientnet_b5_ns`` backbone."""

    def __init__(self):
        super().__init__(
            timm.models.tf_efficientnet_b5_ns(in_chans=3, pretrained=False, num_classes=0, global_pool='')
        )


class Model2(_PooledBinaryClassifier):
    """Slice classifier on a ``seresnext50_32x4d`` backbone."""

    def __init__(self):
        super().__init__(
            timm.models.seresnext50_32x4d(in_chans=3, pretrained=False, num_classes=0, global_pool='')
        )


class Model3(_PooledBinaryClassifier):
    """Slice classifier on a ``tf_efficientnetv2_s_in21k`` backbone."""

    def __init__(self):
        super().__init__(
            timm.models.tf_efficientnetv2_s_in21k(in_chans=3, pretrained=False, num_classes=0, global_pool='')
        )
    
def CLSInference(models, images, bs):
    """Run the classifier ensemble over ``images`` in batches.

    Args:
        models: iterable of models whose call returns ``(logits, features)``
            and which expose ``.sigmoid``.
        images: tensor with the sample axis first. Values are divided by 255
            before inference (assumes a 0-255 intensity range - TODO confirm).
        bs: batch size.

    Returns:
        ``(OUTS, FEATS)``: numpy arrays holding, per sample, the model-averaged
        sigmoid output and the model-averaged feature vector.

    Raises:
        ValueError: from ``np.stack`` when ``images`` is empty (unchanged
            behaviour).
    """
    with torch.no_grad():
        # Integer ceiling division; replaces the string check on a float quotient.
        n_batches = -(-images.shape[0] // bs)

        OUTS = []
        FEATS = []
        for i in range(n_batches):
            # Every model sees the same batch: upload and normalise it once.
            inputs = images[i*bs:(i+1)*bs].to(device).float() / 255

            outs = []
            feats = []
            for model in models:
                logits, features = model(inputs)
                outs.append(model.sigmoid(logits).detach().cpu().numpy())
                feats.append(features.detach().cpu().numpy())

            # Average predictions and features over the ensemble.
            OUTS.extend(np.mean(np.stack(outs), 0))
            FEATS.extend(np.mean(np.stack(feats), 0))

    OUTS = np.stack(OUTS)
    FEATS = np.stack(FEATS)

    return OUTS, FEATS

In [11]:
# Checkpoint folders and the model class used for each folder (paired by position).
folders = ['./try17-b5-v5-t4-pseudo-round1/',]
model_funcs = [Model,]
cls_models = []
for model_func, folder in tqdm(zip(model_funcs, folders)):
    # Sort by file name so the model order is deterministic.
    for file in sorted(glob(folder+"/*"), key=lambda x: x.split('/')[-1]):
        cls_model = model_func()
        cls_model.eval()
        cls_model.to(device)
        # These files hold the state dict directly (no 'state_dict' wrapper key)
        # and are loaded strictly, so any key mismatch raises.
        st = torch.load(f"{file}", map_location=device)
        cls_model.load_state_dict(st)
        # A fresh instance is created on every iteration, so no deepcopy is needed.
        cls_models.append(cls_model)

len(cls_models)

1it [00:03,  3.11s/it]


5

In [12]:
class Attention(nn.Module):
    """Attention pooling over the step axis of a (batch, step_dim, feature_dim) input.

    One score per step is computed as ``tanh(x @ weight + b)``, exponentiated
    and normalised over the steps; the output is the score-weighted sum of the
    step vectors, shape (batch, feature_dim).

    Args:
        feature_dim: size of the last axis of the input.
        step_dim: number of steps (size of axis 1 of the input).
        bias: when True, a learnable per-step bias ``b`` is added to the scores.
        **kwargs: forwarded to ``nn.Module.__init__``.
    """

    def __init__(self, feature_dim, step_dim, bias=True, **kwargs):
        super(Attention, self).__init__(**kwargs)

        self.supports_masking = True

        self.bias = bias
        self.feature_dim = feature_dim
        self.step_dim = step_dim
        self.features_dim = 0

        weight = torch.zeros(feature_dim, 1)
        nn.init.xavier_uniform_(weight)
        self.weight = nn.Parameter(weight)

        if bias:
            self.b = nn.Parameter(torch.zeros(step_dim))

    def forward(self, x, mask=None):
        """Pool ``x`` over its step axis.

        Args:
            x: tensor viewable as (-1, feature_dim) with ``step_dim`` steps per sample.
            mask: optional tensor multiplied into the un-normalised weights
                (zeros exclude steps).

        Returns:
            Tensor of shape (batch, feature_dim).
        """
        feature_dim = self.feature_dim
        step_dim = self.step_dim

        # One scalar score per (sample, step).
        eij = torch.mm(
            x.contiguous().view(-1, feature_dim),
            self.weight
        ).view(-1, step_dim)

        if self.bias:
            eij = eij + self.b

        eij = torch.tanh(eij)
        a = torch.exp(eij)

        if mask is not None:
            a = a * mask

        # The epsilon belongs in the denominator. Operator precedence used to
        # add it AFTER the division, so a fully masked row produced 0/0 = NaN.
        a = a / (torch.sum(a, 1, keepdim=True) + 1e-10)

        weighted_input = x * torch.unsqueeze(a, -1)
        return torch.sum(weighted_input, 1)
    
class SeqModel(nn.Module):
    """Sequence head over per-group feature sequences.

    Two stacked bidirectional GRUs (attributes are named ``lstm*``) feed an
    attention pooling branch and a 1x1 ``Conv1d`` pooling branch. The two
    1024-wide results are concatenated into a 2048-wide vector per input
    sequence; every 7 consecutive sequences are then flattened together and
    mapped to 8 logits by ``final_classifier``.

    ``clf``, ``conv_bn1``, ``flatten`` and ``avgpool`` are never used in
    ``forward``; they are kept so the set of parameter names stays unchanged.

    Args:
        seq_dim: number of steps in every input sequence.
    """

    def __init__(self, seq_dim=64):
        super().__init__()

        in_features = 2048
        width = 1
        hidden = 512 * width

        self.lstm1 = nn.GRU(in_features * width, hidden, bidirectional=True, batch_first=True)
        self.lstm2 = nn.GRU(hidden * 2, hidden, bidirectional=True, batch_first=True)

        self.attention1 = Attention(hidden * 2, seq_dim)

        # Treats the step axis as channels: collapses seq_dim steps into one.
        self.conv1 = nn.Conv1d(seq_dim, 1, 1)

        self.lstm_bn1 = nn.BatchNorm1d(seq_dim)
        self.lstm_bn2 = nn.BatchNorm1d(seq_dim)

        self.att_bn1 = nn.BatchNorm1d(hidden * 2)

        self.conv_bn1 = nn.BatchNorm1d(1)

        self.clf = nn.Linear(hidden * 2 * 2, 1)

        self.flatten = nn.Flatten()
        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.avgpool = nn.AdaptiveAvgPool1d(1)

        self.dropout = nn.Dropout(0.2)

        self.final_classifier = nn.Linear(2048*7, 8)

        self.sigmoid = nn.Sigmoid()

    def forward(self, inp):
        """Return logits of shape (inp.shape[0] // 7, 8); ``self.sigmoid`` is not applied."""
        # Recurrent stack: GRU -> tanh -> batch norm, twice, then ReLU.
        seq, _ = self.lstm1(inp)
        seq = self.lstm_bn1(self.tanh(seq))

        seq, _ = self.lstm2(seq)
        seq = self.relu(self.lstm_bn2(self.tanh(seq)))

        # Branch 1: 1x1 convolution over the step axis, single output channel.
        conv_pool = self.conv1(seq)[:, 0]

        # Branch 2: attention pooling, then batch norm and ReLU.
        att_pool = self.relu(self.att_bn1(self.attention1(seq, mask=None)))

        pooled = self.dropout(torch.cat([att_pool, conv_pool], -1))

        # Group every 7 consecutive sequences and classify them jointly.
        grouped = pooled.reshape(pooled.shape[0]//7, 7, 2048)
        return self.final_classifier(torch.flatten(grouped, 1, 2))

In [13]:
# Checkpoint folders, the model class for each folder and the sequence length
# passed to its constructor (all three lists are paired by position).
folders = ['./b5-v5-t4-pseudo-round1-seq-v2/',]
model_funcs = [SeqModel]
dims = [64,]
seq_models = []
for model_func, folder, d in tqdm(zip(model_funcs, folders, dims)):
    # Sort by file name so the model order is deterministic.
    for file in sorted(glob(folder+"/*"), key=lambda x: x.split('/')[-1]):
        seq_model = model_func(d)
        seq_model.eval()
        seq_model.to(device)
        # These files hold the state dict directly (no 'state_dict' wrapper key)
        # and are loaded strictly, so any key mismatch raises.
        st = torch.load(f"{file}", map_location=device)
        seq_model.load_state_dict(st)
        # A fresh instance is created on every iteration, so no deepcopy is needed.
        seq_models.append(seq_model)

len(seq_models)

1it [00:00,  1.82it/s]


5

In [14]:
# One row per study; the inference loop reads the 'StudyInstanceUID' and
# 'image_folder' columns of this frame.
test_df = pd.read_csv(INPUT_TEST_CSV_FILE)

In [16]:
import re
import traceback

In [None]:
row_ids = []
fractured = []

augs = A.Compose([
    A.LongestMaxSize(1024),
    A.PadIfNeeded(1024, 1024, border_mode=0, p=1),
    ToTensorV2()
])

# Fallback probabilities used when a study cannot be processed.
# Layout matches the model output: index 0 = patient_overall, 1..7 = C1..C7.
# (Presumably per-label base rates - TODO confirm their origin.)
means = [0.4760, 0.0723, 0.1412, 0.0362, 0.0535, 0.0802, 0.1372, 0.1947]


def _crop_to_mask(image, mask):
    """Crop ``image`` to the bounding box of ``mask``, widened by 5% on each side.

    The box is computed in mask coordinates, normalised by the mask size and
    re-applied at the resolution of ``image``. The image is returned unchanged
    when the mask is empty or the resulting crop would be empty.
    """
    rows, cols = np.where(mask)
    if rows.size == 0:
        return image

    # Rows are normalised by the mask height and columns by its width (the
    # axes used to be swapped, which only worked because masks are square).
    ymin, ymax = rows.min() / mask.shape[0], rows.max() / mask.shape[0]
    xmin, xmax = cols.min() / mask.shape[1], cols.max() / mask.shape[1]

    ymin, xmin = ymin * 0.95, xmin * 0.95
    ymax, xmax = ymax * 1.05, xmax * 1.05

    cropped = image[int(ymin*image.shape[0]):int(ymax*image.shape[0]), int(xmin*image.shape[1]):int(xmax*image.shape[1])]
    return cropped if cropped.size else image


def _predict_study(files):
    """Run the whole pipeline on one study.

    Args:
        files: paths of the DICOM files of the study.

    Returns:
        Array of 8 probabilities laid out as [patient_overall, C1, ..., C7],
        averaged over the sequence models.

    Raises:
        Any exception from loading or inference; the caller falls back to ``means``.
    """
    # Order files by the number in the FILE name. Matching on the full path
    # could pick up digits from a parent directory and give every file the same key.
    files = sorted(files, key=lambda x: int(re.findall(r'(\d+).d', os.path.basename(x))[0]))

    # Load all slices together with their z positions.
    dl = DataLoader(LoadDicoms(files), batch_size=128, num_workers=4, shuffle=False)
    images = []
    z_pos = []
    for batch in dl:
        images.extend(batch[0])
        z_pos.extend(batch[1])
    images = torch.stack(images).numpy()
    z_pos = torch.stack(z_pos).numpy()

    # Slices ordered by decreasing z position.
    images = images[np.argsort(-z_pos)]

    # Sagittal view: the middle column of every slice. Each slice gets a key;
    # slices labelled 100 (nothing found) or 8 are dropped.
    sag = images[:, :, images.shape[-1]//2]
    keys = np.array(SagInference(sag_models, sag, images.shape[0]))
    keep = np.where(np.logical_and(keys!=100, keys!=8))
    images = images[keep]
    keys = keys[keep]

    # Per-slice masks; keep only the slices whose mask is non-empty.
    masks = BoneInference(bone_models, images, 256, 32)
    masks = np.max(masks, -1)
    keep = [i for i, m in enumerate(masks) if np.max(m)]
    masks = masks[keep]
    images = images[keep]
    keys = keys[keep]

    # 2.5D inputs: previous / current / next slice as channels, cropped to the
    # mask of the current slice. The first and last slices have no two
    # neighbours, hence range(1, len - 1) here and keys[1:-1] below.
    inputs = []
    for i in range(1, len(images)-1):
        image = np.stack([images[i-1], images[i], images[i+1]], -1)
        image = _crop_to_mask(image, masks[i])
        inputs.append(augs(image=image)['image'])
    images = torch.stack(inputs)
    keys = keys[1:-1]

    # Per-slice features from the classifier ensemble (its predictions are not used).
    inputs = nn.functional.interpolate(images, (456, 456))
    _, features = CLSInference(cls_models[:5], inputs, 32)

    # One fixed-length, zero-padded feature sequence for each key 1..7.
    dim = 64
    bone_features = []
    for bone in range(1, 8):
        features_ = np.zeros((dim, 2048))
        if np.sum(keys==bone):
            feats = features[keys==bone]
            features_[:min(len(feats), dim)] = feats[:min(len(feats), dim)]
        bone_features.append(features_)
    bone_features = torch.as_tensor(np.stack(bone_features)).float().to(device)

    # Inference only: no_grad avoids building an autograd graph.
    with torch.no_grad():
        sps = [
            seq_model.sigmoid(seq_model(bone_features)[0]).detach().cpu().numpy()
            for seq_model in seq_models[:5]
        ]
    sp = np.mean(sps, 0)
    if np.shape(sp) != (8,):
        raise ValueError(f"expected 8 probabilities per study, got shape {np.shape(sp)}")
    return sp


for _, row in tqdm(test_df.iterrows(), total=test_df.shape[0]):
    study_id = row['StudyInstanceUID']

    try:
        study_probs = _predict_study(glob(row['image_folder'] + "/*"))
    except Exception as error:
        # Best effort: a study that cannot be processed gets the fallback
        # probabilities instead of aborting the whole run.
        print(error)
        print(traceback.format_exc())
        study_probs = means

    # The 8 rows of a study are written only after prediction has finished,
    # so row_ids and fractured always stay the same length.
    for bone in range(1, 8):
        row_ids.append(f"{study_id}_C{bone}")
        fractured.append(study_probs[bone])

    row_ids.append(f"{study_id}_patient_overall")
    fractured.append(study_probs[0])

    gc.collect()

  2%|▏         | 92/5161 [13:31<12:05:08,  8.58s/it]

In [None]:
# Assemble the submission table and keep probabilities inside [0.001, 0.999].
sub = pd.DataFrame({'row_id': row_ids, 'fractured': fractured})
sub['fractured'] = sub['fractured'].clip(lower=0.001, upper=0.999)
sub.head(8)

In [None]:
# Write the submission to OUTPUT_FILE without the DataFrame index column.
sub.to_csv(OUTPUT_FILE, index=False)