In [1]:
import pytorch_lightning as pl
import cv2 as cv
import pydicom as dicom
from pydicom.pixel_data_handlers import apply_voi_lut
import pandas as pd
import torch
import os
from collections import defaultdict
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import copy
import numpy as np
from PIL import Image,ImageOps
import matplotlib.pyplot as plt
import sys
from tqdm.notebook import tqdm, trange
import time

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
print(device)

cuda:0


In [3]:
train_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train"
train_labels_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
test_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/test"
png_train = "./png/train"
png_test = "./png/test"
batch_size = 4

In [4]:
def ensure_dir(file_path):
    directory = os.path.dirname(file_path)
    if not os.path.exists(directory):
        os.makedirs(directory)

In [5]:
def dicom2png(path,v):
    dcm_file = dicom.read_file(path)
#     print(np.mean(dcm_file.pixel_array)!=0)
    data = apply_voi_lut(dcm_file.pixel_array, dcm_file)
    if np.max(data) - np.min(data) != 0:
        if dcm_file.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        data = data / np.max(data)
        data = (data * 255).astype(np.uint8)
        im = Image.fromarray(data)
        to_dir = os.path.abspath(png_train)
        head,tail = os.path.split(path)
        file_name = tail
        ls = []
        head,tail = os.path.split(head)
        ls.append(tail)
        head,tail = os.path.split(head)
        ls.append(tail)
#         print(tail)
        ls.reverse()
        for loc in ls:
            to_dir = os.path.join(to_dir,loc)
        to_dir = os.path.abspath(png_train)
        to_dir = os.path.join(to_dir,v)
        os.makedirs(to_dir, exist_ok = True)
        file_name = os.path.splitext(file_name)[0]
        file_name += '.png'
        to_dir = os.path.join(to_dir,file_name)
        im.save(to_dir)

In [6]:
train_labels=pd.read_csv(train_labels_dir)
train_labels['BraTS21ID'] = train_labels['BraTS21ID'].apply(lambda x: str(x).zfill(5))
train_labels = train_labels.set_index('BraTS21ID')
train_labels = train_labels.drop(index = '00109')
train_labels = train_labels.drop(index = '00123')
train_labels = train_labels.drop(index = '00709')
train_patients = train_labels.index.values.tolist()
test_patients = os.listdir(test_dir)
print(len(train_labels))
# print(train_labels)
print('-----------------------')
# lab = train_labels.to_numpy()
# print(train_patients)
lab = train_labels.values.tolist()
print(type(lab),type(train_patients))
print(len(lab),len(train_patients))
# print(lab)
# for x,y in train_patients:
#     if(x == 00000):
#         print(y)

582
-----------------------
<class 'list'> <class 'list'>
582 582


In [7]:
for patient in tqdm(train_patients):
    id = train_patients.index(patient)
    v = lab[id]
    m=str(v)
#     print(v)
    path = os.path.join(train_dir, patient,'FLAIR')
#     print(path)
    for dcm_file in os.listdir(path):
        file_path = os.path.join(path,dcm_file)
        dicom2png(file_path,m)
print("Over")

  0%|          | 0/582 [00:00<?, ?it/s]

Over


In [8]:
classes=['1','0']
num_classes=2
batch_size=4

In [9]:
import torchvision
from torchvision import transforms

dataset = torchvision.datasets.ImageFolder(root="./png/train",transform=transforms.Compose([
                                                            transforms.ToTensor(),
                                                            transforms.Resize([224,224]),
                                                            ]))

In [10]:
print(len(dataset))
trainloader=torch.utils.data.DataLoader(dataset,batch_size=batch_size,shuffle=True,generator=torch.Generator().manual_seed(42))

999


In [11]:
dataiter=iter(trainloader)
images,labels=dataiter.next()

In [12]:
class RN(nn.Module):
    def __init__(self,learning_rate=0.001):
        super().__init__()
        self.learning_rate = learning_rate
        backbone = models.resnet152(pretrained=False)
        result = backbone.load_state_dict(torch.load("../input/pytorch-pretrained-models/resnet152-b121ed2d.pth"))
        print(result)
        num_ftrs=backbone.fc.in_features
        layers = list(backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*layers)
        fc=[]
        fc.extend([nn.Linear(in_features=num_ftrs,out_features=128),
           nn.ReLU(),
           nn.Dropout(),
           nn.Linear(in_features=128,out_features=64),
           nn.ReLU(),
           nn.Dropout(),
           nn.Linear(in_features=64,out_features=1)
          ])
        self.classifier=nn.Sequential(*fc)
        for param in self.feature_extractor.parameters():
            param.requires_grad=False
    def forward(self, x):
        with torch.no_grad():
            representations = self.feature_extractor(x).flatten(1)
        x=self.classifier(representations)
        return x;

In [13]:
rn=RN().to(device)

<All keys matched successfully>


In [14]:
def train(net,dataloader,epochs=15):
    loss_fn=nn.BCEWithLogitsLoss().to(device)
    running_loss = 0.0
    opt=optim.Adam(params=net.parameters())
    for epoch in range(epochs):
        print(f'Running Epoch: {epoch+1}')
        for i,data in enumerate(dataloader,0):
            inputs,labels=data
            inputs,labels=inputs.to(device),labels.to(device)
            opt.zero_grad()
            outputs=net(inputs)
            labels=labels.unsqueeze(-1)
            labels = labels.type_as(outputs)
    #         print(outputs)
            loss=loss_fn(outputs,labels)
            loss.backward()
            opt.step()
            running_loss += loss.item()
            if i % 200 == 199:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
            torch.cuda.empty_cache()
            del inputs,labels,outputs
    print('Finished Training')

In [15]:
for param in rn.parameters():
    if param.requires_grad:
        print(param.shape)

torch.Size([128, 2048])
torch.Size([128])
torch.Size([64, 128])
torch.Size([64])
torch.Size([1, 64])
torch.Size([1])


In [16]:
train(rn,trainloader,20)

Running Epoch: 1
[1,   200] loss: 0.711
Running Epoch: 2
[2,   200] loss: 0.850
Running Epoch: 3
[3,   200] loss: 0.810
Running Epoch: 4
[4,   200] loss: 0.791
Running Epoch: 5
[5,   200] loss: 0.761
Running Epoch: 6
[6,   200] loss: 0.726
Running Epoch: 7
[7,   200] loss: 0.768
Running Epoch: 8
[8,   200] loss: 0.744
Running Epoch: 9
[9,   200] loss: 0.701
Running Epoch: 10
[10,   200] loss: 0.727
Running Epoch: 11
[11,   200] loss: 0.733
Running Epoch: 12
[12,   200] loss: 0.752
Running Epoch: 13
[13,   200] loss: 0.736
Running Epoch: 14
[14,   200] loss: 0.747
Running Epoch: 15
[15,   200] loss: 0.723
Running Epoch: 16
[16,   200] loss: 0.702
Running Epoch: 17
[17,   200] loss: 0.716
Running Epoch: 18
[18,   200] loss: 0.693
Running Epoch: 19
[19,   200] loss: 0.757
Running Epoch: 20
[20,   200] loss: 0.740
Finished Training


In [17]:
def dicom2pngt(path,directory = png_train):
    dcm_file = dicom.read_file(path)
#     print(np.mean(dcm_file.pixel_array)!=0)
    if np.max(dcm_file.pixel_array) - np.min(dcm_file.pixel_array) != 0:
        data = apply_voi_lut(dcm_file.pixel_array, dcm_file)
        if dcm_file.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        data = data / (np.max(data) - np.min(data))
        data = (data * 255).astype(np.uint8)
        im = Image.fromarray(data)
        to_dir = os.path.abspath(directory)
        head,tail = os.path.split(path)
        file_name = tail
        ls = []
        head,tail = os.path.split(head)
        ls.append(tail)
        head,tail = os.path.split(head)
        ls.append(tail)
        ls.reverse()
        for loc in ls:
            to_dir = os.path.join(to_dir,loc)
        os.makedirs(to_dir, exist_ok = True)
        file_name = os.path.splitext(file_name)[0]
        file_name += '.png'
        to_dir = os.path.join(to_dir,file_name)
        im.save(to_dir)

In [18]:
def predict(dataloader,model):
    y = []
    model.eval()
    for data in dataloader:
        inputs,labels=data
        inputs,labels=inputs.to(device),labels.to(device)
        outputs=model(inputs)
        m = nn.Sigmoid()
        outputs=m(outputs)
        outputs = outputs.flatten()
        for o in outputs:
            y.append(o.item())
    model.train()
    print(len(y))
    y_pred = sum(y)/len(y)
#     print(y_pred)
    return y_pred

In [19]:
for patient in tqdm(test_patients):
#     print(f'patient: {patient}')
    path = os.path.join(test_dir, patient,'FLAIR')
    for dcm_file in os.listdir(path):
        file_path = os.path.join(path,dcm_file)
        dicom2pngt(file_path, directory = png_test)

  0%|          | 0/87 [00:00<?, ?it/s]

In [20]:
y_pred = defaultdict(dict)
for patient in test_patients:
    path = os.path.join(png_test, patient)
#     print(path)
    dataset = torchvision.datasets.ImageFolder(root=path,transform=transforms.Compose([transforms.ToTensor(),transforms.Resize([224,224]),]))
    testloader=torch.utils.data.DataLoader(dataset,batch_size=batch_size,shuffle=False)

    y_pred[patient] = predict(testloader,rn)
    


19
70
25
153
151
21
48
142
51
45
47
45
22
28
149
155
46
48
45
164
47
47
132
145
47
27
138
281
27
118
48
153
144
141
45
48
28
24
308
76
142
72
152
23
147
140
180
66
32
148
46
47
48
152
18
47
160
276
50
72
48
56
150
281
140
21
147
25
47
133
30
50
72
49
25
92
47
143
46
156
51
44
145
50
24
291
47


In [21]:
submission = pd.DataFrame({"BraTS21ID":y_pred.keys(),"MGMT_value":y_pred.values()})
submission.to_csv('submission.csv', index=False)

In [22]:
print(y_pred)

defaultdict(<class 'dict'>, {'00114': 0.3801164842749897, '00013': 0.46057387505258834, '00821': 0.4431061218669765, '00644': 0.37927771578816805, '00699': 0.5240465933518694, '00125': 0.39588590814682467, '00833': 0.4401722702508171, '00762': 0.394255529197169, '00393': 0.4354679277714561, '00287': 0.37845480723513497, '00434': 0.39740701463628325, '00337': 0.32871841804895285, '00145': 0.43009836734695867, '00307': 0.40670484996267725, '00489': 0.5492253803566798, '00749': 0.5198673623223459, '00163': 0.35983009053313214, '00384': 0.36100025148092146, '00438': 0.3730809463395013, '00047': 0.5241883116342673, '00323': 0.3241077201004992, '00428': 0.4840886304353146, '00462': 0.49504850189568417, '00560': 0.3787227122691171, '00422': 0.4488309586935855, '00825': 0.2985411735709921, '00335': 0.47788363068863965, '00082': 0.5976169231311282, '00458': 0.40414852731757694, '01006': 0.4350852170738123, '00573': 0.3591942572190116, '00647': 0.43478795584120783, '00603': 0.44619046834607917, 

In [23]:
submission

Unnamed: 0,BraTS21ID,MGMT_value
0,00114,0.380116
1,00013,0.460574
2,00821,0.443106
3,00644,0.379278
4,00699,0.524047
...,...,...
82,00474,0.463120
83,00174,0.431917
84,00119,0.353262
85,00080,0.412723


In [24]:
import csv
with open("submission.csv", "w") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(["BraTS21ID","MGMT_value"])
    for key, value in y_pred.items():
        writer.writerow([key, value])

In [25]:
!rm -rf ./png

In [26]:
!ls

__notebook__.ipynb  submission.csv
