In [1]:
import pytorch_lightning as pl
import cv2 as cv
import pydicom as dicom
from pydicom.pixel_data_handlers import apply_voi_lut
import pandas as pd
import torch
import os
from collections import defaultdict
from torch.utils.data import Dataset,DataLoader
from torchvision import transforms, models
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import copy
import numpy as np
from PIL import Image,ImageOps
import matplotlib.pyplot as plt
import sys
from tqdm.notebook import tqdm, trange
import time

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# device = torch.device("cpu")
print(device)

cuda:0


In [3]:
train_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train"
train_labels_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/train_labels.csv"
test_dir = "../input/rsna-miccai-brain-tumor-radiogenomic-classification/test"
png_train = "./png/train"
png_test = "./png/test"
batch_size = 4

In [4]:
def ensure_dir(file_path):
    directory = os.path.dirname(file_path)
    if not os.path.exists(directory):
        os.makedirs(directory)

In [5]:
def dicom2png(path,v):
    dcm_file = dicom.read_file(path)
#     print(np.mean(dcm_file.pixel_array)!=0)
    data = apply_voi_lut(dcm_file.pixel_array, dcm_file)
    if np.max(data) - np.min(data) != 0:
        if dcm_file.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        data = data / np.max(data)
        data = (data * 255).astype(np.uint8)
        im = Image.fromarray(data)
        to_dir = os.path.abspath(png_train)
        head,tail = os.path.split(path)
        file_name = tail
        ls = []
        head,tail = os.path.split(head)
        ls.append(tail)
        head,tail = os.path.split(head)
        ls.append(tail)
#         print(tail)
        ls.reverse()
        for loc in ls:
            to_dir = os.path.join(to_dir,loc)
        to_dir = os.path.abspath(png_train)
        to_dir = os.path.join(to_dir,v)
        os.makedirs(to_dir, exist_ok = True)
        file_name = os.path.splitext(file_name)[0]
        file_name += '.png'
        to_dir = os.path.join(to_dir,file_name)
        im.save(to_dir)

In [6]:
train_labels=pd.read_csv(train_labels_dir)
train_labels['BraTS21ID'] = train_labels['BraTS21ID'].apply(lambda x: str(x).zfill(5))
train_labels = train_labels.set_index('BraTS21ID')
train_labels = train_labels.drop(index = '00109')
train_labels = train_labels.drop(index = '00123')
train_labels = train_labels.drop(index = '00709')
train_patients = train_labels.index.values.tolist()
test_patients = os.listdir(test_dir)
print(len(train_labels))
# print(train_labels)
print('-----------------------')
# lab = train_labels.to_numpy()
# print(train_patients)
lab = train_labels.values.tolist()
print(type(lab),type(train_patients))
print(len(lab),len(train_patients))
# print(lab)
# for x,y in train_patients:
#     if(x == 00000):
#         print(y)

582
-----------------------
<class 'list'> <class 'list'>
582 582


In [7]:
for patient in tqdm(train_patients):
    id = train_patients.index(patient)
    v = lab[id]
    m=str(v)
#     print(v)
    path = os.path.join(train_dir, patient,'T2w')
#     print(path)
    for dcm_file in os.listdir(path):
        file_path = os.path.join(path,dcm_file)
        dicom2png(file_path,m)
print("Over")

  0%|          | 0/582 [00:00<?, ?it/s]

Over


In [8]:
classes=['1','0']
num_classes=2
batch_size=4

In [9]:
import torchvision
from torchvision import transforms

dataset = torchvision.datasets.ImageFolder(root="./png/train",transform=transforms.Compose([
                                                            transforms.ToTensor(),
                                                            transforms.Resize([224,224]),
                                                            ]))

In [10]:
print(len(dataset))
trainloader=torch.utils.data.DataLoader(dataset,batch_size=batch_size,shuffle=True,generator=torch.Generator().manual_seed(42))

921


In [11]:
dataiter=iter(trainloader)
images,labels=dataiter.next()

In [12]:
class RN(nn.Module):
    def __init__(self,learning_rate=0.001):
        super().__init__()
        self.learning_rate = learning_rate
        backbone = models.resnet152(pretrained=False)
        result = backbone.load_state_dict(torch.load("../input/pytorch-pretrained-models/resnet152-b121ed2d.pth"))
        print(result)
        num_ftrs=backbone.fc.in_features
        layers = list(backbone.children())[:-1]
        self.feature_extractor = nn.Sequential(*layers)
        fc=[]
        fc.extend([nn.Linear(in_features=num_ftrs,out_features=512),
           nn.ReLU(),
           nn.Dropout(),
           nn.Linear(in_features=512,out_features=128),
           nn.ReLU(),
           nn.Dropout(),
           nn.Linear(in_features=128,out_features=1)
          ])
        self.classifier=nn.Sequential(*fc)
        for param in self.feature_extractor.parameters():
            param.requires_grad=False
    def forward(self, x):
        with torch.no_grad():
            representations = self.feature_extractor(x).flatten(1)
        x=self.classifier(representations)
        return x;

In [13]:
rn=RN().to(device)

<All keys matched successfully>


In [14]:
def train(net,dataloader,epochs=15):
    loss_fn=nn.BCEWithLogitsLoss().to(device)
    running_loss = 0.0
    opt=optim.Adam(params=net.parameters())
    for epoch in range(epochs):
        print(f'Running Epoch: {epoch+1}')
        for i,data in enumerate(dataloader,0):
            inputs,labels=data
            inputs,labels=inputs.to(device),labels.to(device)
            opt.zero_grad()
            outputs=net(inputs)
            labels=labels.unsqueeze(-1)
            labels = labels.type_as(outputs)
    #         print(outputs)
            loss=loss_fn(outputs,labels)
            loss.backward()
            opt.step()
            running_loss += loss.item()
            if i % 200 == 199:    # print every 2000 mini-batches
                print('[%d, %5d] loss: %.3f' %
                    (epoch + 1, i + 1, running_loss / 200))
                running_loss = 0.0
            torch.cuda.empty_cache()
            del inputs,labels,outputs
    print('Finished Training')

In [15]:
for param in rn.parameters():
    if param.requires_grad:
        print(param.shape)

torch.Size([512, 2048])
torch.Size([512])
torch.Size([128, 512])
torch.Size([128])
torch.Size([1, 128])
torch.Size([1])


In [16]:
train(rn,trainloader,20)

Running Epoch: 1
[1,   200] loss: 0.754
Running Epoch: 2
[2,   200] loss: 0.788
Running Epoch: 3
[3,   200] loss: 0.767
Running Epoch: 4
[4,   200] loss: 0.761
Running Epoch: 5
[5,   200] loss: 0.728
Running Epoch: 6
[6,   200] loss: 0.688
Running Epoch: 7
[7,   200] loss: 0.741
Running Epoch: 8
[8,   200] loss: 0.695
Running Epoch: 9
[9,   200] loss: 0.711
Running Epoch: 10
[10,   200] loss: 0.655
Running Epoch: 11
[11,   200] loss: 0.666
Running Epoch: 12
[12,   200] loss: 0.635
Running Epoch: 13
[13,   200] loss: 0.644
Running Epoch: 14
[14,   200] loss: 0.676
Running Epoch: 15
[15,   200] loss: 0.630
Running Epoch: 16
[16,   200] loss: 0.606
Running Epoch: 17
[17,   200] loss: 0.632
Running Epoch: 18
[18,   200] loss: 0.635
Running Epoch: 19
[19,   200] loss: 0.636
Running Epoch: 20
[20,   200] loss: 0.624
Finished Training


In [17]:
def dicom2pngt(path,directory = png_train):
    dcm_file = dicom.read_file(path)
#     print(np.mean(dcm_file.pixel_array)!=0)
    if np.max(dcm_file.pixel_array) - np.min(dcm_file.pixel_array) != 0:
        data = apply_voi_lut(dcm_file.pixel_array, dcm_file)
        if dcm_file.PhotometricInterpretation == "MONOCHROME1":
            data = np.amax(data) - data
        data = data - np.min(data)
        data = data / (np.max(data) - np.min(data))
        data = (data * 255).astype(np.uint8)
        im = Image.fromarray(data)
        to_dir = os.path.abspath(directory)
        head,tail = os.path.split(path)
        file_name = tail
        ls = []
        head,tail = os.path.split(head)
        ls.append(tail)
        head,tail = os.path.split(head)
        ls.append(tail)
        ls.reverse()
        for loc in ls:
            to_dir = os.path.join(to_dir,loc)
        os.makedirs(to_dir, exist_ok = True)
        file_name = os.path.splitext(file_name)[0]
        file_name += '.png'
        to_dir = os.path.join(to_dir,file_name)
        im.save(to_dir)

In [18]:
def predict(dataloader,model):
    y = []
    model.eval()
    for data in dataloader:
        inputs,labels=data
        inputs,labels=inputs.to(device),labels.to(device)
        outputs=model(inputs)
        m = nn.Sigmoid()
        outputs=m(outputs)
        outputs = outputs.flatten()
        for o in outputs:
            y.append(o.item())
    model.train()
    print(len(y))
    y_pred = sum(y)/len(y)
#     print(y_pred)
    return y_pred

In [19]:
for patient in tqdm(test_patients):
#     print(f'patient: {patient}')
    path = os.path.join(test_dir, patient,'T2w')
    for dcm_file in os.listdir(path):
        file_path = os.path.join(path,dcm_file)
        dicom2pngt(file_path, directory = png_test)

  0%|          | 0/87 [00:00<?, ?it/s]

In [20]:
y_pred = defaultdict(dict)
for patient in test_patients:
    path = os.path.join(png_test, patient)
#     print(path)
    dataset = torchvision.datasets.ImageFolder(root=path,transform=transforms.Compose([transforms.ToTensor(),transforms.Resize([224,224]),]))
    testloader=torch.utils.data.DataLoader(dataset,batch_size=batch_size,shuffle=False)

    y_pred[patient] = predict(testloader,rn)
    


19
295
25
279
265
123
29
268
49
45
47
47
20
28
263
275
46
48
45
273
48
29
82
280
47
26
47
286
67
118
73
128
297
287
46
49
29
24
69
151
276
275
67
23
269
266
276
71
32
252
69
48
48
127
21
48
68
84
50
26
47
28
283
75
261
21
127
25
46
283
30
50
286
48
24
162
70
271
47
48
50
24
121
49
23
268
29


In [21]:
submission = pd.DataFrame({"BraTS21ID":y_pred.keys(),"MGMT_value":y_pred.values()})
submission.to_csv('submission.csv', index=False)

In [22]:
print(y_pred)

defaultdict(<class 'dict'>, {'00114': 0.6623851311834235, '00013': 0.4362183936571671, '00821': 0.5403999698162079, '00644': 0.5518885862656392, '00699': 0.4426711251994349, '00125': 0.5238539806710996, '00833': 0.5223010881193753, '00762': 0.47022822749481274, '00393': 0.7003012129238674, '00287': 0.6004435704814063, '00434': 0.604365441710391, '00337': 0.6359957513657022, '00145': 0.7380508154630661, '00307': 0.6595600258026805, '00489': 0.47046301683885516, '00749': 0.49304012740200215, '00163': 0.6420379433295001, '00384': 0.5987941678613424, '00438': 0.7773723800977071, '00047': 0.35654458747460294, '00323': 0.5248368446094295, '00428': 0.7077952088980839, '00462': 0.39944439186010416, '00560': 0.5096649721264839, '00422': 0.6917218255235794, '00825': 0.6327748287182587, '00335': 0.6747061001493576, '00082': 0.4212561526524645, '00458': 0.665764837567486, '01006': 0.7294832217996403, '00573': 0.6337181446078706, '00647': 0.39689819794148207, '00603': 0.5197710485169382, '00712': 0

In [23]:
submission

Unnamed: 0,BraTS21ID,MGMT_value
0,00114,0.662385
1,00013,0.436218
2,00821,0.540400
3,00644,0.551889
4,00699,0.442671
...,...,...
82,00474,0.333498
83,00174,0.628226
84,00119,0.628404
85,00080,0.342589


In [24]:
import csv
with open("submission.csv", "w") as outfile:
    writer = csv.writer(outfile)
    writer.writerow(["BraTS21ID","MGMT_value"])
    for key, value in y_pred.items():
        writer.writerow([key, value])

In [25]:
!rm -rf ./png

In [26]:
!ls

__notebook__.ipynb  submission.csv
