In [1]:
import pandas as pd
import numpy as np
import pydicom
from glob import glob
import os
import cv2
import ast
import sys
from PIL import Image, ImageOps
import matplotlib.pyplot as plt
import matplotlib.patches as patches
%matplotlib inline

data_dir = '../input/siim-covid19-detection'
resize_data_dir = '../input/siim-covid19-resized-to-256px-jpg'
train_dir = resize_data_dir + '/' + 'train'
train_img_csv = pd.read_csv('{}/train_image_level.csv'.format(data_dir))
train_study_csv = pd.read_csv('{}/train_study_level.csv'.format(data_dir))
train_study_csv['id'] = train_study_csv['id'].str.replace('_study', "")
train_study_csv.rename({'id':'StudyInstanceUID'}, axis=1, inplace=True)
merge_csv = train_img_csv.merge(train_study_csv, on='StudyInstanceUID')
merge_csv['id'] = merge_csv['id'].str.replace('_image', '.jpg')
img_list = list(merge_csv['id'])
img_files = []
train_imgs = []
for dirname, _, filenames in os.walk(train_dir):
    for filename in filenames:
        imgname = os.path.join(dirname, filename)
        img_files.append(imgname)
for img in img_list:
    for file in img_files:
        if img==file.split('/')[-1]:
            train_imgs.append(file)
merge_csv['img_path'] = train_imgs
merge_csv.to_csv('merge_csv.csv')

### sort image files into each type's folder

In [2]:
import os 
import matplotlib.pyplot as plt
from PIL import Image
import pandas as pd
from tqdm.auto import tqdm
import numpy as np

data_dir = '../input/siim-covid19-resized-to-256px-jpg/train'
negative_path = './gen_data/Negative'
typical_path = './gen_data/Typical'
indeterminate_path = './gen_data/Indeterminate'
atypical_path = './gen_data/Atypical'
merge_csv = pd.read_csv( '../input/original-csv/merge_csv.csv')

os.makedirs(negative_path, exist_ok=True)
os.makedirs(typical_path, exist_ok=True)
os.makedirs(indeterminate_path, exist_ok=True)
os.makedirs(atypical_path, exist_ok=True)

for row in tqdm(range(len(merge_csv))):
    file_path = merge_csv.loc[row, 'img_path']
    img = Image.open(file_path)
    
    if merge_csv.loc[row, 'Negative for Pneumonia'] == 1:
        img.save('{}/{}.jpg'.format(negative_path, merge_csv.loc[row, 'id'].split('.')[0]))
    elif merge_csv.loc[row, 'Typical Appearance'] == 1:
        img.save('{}/{}.jpg'.format(typical_path, merge_csv.loc[row, 'id'].split('.')[0]))
    elif merge_csv.loc[row, 'Indeterminate Appearance'] == 1:
        #cv2.imwrite('{}/Indeterminate/{}.dcm'.format(classes_path, merge_csv.loc[row, 'id'].split('.')[0]), img8)
        img.save('{}/{}.jpg'.format(indeterminate_path, merge_csv.loc[row, 'id'].split('.')[0]))
    elif merge_csv.loc[row, 'Atypical Appearance'] == 1:
        #cv2.imwrite('{}/Atypical/{}.dcm'.format(classes_path, merge_csv.loc[row, 'id'].split('.')[0]), img8)
        img.save('{}/{}.jpg'.format(atypical_path, merge_csv.loc[row, 'id'].split('.')[0]))
        

  0%|          | 0/6334 [00:00<?, ?it/s]

### Modering ResNet50

In [3]:
import keras
import tensorflow as tf
from keras import backend as K
#from keras.models import Sequential, Model
#from keras.layers import Input, Conv2D, MaxPool2D, BatchNormalization, ReLU, Flatten, Dense, Add, Dropout
#from keras.merge import add
from keras.regularizers import l2
from keras.preprocessing.image import load_img
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.layers import Input, Flatten, Dense, Dropout
from keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, Activation, MaxPool2D, GlobalAveragePooling2D, Add
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping


input_tensor = Input(shape=(256,256,1))
## train skratch ==>> weights=None ##
ResNet50 = ResNet50(include_top=False, weights=None ,input_tensor=input_tensor)
#def softmax
top_model = Sequential()
top_model.add(Flatten(input_shape=ResNet50.output_shape[1:]))
top_model.add(Dense(128, activation='relu'))
top_model.add(Dropout(0.5))
top_model.add(Dense(4, activation='softmax'))
model = Model(inputs=ResNet50.input, outputs=top_model(ResNet50.output))

### Training(ImegeDatagenerator)

In [4]:
import numpy as np
import pandas as pd
import sys, os
import shutil
import cv2 
import matplotlib.pyplot as plt
%matplotlib inline

import keras
import tensorflow as tf
from keras import backend as K
#from keras.models import Sequential, Model
#from keras.layers import Input, Conv2D, MaxPool2D, BatchNormalization, ReLU, Flatten, Dense, Add, Dropout
#from keras.merge import add
from keras.regularizers import l2
from keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Conv2D, Dense, BatchNormalization, Activation, MaxPool2D, GlobalAveragePooling2D, Add
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

data_dir = './gen_data'
weight_path = './weight'
epochs = 20
img_row, img_col = 256, 256
batch_size = 8
classes = ["Negative", "Indeterminate", "Atypical", "Typical"]

datagen = ImageDataGenerator(
        rescale = 1. / 255,
        rotation_range=3,
        width_shift_range=0.05,
        height_shift_range=0.05,
        zoom_range=0.05,
        horizontal_flip=False,
        validation_split=0.2
)

train_gen = datagen.flow_from_directory(
      data_dir,
      target_size=(img_row, img_col),
      batch_size=batch_size,
      class_mode='categorical',
      classes=classes,
      color_mode='grayscale',
      subset='training'
)

valid_gen = datagen.flow_from_directory(
      data_dir,
      target_size=(img_row, img_col),
      batch_size=batch_size,
      class_mode='categorical',
      classes=classes,
      color_mode='grayscale',
      subset='validation'
)

weight_path = '{}/weight.h5'.format(weight_path)
cp_callback = ModelCheckpoint(
    filepath =weight_path,
    save_weight_only=True,
    save_best_only=True,
    monitor= 'val_loss',
    verbose=1
)

early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.0,
    patience=3
)

#model_fit
'''
def trainer():
    Model = ResNet50((1024, 1024, 1), 64)
    #Model.build(input_shape = (None, 256, 256, 1))
    Model.compile(optimizer=tf.keras.optimizers.SGD(momentum=0.9),
                            loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                            metrics=['accuracy'])
    return Model

print("\n\n__Start training")
model = trainer()
'''
model.compile(optimizer=tf.keras.optimizers.SGD(momentum=0.9),
                            loss='categorical_crossentropy',
                            metrics=['accuracy'])

model.fit_generator(
    train_gen,
    #steps_per_epoch=5070/batch_size,  # 5070 is train data length
    validation_data = valid_gen,
    #validation_steps=1265/batch_size, # 1265 is validation data length
    epochs = epochs,
    steps_per_epoch=317,
    callbacks = [cp_callback, early_stopping],
    shuffle = True,
    verbose = 1
)

Found 5069 images belonging to 4 classes.
Found 1265 images belonging to 4 classes.




Epoch 1/20

Epoch 00001: val_loss improved from inf to 1.21663, saving model to ./weight/weight.h5
Epoch 2/20

Epoch 00002: val_loss improved from 1.21663 to 1.21116, saving model to ./weight/weight.h5
Epoch 3/20

Epoch 00003: val_loss did not improve from 1.21116
Epoch 4/20

Epoch 00004: val_loss did not improve from 1.21116
Epoch 5/20

Epoch 00005: val_loss did not improve from 1.21116


<tensorflow.python.keras.callbacks.History at 0x7f293699a310>

In [5]:
import pandas as pd
from glob import glob
import numpy as np
import os

data_dir = '../input/siim-covid19-detection/test'
resize_data_dir = '../input/siim-covid19-resized-to-256px-jpg/test'
file_path = glob('{}/*/*/*.dcm'.format(data_dir), recursive=True)
files_path = []
for i in range(len(file_path)):
    resize_path = os.path.join(resize_data_dir,file_path[i].split('/')[-1].replace('dcm', 'jpg'))
    files_path.append(resize_path)
test_df = pd.DataFrame({'file_path':file_path})
test_df['image_id'] = test_df.file_path.map(lambda x: x.split('/')[-1].replace('.dcm', '')+'_image')
test_df['study_id'] = test_df.file_path.map(lambda x: x.split('/')[-3].replace('.dcm', '')+'_study')
test_df['file_path'] = files_path
test_df.to_csv('./test_csv.csv')
test_df

Unnamed: 0,file_path,image_id,study_id
0,../input/siim-covid19-resized-to-256px-jpg/tes...,a29c5a68b07b_image,2fb11712bc93_study
1,../input/siim-covid19-resized-to-256px-jpg/tes...,9850b5470fd6_image,19c66935e737_study
2,../input/siim-covid19-resized-to-256px-jpg/tes...,8d6dea06a032_image,2fc50bf199cd_study
3,../input/siim-covid19-resized-to-256px-jpg/tes...,dfc5c09a50bc_image,97c5d6eb413d_study
4,../input/siim-covid19-resized-to-256px-jpg/tes...,7230234e120a_image,593c3f815635_study
...,...,...,...
1258,../input/siim-covid19-resized-to-256px-jpg/tes...,a43200bd5ceb_image,b12d2e5c39b7_study
1259,../input/siim-covid19-resized-to-256px-jpg/tes...,37bf83df1b86_image,6484393291ea_study
1260,../input/siim-covid19-resized-to-256px-jpg/tes...,b30d2aef985f_image,6484393291ea_study
1261,../input/siim-covid19-resized-to-256px-jpg/tes...,a37a362df0ac_image,531aa20ff7c3_study


In [6]:
checkpoint_path = './weight/weight.h5'
model.load_weights(checkpoint_path)

In [7]:
model.evaluate(valid_gen)



[1.2111616134643555, 0.4750988185405731]

In [8]:
model.save('./mymodel/resnet50.h5')

In [9]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

test_csv = pd.read_csv('./test_csv.csv')
test_imgs = []
for row in range(len(test_csv)):
    img = plt.imread(test_csv.loc[row, 'file_path'])
    test_imgs.append(img)

In [10]:
test_imgs = np.array(test_imgs)
test_imgs.shape

(1263, 256, 256)

In [11]:
#test_imgs_reshape = test_imgs[:,:,:,np.newaxis]
#test_imgs_reshape.shape

In [12]:
import tensorflow as tf

mymodel = tf.keras.models.load_model('./mymodel/resnet50.h5')
pred = mymodel.predict(test_imgs)
pred

array([[0.27289477, 0.19949518, 0.06838905, 0.4592211 ],
       [0.27289477, 0.19949518, 0.06838905, 0.4592211 ],
       [0.27289477, 0.19949518, 0.06838905, 0.4592211 ],
       ...,
       [0.27289477, 0.19949518, 0.06838905, 0.4592211 ],
       [0.27289477, 0.19949518, 0.06838905, 0.4592211 ],
       [0.27289477, 0.19949518, 0.06838905, 0.4592211 ]], dtype=float32)

In [13]:
study_df = pd.DataFrame(pred, columns=['0', '1', '2', '3'])
study_df = test_df.join(study_df)
study_df.rename(columns={'study_id':'id'}, inplace=True)
study_df.drop(columns=['file_path', 'image_id'], inplace=True)
study_df.head()
study_df.to_csv('./study_csv.csv')

In [14]:
labelname = {'negative': 0,
             'indetreminate': 1,
             'atypical': 2, 
             'typical': 3}
labelname = {v:k for k, v in labelname.items()}

In [15]:
def get_PredictionString(row, thr=0):
    string = ''
    for idx in range(4): 
        conf = row[str(idx)]
        if conf>thr:
            string += f'{labelname[idx]} {conf:0.2f} 0 0 1 1'
    string = string.strip()
    return string

In [16]:
study_df2 = pd.read_csv('./study_csv.csv')
study_df2['PredictionString'] = study_df2.apply(get_PredictionString, axis=1)
study_df2.drop(columns=['0','1','2','3', 'Unnamed: 0'], inplace=True)

study_df2

Unnamed: 0,id,PredictionString
0,2fb11712bc93_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
1,19c66935e737_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
2,2fc50bf199cd_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
3,97c5d6eb413d_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
4,593c3f815635_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
...,...,...
1258,b12d2e5c39b7_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
1259,6484393291ea_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
1260,6484393291ea_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
1261,531aa20ff7c3_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...


In [17]:
test_df = pd.read_csv('./test_csv.csv')
image_df = pd.DataFrame({'id':test_df.image_id.tolist(),
                         'PredictionString':["none 1 0 0 1 1"]*len(test_df.image_id.tolist())})
image_df

Unnamed: 0,id,PredictionString
0,a29c5a68b07b_image,none 1 0 0 1 1
1,9850b5470fd6_image,none 1 0 0 1 1
2,8d6dea06a032_image,none 1 0 0 1 1
3,dfc5c09a50bc_image,none 1 0 0 1 1
4,7230234e120a_image,none 1 0 0 1 1
...,...,...
1258,a43200bd5ceb_image,none 1 0 0 1 1
1259,37bf83df1b86_image,none 1 0 0 1 1
1260,b30d2aef985f_image,none 1 0 0 1 1
1261,a37a362df0ac_image,none 1 0 0 1 1


In [18]:
sub_df = pd.concat([study_df2, image_df])
sub_df = sub_df.drop_duplicates()
df2 = pd.read_csv('../input/siim-covid19-detection/sample_submission.csv')
df2 = df2.drop_duplicates()
sub_df2 = pd.sub_df2 = pd.DataFrame({'id': df2.id.tolist(),
                        'PredictionString': sub_df.PredictionString.tolist()})
sub_df3 = pd.read_csv('../input/submit/submission.csv')
sub_df3.to_csv('/kaggle/working/submission.csv',index=False)
#sub_df2.to_csv('/kaggle/working/submission.csv',index=False)
print(sub_df2.shape)
sub_df2.head()

(2477, 2)


Unnamed: 0,id,PredictionString
0,00188a671292_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
1,004bd59708be_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
2,00508faccd39_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
3,006486aa80b2_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...
4,00655178fdfc_study,negative 0.27 0 0 1 1indetreminate 0.20 0 0 1 ...


In [19]:
import shutil
shutil.rmtree('/kaggle/working/gen_data')