In [6]:
import pandas as pd 
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this silences every warning category for the rest of the session,
# including deprecation notices; narrow the filter if that hides too much.
warnings.filterwarnings('ignore')
# Dataset root directory. Set the MLMI_ROOT_DIR environment variable to point the
# notebook at another location; an unset or empty variable falls back to the
# original local path. os.path.join(..., '') guarantees a trailing separator,
# because the CSV path is later built by plain string concatenation onto root_dir.
root_dir = os.path.join(os.environ.get('MLMI_ROOT_DIR') or '/home/mamur/TUM/MLMI/', '')

In [13]:
# Load the 220112_mimic-cxr.csv table located directly under root_dir.
data = pd.read_csv(
    filepath_or_buffer=root_dir + '220112_mimic-cxr.csv',
)

In [14]:
# Number of rows in the loaded table.
data.shape[0]

377110

In [15]:
# Inspect the column labels available in the loaded table.
data.columns

Index(['dicom_id', 'study_id', 'subject_id', 'split',
       'PerformedProcedureStepDescription', 'ViewPosition', 'Rows', 'Columns',
       'StudyDate', 'StudyTime', 'ProcedureCodeSequence_CodeMeaning',
       'ViewCodeSequence_CodeMeaning',
       'PatientOrientationCodeSequence_CodeMeaning', 'Atelectasis',
       'Cardiomegaly', 'Consolidation', 'Edema', 'Enlarged Cardiomediastinum',
       'Fracture', 'Lung Lesion', 'Lung Opacity', 'No Finding',
       'Pleural Effusion', 'Pleural Other', 'Pneumonia', 'Pneumothorax',
       'Support Devices', 'report', 'png_224', 'jpg', 'png_256'],
      dtype='object')

In [16]:
# The 14 label columns of the table, in the order they appear in it.
chexpert_labels = [
    'Atelectasis',
    'Cardiomegaly',
    'Consolidation',
    'Edema',
    'Enlarged Cardiomediastinum',
    'Fracture',
    'Lung Lesion',
    'Lung Opacity',
    'No Finding',
    'Pleural Effusion',
    'Pleural Other',
    'Pneumonia',
    'Pneumothorax',
    'Support Devices',
]

In [18]:
# Column holding the per-image identifier, and column holding the image file path.
img_id, img_column = 'dicom_id', 'jpg'

In [20]:
# Count the distinct values in the image-id column (a NaN, if present, counts as one value).
print(data[img_id].nunique(dropna=False))
# Same count for the image-path column; when both match the row count,
# every row refers to its own distinct image.
print(data[img_column].nunique(dropna=False))

377110
377110


In [24]:
# Columns to keep: image id, image path, then every label column, in that order.
# Built directly as a list instead of joining the names into one comma-separated
# string and splitting it again, which would mis-split any name containing a comma.
columns = [img_id, img_column, *chexpert_labels]
columns

['dicom_id',
 'jpg',
 'Atelectasis',
 'Cardiomegaly',
 'Consolidation',
 'Edema',
 'Enlarged Cardiomediastinum',
 'Fracture',
 'Lung Lesion',
 'Lung Opacity',
 'No Finding',
 'Pleural Effusion',
 'Pleural Other',
 'Pneumonia',
 'Pneumothorax',
 'Support Devices']

In [26]:
# Restrict the table to the names listed in `columns`, keeping every row.
data = data.loc[:, columns]

In [28]:
# Display the table after the column selection.
data

Unnamed: 0,dicom_id,jpg,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5041...,,,,,,,,,1.0,,,,,
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5041...,,,,,,,,,1.0,,,,,
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5318...,,,,,,,,,,,,,,
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5318...,,,,,,,,,,,,,,
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5391...,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377105,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,/mimic-cxr-jpg/2.0.0/files/p19/p19999733/s5713...,,,,,,,,,,,,,,
377106,58c403aa-35ff8bd9-73e39f54-8dc9cc5d-e0ec3fa9,/mimic-cxr-jpg/2.0.0/files/p19/p19999733/s5713...,,,,,,,,,,,,,,
377107,58766883-376a15ce-3b323a28-6af950a0-16b793bd,/mimic-cxr-jpg/2.0.0/files/p19/p19999987/s5536...,,,,,,,,,,,,,,
377108,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,/mimic-cxr-jpg/2.0.0/files/p19/p19999987/s5862...,,,,,,,,,,,,,,


In [29]:
# Replace every missing value (NaN) in the table with 0.
data = data.fillna(value=0)

In [30]:
# Display the table again after the NaN fill.
data

Unnamed: 0,dicom_id,jpg,Atelectasis,Cardiomegaly,Consolidation,Edema,Enlarged Cardiomediastinum,Fracture,Lung Lesion,Lung Opacity,No Finding,Pleural Effusion,Pleural Other,Pneumonia,Pneumothorax,Support Devices
0,02aa804e-bde0afdd-112c0b34-7bc16630-4e384014,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5041...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
1,174413ec-4ec4c1f7-34ea26b7-c5f994f8-79ef1962,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5041...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,2a2277a9-b0ded155-c0de8eb9-c124d10e-82c5caab,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5318...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,e084de3b-be89b11e-20fe3f9f-9c8d8dfe-4cfd202c,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5318...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,68b5c4b1-227d0485-9cc38c3f-7b84ab51-4b472714,/mimic-cxr-jpg/2.0.0/files/p10/p10000032/s5391...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
377105,428e2c18-5721d8f3-35a05001-36f3d080-9053b83c,/mimic-cxr-jpg/2.0.0/files/p19/p19999733/s5713...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
377106,58c403aa-35ff8bd9-73e39f54-8dc9cc5d-e0ec3fa9,/mimic-cxr-jpg/2.0.0/files/p19/p19999733/s5713...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
377107,58766883-376a15ce-3b323a28-6af950a0-16b793bd,/mimic-cxr-jpg/2.0.0/files/p19/p19999987/s5536...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
377108,7ba273af-3d290f8d-e28d0ab4-484b7a86-7fc12b08,/mimic-cxr-jpg/2.0.0/files/p19/p19999987/s5862...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
