In [1]:
!pip install tqdm

Looking in indexes: https://mirrors.aliyun.com/pypi/simple/
Collecting tqdm
  Downloading https://mirrors.aliyun.com/pypi/packages/0b/e8/d6f4db0886dbba2fc87b5314f2d5127acdc782e4b51e6f86972a2e45ffd6/tqdm-4.62.0-py2.py3-none-any.whl (76 kB)
[K     |████████████████████████████████| 76 kB 5.0 MB/s 
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.62.0


In [2]:
import shutil, os
import zipfile
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras import Model
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GroupKFold
from tqdm.notebook import tqdm
# from tf.keras.metrics import AUC

In [None]:
# !unzip -q siimcovid19-512-jpg-image-dataset.zip -d siimcovid19-512-jpg-image-dataset/

In [3]:
fold_index = 0

# Prepare Dataset

In [4]:
dataset_dir = '/mnt/siim-covid19/siimcovid19-512-jpg-image-dataset/'

In [None]:
def get_df_from_original_file():
    # make sure using original siimcovid19-512-jpg-image-dataset
    train_df = pd.read_csv(dataset_dir + 'train.csv')
    train_df['image_path'] = dataset_dir + 'train/' + train_df.image_id + '.jpg'
    # split
    gkf = GroupKFold(n_splits = 5)
    train_df['fold'] = -1
    for fold, (train_idx, val_idx) in enumerate(gkf.split(train_df, groups = train_df.StudyInstanceUID.tolist())):
        train_df.loc[val_idx, 'fold'] = fold
    return train_df

In [5]:
# Use saved file
train_df_path = '/mnt/siim-covid19/output/study_train_df_with_cur_loc.csv'
if (os.path.exists(train_df_path)):
    train_df = pd.read_csv(train_df_path)
else:
    train_df = get_df_from_original_file()

train_df.head()

Unnamed: 0.1,Unnamed: 0,boxes,label,StudyInstanceUID,image_id,Negative for Pneumonia,Typical Appearance,Indeterminate Appearance,Atypical Appearance,filepath,image_path,fold
0,0,"[{'x': 789.28836, 'y': 582.43035, 'width': 102...",opacity 1 789.28836 582.43035 1815.94498 2499....,5776db0cec75,000a312787f2,0,1,0,0,/kaggle/input/siim-covid19-detection/train/577...,/mnt/siim-covid19/siimcovid19-512-jpg-image-da...,4
1,1,,none 1 0 0 1 1,ff0879eb20ed,000c3a3f293f,1,0,0,0,/kaggle/input/siim-covid19-detection/train/ff0...,/mnt/siim-covid19/siimcovid19-512-jpg-image-da...,0
2,2,"[{'x': 677.42216, 'y': 197.97662, 'width': 867...",opacity 1 677.42216 197.97662 1545.21983 1197....,9d514ce429a7,0012ff7358bc,0,1,0,0,/kaggle/input/siim-covid19-detection/train/9d5...,/mnt/siim-covid19/siimcovid19-512-jpg-image-da...,4
3,3,"[{'x': 2729, 'y': 2181.33331, 'width': 948.000...",opacity 1 2729 2181.33331 3677.00012 2785.33331,28dddc8559b2,001398f4ff4f,0,0,0,1,/kaggle/input/siim-covid19-detection/train/28d...,/mnt/siim-covid19/siimcovid19-512-jpg-image-da...,1
4,4,"[{'x': 623.23328, 'y': 1050, 'width': 714, 'he...",opacity 1 623.23328 1050 1337.23328 2156 opaci...,dfd9fdd85a3e,001bd15d1891,0,1,0,0,/kaggle/input/siim-covid19-detection/train/dfd...,/mnt/siim-covid19/siimcovid19-512-jpg-image-da...,3


In [None]:
# move files and change records
class_names = ['Negative for Pneumonia', 'Typical Appearance', 'Indeterminate Appearance', 'Atypical Appearance']
for class_name in class_names:
    os.makedirs(dataset_dir + 'train/' + class_name, exist_ok = True)
    os.makedirs(dataset_dir + 'val/' + class_name, exist_ok = True)

for index, row in tqdm(train_df.iterrows()):
    # filename = file_path.split('/')[-1].split('.')[0]
    # if (filename + '_image' != row.id):
    #     continue
    file_path = row.image_path

    is_val = (row.fold == fold_index)
    train_or_val = 'val' if is_val else 'train'
    class_name = class_names[row[class_names].to_list().index(1)]
    filename = row.image_id

    target_path = dataset_dir + train_or_val + '/' + class_name + '/' + filename + '.jpg'
    if (file_path == target_path):
        continue
    shutil.move(file_path, target_path)
    # print(file_path)
    # print(target_path)
    train_df.loc[index, 'image_path'] = target_path

In [None]:
# Save train_df
train_df.to_csv(train_df_path)

In [6]:
train_dir = dataset_dir + 'train/'
validation_dir = dataset_dir + 'val/'

# Inception

In [None]:
# Add our data-augmentation parameters to ImageDataGenerator
input_dim = 299
train_datagen = ImageDataGenerator(rescale = 1./255., rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2,shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True)
test_datagen = ImageDataGenerator( rescale = 1.0/255. )
train_generator = train_datagen.flow_from_directory(train_dir, batch_size = 32, class_mode = 'categorical', target_size = (input_dim, input_dim))
validation_generator = test_datagen.flow_from_directory(validation_dir, batch_size = 32, class_mode = 'categorical', target_size = (input_dim, input_dim))

In [None]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
base_model = InceptionV3(input_shape = (input_dim, input_dim, 3), include_top = False, weights = 'imagenet')

# for layer in base_model.layers:
#     layer.trainable = False

from tensorflow.keras.optimizers import RMSprop

x = layers.Flatten()(base_model.output)
x = layers.Dense(1024, activation='relu')(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(4, activation='sigmoid')(x)

model = tf.keras.models.Model(base_model.input, x)

# model.compile(optimizer = RMSprop(lr=0.0001), loss = 'binary_crossentropy', metrics = ['acc'])
model.compile(optimizer = 'Adam', loss = 'categorical_crossentropy', metrics = ['acc'])

In [None]:
inc_history = model.fit(train_generator, validation_data = validation_generator, epochs = 10, steps_per_epoch=None)

# ResNet50

In [None]:
# Add our data-augmentation parameters to ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255., rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1.0/255.)
train_generator = train_datagen.flow_from_directory(train_dir, batch_size = 20, class_mode = 'binary', target_size = (224, 224))
validation_generator = test_datagen.flow_from_directory( validation_dir, batch_size = 20, class_mode = 'binary', target_size = (224, 224))

In [None]:
from tensorflow.keras.applications import ResNet50

base_model = ResNet50(input_shape=(224, 224,3), include_top=False, weights="imagenet")

# for layer in base_model.layers:
#     layer.trainable = False

from tensorflow.keras.applications import ResNet50
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Dense, Flatten, GlobalAveragePooling2D

base_model = Sequential()
base_model.add(ResNet50(include_top=False, weights='imagenet', pooling='max'))
base_model.add(Dense(1, activation='sigmoid'))

base_model.compile(optimizer = tf.keras.optimizers.SGD(lr=0.0001), loss = 'binary_crossentropy', metrics = ['acc'])


In [None]:
resnet_history = base_model.fit(train_generator, validation_data = validation_generator, steps_per_epoch = 100, epochs = 10)

# EfficientNet

In [None]:
!pip install -U efficientnet

In [None]:
# Add our data-augmentation parameters to ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255., rotation_range = 40, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True)
test_datagen = ImageDataGenerator(rescale = 1.0/255.)
train_generator = train_datagen.flow_from_directory(train_dir, batch_size = 20, class_mode = 'binary', target_size = (224, 224))
validation_generator = test_datagen.flow_from_directory( validation_dir, batch_size = 20, class_mode = 'binary', target_size = (224, 224))

In [None]:
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers


def build_model(num_classes):
    inputs = layers.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
    x = img_augmentation(inputs)
    model = EfficientNetB0(include_top=False, input_tensor=x, weights="imagenet")

    # Freeze the pretrained weights
    model.trainable = False

    # Rebuild top
    x = layers.GlobalAveragePooling2D(name="avg_pool")(model.output)
    x = layers.BatchNormalization()(x)

    top_dropout_rate = 0.2
    x = layers.Dropout(top_dropout_rate, name="top_dropout")(x)
    outputs = layers.Dense(NUM_CLASSES, activation="softmax", name="pred")(x)

    # Compile
    model = tf.keras.Model(inputs, outputs, name="EfficientNet")
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-2)
    model.compile(
        optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"]
    )
    return model

In [None]:
eff_history = model_final.fit_generator(train_generator, validation_data = validation_generator, steps_per_epoch = 100, epochs = 10)

# ViT

In [None]:
!pip install transformers

In [None]:
from transformers import ViTFeatureExtractor, ViTForImageClassification
from PIL import Image
import requests

url = 'http://images.cocodataset.org/val2017/000000039769.jpg'
image = Image.open(requests.get(url, stream=True).raw)

feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')
model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

inputs = feature_extractor(images=image, return_tensors="pt")
outputs = model(**inputs)
logits = outputs.logits
# model predicts one of the 1000 ImageNet classes
predicted_class_idx = logits.argmax(-1).item()
print("Predicted class:", model.config.id2label[predicted_class_idx])