In [48]:
import os
import torch
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import tensorflow as tf
from tensorflow import keras
from keras.models import Model
from keras.utils import to_categorical
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.vgg16 import VGG16, preprocess_input
from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense, Dropout, Flatten

In [87]:
BATCH_SIZE = 64
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

gpus = tf.config.list_physical_devices('GPU')
if gpus:
  # Restrict TensorFlow to only use the first GPU
  try:
    tf.config.set_visible_devices(gpus[0], 'GPU')
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
  except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

Num GPUs Available:  0


In [None]:
! git clone https://github.com/wduan10/BroadBahnMi.git

Cloning into 'BroadBahnMi'...
remote: Enumerating objects: 442, done.[K
remote: Counting objects: 100% (124/124), done.[K
remote: Compressing objects: 100% (89/89), done.[K
remote: Total 442 (delta 77), reused 80 (delta 34), pack-reused 318[K
Receiving objects: 100% (442/442), 193.98 MiB | 26.89 MiB/s, done.
Resolving deltas: 100% (154/154), done.


In [88]:
# device = torch.device('cuda' if (torch.cuda.is_available()) else 'cpu')
# hpc = (device == 'cuda')
if (gpus):
    labels_path_train = '/groups/CS156b/data/student_labels/train2023.csv'
    labels_path_test = '/groups/CS156b/data/student_labels/test_ids.csv'
    img_dir = '/groups/CS156b/data'

    df_train = pd.read_csv(labels_path_train)[:-1]
else:
    # google colab
    labels_path_train = '/content/BroadBahnMi/data/train/labels/labels.csv'
    labels_path_test = '/content/BroadBahnMi/data/test/ids.csv'
    img_dir = '/content/BroadBahnMi/data'

    df_train = pd.read_csv(labels_path_train)

df_test = pd.read_csv(labels_path_test)
display(df_train)
display(df_test)

def parse_labels(df):
    df.fillna(0, inplace=True)
    return df

classes = ["No Finding", "Enlarged Cardiomediastinum", "Cardiomegaly", "Lung Opacity",
           "Pneumonia", "Pleural Effusion", "Pleural Other", "Fracture", "Support Devices"]

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,Unnamed: 0.1.1,Unnamed: 0.1.1.1,Path,Sex,Age,Frontal/Lateral,AP/PA,No Finding,Enlarged Cardiomediastinum,Cardiomegaly,Lung Opacity,Pneumonia,Pleural Effusion,Pleural Other,Fracture,Support Devices
0,0,0,0,0,0,train/pid50512/study1/view1_frontal.jpg,Female,68,Frontal,AP,1.0,,,,,,,,1.0
1,1,1,1,1,1,train/pid21580/study2/view1_frontal.jpg,Female,87,Frontal,AP,-1.0,,0.0,1.0,,0.0,,1.0,
2,2,2,2,2,2,train/pid21580/study1/view1_frontal.jpg,Female,83,Frontal,AP,-1.0,,,1.0,,,,1.0,
3,3,3,3,3,3,train/pid21580/study1/view2_lateral.jpg,Female,83,Lateral,,-1.0,,,1.0,,,,1.0,
4,4,4,4,4,4,train/pid33839/study1/view1_frontal.jpg,Male,41,Frontal,AP,-1.0,,,,,,,,
5,5,5,5,5,5,train/pid17532/study1/view1_frontal.jpg,Female,20,Frontal,PA,1.0,-1.0,,,,-1.0,,,
6,6,6,6,6,6,train/pid17532/study1/view2_lateral.jpg,Female,20,Lateral,,1.0,-1.0,,,,-1.0,,,
7,7,7,7,7,11,train/pid05208/study1/view1_frontal.jpg,Female,42,Frontal,AP,1.0,-1.0,,,,-1.0,,,
8,8,8,8,8,12,train/pid35409/study1/view1_frontal.jpg,Male,69,Frontal,AP,-1.0,,1.0,1.0,,-1.0,,-1.0,1.0
9,9,9,9,9,13,train/pid35409/study2/view1_frontal.jpg,Male,69,Frontal,AP,-1.0,1.0,,1.0,,-1.0,,-1.0,1.0


Unnamed: 0.1,Unnamed: 0,Id,Path
0,0,18,test/pid56785/study1/view1_frontal.jpg
1,1,19,test/pid56785/study1/view2_lateral.jpg
2,2,44,test/pid57943/study1/view1_frontal.jpg
3,3,45,test/pid57943/study2/view1_frontal.jpg
4,4,57,test/pid54805/study1/view1_frontal.jpg
5,5,102,test/pid54777/study1/view1_frontal.jpg
6,6,104,test/pid54251/study2/view1_frontal.jpg
7,7,105,test/pid54251/study2/view2_lateral.jpg
8,8,106,test/pid54251/study1/view1_frontal.jpg
9,9,107,test/pid54251/study1/view2_lateral.jpg


In [89]:
df = pd.DataFrame()
pathology = 'Fracture'
df['filename'] = df_train['Path']
df['label'] = df_train[pathology]

if (gpus):
    df['label'] = df['label'][:-1]
# else:
#     df['label'] = parse_labels(df['label'][:-1])

# remove Nan values
df = df.dropna()

# 'categorical' requires strings
df['label'] = df['label'].astype(str)

# Stratified train/test split based on 'Frontal/Lateral' column
train_df, val_df = train_test_split(df,
                                    test_size=0.2,
                                    random_state=42)

In [83]:
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                   rescale=1./255, #Normalize
                                   zoom_range=0.4,
                                   horizontal_flip=True)

val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input,
                                 rescale=1./255)

# Apply the ImageDataGenerator to create image batches
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=img_dir,
    x_col='filename',
    y_col='label',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=(224, 224),
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=img_dir,
    x_col='filename',
    y_col='label',
    class_mode='categorical',
    batch_size=BATCH_SIZE,
    target_size=(224, 224),
)

Found 4 validated image filenames belonging to 2 classes.
Found 1 validated image filenames belonging to 1 classes.


VGG16 Model

In [85]:
conv_base = VGG16(include_top=False, weights='imagenet', input_shape=(224, 224, 3))

# Customize top layer
top_layer = conv_base.output
top_layer = tf.keras.layers.GlobalAveragePooling2D()(top_layer)
top_layer = Dense(4096, activation='relu')(top_layer)
top_layer = Dense(1072, activation='relu')(top_layer)
top_layer = Dropout(0.2)(top_layer)
output_layer = Dense(2, activation='softmax')(top_layer) # Predicting for one pathology

model = Model(inputs=conv_base.input, outputs=output_layer)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

model.fit(
    train_generator,
    epochs=1,
    validation_data=val_generator,
    verbose=1)

model.evaluate(val_generator)



[8.824780464172363, 1.0]