In [None]:
import os
import PIL
## checking for xrays and their respective masks
from glob import glob
import re
from collections import defaultdict
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import tensorflow as tf
from skimage import measure

# Root folder of the chest X-ray dataset on Kaggle.
DIR = "/kaggle/input/chest-xray-masks-and-labels/data/"

# glob gives no ordering guarantee; sorting keeps the row order of paths_df
# (and therefore the seeded train/test split later on) reproducible.
lung_image_paths = sorted(glob(os.path.join(DIR,"Lung Segmentation/CXR_png/*.png")))
mask_image_paths = sorted(glob(os.path.join(DIR,"Lung Segmentation/masks/*.png")))

# Both columns are declared up front so paths_df always has them, even when
# no image/mask pair is found.
related_paths = defaultdict(list, {"image_path": [], "mask_path": []})

for img_path in lung_image_paths:
    img_match = re.search(r"CXR_png/(.*)\.png$", img_path)
    if not img_match:
        # Without this guard the mask loop would run with the previous
        # image's name (or an undefined name on the first iteration).
        continue
    img_name = img_match.group(1)
    for mask_path in mask_image_paths:
        # Plain substring test: the image name is data, not a regex pattern.
        if img_name in mask_path:
            related_paths["image_path"].append(img_path)
            related_paths["mask_path"].append(mask_path)

# One row per (image, mask) pair.
paths_df = pd.DataFrame.from_dict(related_paths)

In [None]:
# Import the function, as the first cell does: `import glob` would rebind the
# name `glob` to the module and break a re-run of the first cell.
from glob import glob
import os
# NOTE(review): this folder has no "data/" segment, unlike DIR in the first cell — confirm it is the intended location.
FIND_FOLDER = r'/kaggle/input/chest-xray-masks-and-labels/Lung Segmentation/CXR_png/'
# Sorted because glob returns matches in arbitrary order.
images_files = sorted(glob(os.path.join(FIND_FOLDER, '*.png')))
print(len(images_files), "Files Found.")

In [None]:
# A bare `import PIL` does not load the Image submodule; import it explicitly
# so the cell does not depend on another library having loaded it first.
from PIL import Image

# Row of paths_df to preview.
xray_num = 5
img_path = paths_df["image_path"][xray_num]
mask_path = paths_df["mask_path"][xray_num]

img = Image.open(img_path)
mask = Image.open(mask_path)

fig = plt.figure(figsize = (10,10))

# Left panel: the X-ray; right panel: the mask paired with it in paths_df.
ax1 = fig.add_subplot(2,2,1)
ax1.imshow(img, cmap = "gray")
ax1.set_title("X-ray")
ax2 = fig.add_subplot(2,2,2)
ax2.imshow(mask, cmap = "gray")
ax2.set_title("Mask");

In [None]:
from tqdm import tqdm
import cv2
def _load_scaled(path, resize_shape):
    """Read ``path`` with OpenCV, resize it to ``resize_shape`` and divide by 255."""
    raw = cv2.imread(path)
    if raw is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image file: {}".format(path))
    return cv2.resize(raw, resize_shape) / 255.


def prepare_train_test(df = pd.DataFrame(), resize_shape = tuple(), color_mode = "rgb"):
    """Load every X-ray and mask listed in ``df`` as resized, scaled arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing ``image_path`` and ``mask_path`` columns.
    resize_shape : tuple
        Target size passed to ``cv2.resize``.
    color_mode : {"rgb", "gray"}
        "gray" keeps only channel 0 of each image; "rgb" keeps every channel
        as loaded by ``cv2.imread``.

    Returns
    -------
    tuple(list, list)
        ``(img_array, mask_array)``; pixel values are divided by 255 and each
        mask keeps only channel 0.

    Raises
    ------
    ValueError
        If ``color_mode`` is neither "gray" nor "rgb".
    FileNotFoundError
        If a listed file cannot be read.
    """
    if color_mode not in ("gray", "rgb"):
        raise ValueError("color_mode must be 'gray' or 'rgb', got {!r}".format(color_mode))

    img_array = list()
    mask_array = list()

    # Iterate the frame that was passed in rather than the notebook-level paths_df.
    for image_path in tqdm(df.image_path):
        resized_image = _load_scaled(image_path, resize_shape)
        if color_mode == "gray":
            img_array.append(resized_image[:,:,0])
        else:
            img_array.append(resized_image)

    for mask_path in tqdm(df.mask_path):
        resized_mask = _load_scaled(mask_path, resize_shape)
        mask_array.append(resized_mask[:,:,0])

    return img_array, mask_array

img_array, mask_array = prepare_train_test(df = paths_df, resize_shape = (256,256), color_mode = "gray")


In [None]:
from tqdm import tqdm
import cv2
def prepare_train_test1(df = pd.DataFrame(), resize_shape = tuple(), color_mode = "gray"):
    """Load every image listed in ``df.covid_path`` as a resized, scaled array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing a ``covid_path`` column.
    resize_shape : tuple
        Target size passed to ``cv2.resize``.
    color_mode : {"gray", "rgb"}
        "gray" keeps only channel 0; "rgb" keeps every channel as loaded by
        ``cv2.imread``.

    Returns
    -------
    list
        One array per image, pixel values divided by 255.

    Raises
    ------
    ValueError
        If ``color_mode`` is neither "gray" nor "rgb".
    FileNotFoundError
        If a listed file cannot be read.
    """
    if color_mode not in ("gray", "rgb"):
        raise ValueError("color_mode must be 'gray' or 'rgb', got {!r}".format(color_mode))

    covid_array = list()

    # Iterate the frame that was passed in rather than a notebook-level global.
    for covid_path in tqdm(df.covid_path):
        raw = cv2.imread(covid_path)
        if raw is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError("Could not read image file: {}".format(covid_path))
        resized_image = cv2.resize(raw, resize_shape) / 255.
        if color_mode == "gray":
            covid_array.append(resized_image[:,:,0])
        else:
            covid_array.append(resized_image)

    return covid_array

# NOTE(review): paths_dfc is not created in any visible cell — confirm it is defined before this call runs.
covid_array = prepare_train_test1(df = paths_dfc, resize_shape = (256,256), color_mode = "gray")

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the image/mask pairs for testing; the seed is fixed.
img_train, img_test, mask_train, mask_test = train_test_split(
    img_array, mask_array, test_size = 0.2, random_state= 42
)

img_side_size = 256
# Turn each list of 2-D frames into a single (count, side, side) array.
img_train, img_test, mask_train, mask_test = (
    np.reshape(np.array(part), (len(part), img_side_size, img_side_size))
    for part in (img_train, img_test, mask_train, mask_test)
)

In [None]:
#img_side_size = 256
#covid_array = np.array(covid_array).reshape(len(covid_array), img_side_size, img_side_size, 1)



In [None]:
### U-net 

from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras import backend as keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, LearningRateScheduler
from tensorflow.keras.activations import *

def dice_coef(y_true, y_pred):
    """Smoothed Dice score computed with Keras backend ops.

    Both tensors are flattened and the result is
    ``(2 * sum(true * pred) + 1) / (sum(true) + sum(pred) + 1)``.
    """
    flat_true, flat_pred = keras.flatten(y_true), keras.flatten(y_pred)
    overlap = keras.sum(flat_true * flat_pred)
    total = keras.sum(flat_true) + keras.sum(flat_pred)
    return (2. * overlap + 1) / (total + 1)

def dice_coef_loss(y_true, y_pred):
    """Loss form of ``dice_coef``: the score with its sign flipped."""
    score = dice_coef(y_true, y_pred)
    return -score

def unet(input_size=(256,256,1)):
    """Build the U-Net used for lung segmentation.

    Four encoder stages (32, 64, 128, 256 filters), each two 3x3 ReLU
    convolutions followed by 2x2 max pooling; a 512-filter bottleneck; four
    decoder stages that upsample with a strided transposed convolution,
    concatenate the matching encoder output on axis 3 and apply two more
    convolutions. A final 1x1 sigmoid convolution gives a one-channel output.

    Parameters
    ----------
    input_size : tuple
        Shape passed to ``Input``.

    Returns
    -------
    Model
        The uncompiled Keras model.
    """
    def conv_pair(tensor, filters):
        # Two stacked 3x3 'same' convolutions with ReLU.
        for _ in range(2):
            tensor = Conv2D(filters, (3, 3), activation='relu', padding='same')(tensor)
        return tensor

    inputs = Input(input_size)

    # Encoder: remember each stage's output for the skip connections.
    skips = []
    x = inputs
    for filters in (32, 64, 128, 256):
        x = conv_pair(x, filters)
        skips.append(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)

    # Bottleneck.
    x = conv_pair(x, 512)

    # Decoder: consume the skip connections deepest-first.
    for filters in (256, 128, 64, 32):
        upsampled = Conv2DTranspose(filters, (2, 2), strides=(2, 2), padding='same')(x)
        x = concatenate([upsampled, skips.pop()], axis=3)
        x = conv_pair(x, filters)

    outputs = Conv2D(1, (1, 1), activation='sigmoid')(x)

    return Model(inputs=[inputs], outputs=[outputs])

In [None]:
# NOTE(review): EPOCHS is not read by the model.fit call visible in this notebook, which passes epochs = 30 — confirm which value is intended.
EPOCHS = 50
model = unet(input_size=(256,256,1))
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and rejected by recent Keras releases.
model.compile(optimizer=Adam(learning_rate=5*1e-4), loss="binary_crossentropy",
              metrics=[dice_coef, 'binary_accuracy'])
model.summary()

In [None]:
# Write a diagram of the network to model.png.
tf.keras.utils.plot_model(model=model, to_file='model.png')

In [None]:
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, EarlyStopping, ReduceLROnPlateau
weight_path="{}_weights.best.hdf5".format('cxr_reg')

# Write weights only, and only when the monitored `loss` improves.
checkpoint = ModelCheckpoint(
    filepath=weight_path,
    monitor='loss',
    save_best_only=True,
    save_weights_only=True,
)

# Stop after 10 epochs without improvement of `loss`
# (probably needs to be more patient, but kaggle time is limited).
early = EarlyStopping(monitor="loss", patience=10)

callbacks_list = [checkpoint, early]

In [None]:
#earlystopping = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=10)

# Train with the held-out split as validation data, then save the full model.
history = model.fit(
    img_train,
    mask_train,
    batch_size=8,
    epochs=30,
    validation_data=(img_test, mask_test),
    callbacks=callbacks_list,
)
model.save('my_model.h5')

In [None]:
# Save the model to my_model.h5.
model.save(filepath='my_model.h5')

In [None]:
def test_on_image(model, img_array, img_num, img_side_size = 256, mask_array = None):
    """Predict the mask for one image and plot it next to the reference mask.

    Parameters
    ----------
    model : object with ``predict``
        Model applied to the selected image.
    img_array : array-like
        Images; ``img_array[img_num]`` is reshaped to
        ``(1, img_side_size, img_side_size, 1)`` before prediction.
    img_num : int
        Index of the image (and of its mask) to show.
    img_side_size : int
        Side length used for all reshapes.
    mask_array : array-like, optional
        Reference masks indexed like ``img_array``. When omitted, the
        notebook-level ``mask_test`` is used.

    Returns
    -------
    numpy.ndarray
        The thresholded prediction (values 0 or 1), in the shape returned by
        ``model.predict``.
    """
    if mask_array is None:
        mask_array = mask_test

    side = (img_side_size, img_side_size)

    pred = model.predict(img_array[img_num].reshape(1,img_side_size,img_side_size,1))
    # One comparison covers every value: two separate `>` / `<` assignments
    # would leave a prediction of exactly 0.5 unthresholded.
    pred = (pred > 0.5).astype(pred.dtype)

    pred_2d = pred.reshape(side)
    mask_2d = mask_array[img_num].reshape(side)

    fig, axes = plt.subplots(1, 4, figsize = (15,10))

    axes[0].imshow(pred_2d, cmap = "gray")
    axes[0].set_title("Prediction")

    axes[1].imshow(mask_2d, cmap = "gray")
    axes[1].set_title("Actual")

    # Reference mask with the prediction drawn semi-transparently on top.
    axes[2].imshow(mask_2d, cmap = "gray", alpha = 0.5)
    axes[2].imshow(pred_2d, cmap = "PuBu", alpha = 0.3)
    axes[2].set_title("Overlap")

    axes[3].imshow(img_array[img_num].reshape(side), cmap = "gray")
    axes[3].set_title("Original")

    for ax in axes:
        ax.axis("off")

    return pred

def dice_coef_test(y_true, y_pred):
    """NumPy Dice score: ``2 * sum(true * pred) / (sum(true) + sum(pred))``.

    Returns 1 when both inputs sum to zero.
    """
    truth = y_true.flatten()
    guess = y_pred.flatten()
    denominator = np.sum(truth) + np.sum(guess)
    if denominator != 0:
        overlap = np.sum(truth * guess)
        return 2. * overlap / denominator
    return 1

IMG_NUM = 3 # Best img_num 12 (0.98), worst img_num 10 (0.9)
# Plot the prediction for one test image, then show its Dice score against the reference mask.
prediction = test_on_image(model, img_test, IMG_NUM, 256)
dice_coef_test(mask_test[IMG_NUM], prediction)

In [None]:
def get_metrics(history):
    """Plot training and validation curves for the loss and the Dice metric.

    ``history`` is the object returned by ``model.fit``; its ``history`` dict
    is read for "loss", "val_loss", "dice_coef" and "val_dice_coef".
    """
    # (subplot slot, training key, validation key, training label, validation label, y label)
    panels = (
        (1, "loss", "val_loss", "training loss", "validation loss", "Binary Cross entropy"),
        (2, "dice_coef", "val_dice_coef", "training dice coefficient",
         "validation dice coefficient", "Dice Coefficient"),
    )

    plt.figure(figsize = (10,10))
    for slot, train_key, val_key, train_label, val_label, y_label in panels:
        plt.subplot(2,2,slot)
        plt.plot(history.history[train_key], label = train_label)
        plt.plot(history.history[val_key], label = val_label)
        plt.legend()
        plt.xlabel("Epochs")
        plt.ylabel(y_label)

get_metrics(history = history)

In [None]:
#custom dataset masks extraction
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
from glob import glob
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from skimage import exposure
import cv2
import warnings
warnings.filterwarnings('ignore')
import shutil
import tensorflow as tf
%matplotlib inline

import matplotlib.pylab as pylab
import seaborn as sns
import pprint
import pydicom as dicom
from pydicom.pixel_data_handlers.util import apply_voi_lut
import wandb

import PIL
from PIL import Image
from colorama import Fore, Back, Style
viz_counter=0



In [None]:
# Import the function form: `import glob` would rebind the name `glob`, which
# other cells import as a function, to the module.
from glob import glob
import os
FIND_FOLDER = r'/kaggle/input/sample/sample/sample/images'
# Sorted because glob returns matches in arbitrary order; cells that slice or
# iterate images_files then see the same files on every run.
images_files = sorted(glob(os.path.join(FIND_FOLDER, '*.png')))
print(len(images_files), "Files Found.")

In [None]:
# Preview subset: the first two entries of images_files.
subset_png_files = images_files[0:2]

In [None]:
viz_counter=0
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

def props(arr):
    """Print the shape, maximum, minimum and dtype of ``arr`` on one line."""
    summary = (
        "Shape:", arr.shape,
        "Maximum:", arr.max(),
        "Minimum:", arr.min(),
        "Data Type:", arr.dtype,
    )
    print(*summary)

for path in subset_png_files:  # Change this variable to your list of PNG file paths
    # Read the PNG as a single-channel image.
    modified_image_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if modified_image_data is None:
        # cv2.imread returns None for a missing/unreadable file; report it and
        # move on instead of failing inside props().
        print("Skipping unreadable image:", path)
        continue

    # Display properties
    props(modified_image_data)

    # Same image twice: grayscale on the left, viridis colour map on the right.
    fig, ax = plt.subplots(1, 2, figsize=(20, 12))
    ax[0].imshow(modified_image_data, cmap="gray")
    ax[0].axis('off')
    ax[1].imshow(modified_image_data, cmap="viridis")
    ax[1].axis('off')

    # Save the figure under the running counter, show it, then release it so
    # figures do not pile up across iterations.
    fig.savefig(str(viz_counter) + ".png", dpi=300)
    viz_counter += 1
    plt.show()
    plt.close(fig)


In [None]:
from tqdm import tqdm
import cv2
def prepare_train_test_sample(df = pd.DataFrame(), resize_shape = tuple(), color_mode = "rgb"):
    """Load every image listed in ``df.image_path`` as a resized, scaled array.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame providing an ``image_path`` column of image file paths.
    resize_shape : tuple
        Target size passed to ``cv2.resize``.
    color_mode : {"rgb", "gray"}
        "gray" keeps only channel 0; "rgb" keeps every channel as loaded by
        ``cv2.imread``.

    Returns
    -------
    list
        One array per image, pixel values divided by 255.

    Raises
    ------
    ValueError
        If ``color_mode`` is neither "gray" nor "rgb".
    FileNotFoundError
        If a listed file cannot be read.
    """
    if color_mode not in ("gray", "rgb"):
        raise ValueError("color_mode must be 'gray' or 'rgb', got {!r}".format(color_mode))

    img_array = list()

    # Iterate the frame that was passed in rather than the notebook-level paths_df.
    for image_path in tqdm(df.image_path):
        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError("Could not read image file: {}".format(image_path))
        resized_image = cv2.resize(raw, resize_shape) / 255.
        if color_mode == "gray":
            img_array.append(resized_image[:,:,0])
        else:
            img_array.append(resized_image)

    return img_array

# List the PNG files inside the sample folder: the folder path itself is not
# an image and cannot be read by cv2.imread.
sample_dir = '/kaggle/input/sample/sample/sample/images'
# NOTE(review): this rebinds paths_df (and img_array below), replacing the training tables built earlier — confirm that is intended.
paths_df = pd.DataFrame({'image_path': sorted(
    os.path.join(sample_dir, name) for name in os.listdir(sample_dir) if name.endswith('.png')
)})

img_array = prepare_train_test_sample(df = paths_df, resize_shape = (256,256), color_mode = "gray")


In [None]:
from tqdm import tqdm
import cv2
import os
import pandas as pd

def prepare_train_test_sample(folder_path='', resize_shape=tuple(), color_mode="rgb"):
    """Load every .jpg/.png/.jpeg file in ``folder_path`` as a resized, scaled array.

    Parameters
    ----------
    folder_path : str
        Directory whose image files are loaded, in sorted name order.
    resize_shape : tuple
        Target size passed to ``cv2.resize``.
    color_mode : {"rgb", "gray"}
        "gray" keeps only channel 0; "rgb" keeps every channel as loaded by
        ``cv2.imread``.

    Returns
    -------
    list
        One array per image, pixel values divided by 255.

    Raises
    ------
    ValueError
        If ``color_mode`` is neither "gray" nor "rgb".
    FileNotFoundError
        If a listed file cannot be read.
    """
    if color_mode not in ("gray", "rgb"):
        raise ValueError("color_mode must be 'gray' or 'rgb', got {!r}".format(color_mode))

    img_array = list()

    # os.listdir returns names in arbitrary order; sort so the result order is stable.
    file_list = [
        os.path.join(folder_path, file)
        for file in sorted(os.listdir(folder_path))
        if file.endswith(('.jpg', '.png', '.jpeg'))
    ]

    for image_path in tqdm(file_list):
        raw = cv2.imread(image_path)
        if raw is None:
            # cv2.imread reports a missing/unreadable file by returning None.
            raise FileNotFoundError("Could not read image file: {}".format(image_path))
        resized_image = cv2.resize(raw, resize_shape) / 255.
        if color_mode == "gray":
            img_array.append(resized_image[:, :, 0])
        else:
            img_array.append(resized_image)

    return img_array

# Specify the path to your folder containing images
folder_path = '/kaggle/input/sample/sample/sample/images/'

# NOTE(review): this rebinds img_array, replacing the training images loaded earlier — confirm that is intended.
img_array = prepare_train_test_sample(folder_path=folder_path, resize_shape=(256, 256), color_mode="gray")


In [None]:
viz_counter=0
import cv2
import os
import numpy as np
import matplotlib.pyplot as plt

def props(arr):
    """Print one line describing ``arr``: shape, maximum, minimum and dtype."""
    stats = [
        ("Shape:", arr.shape),
        ("Maximum:", arr.max()),
        ("Minimum:", arr.min()),
        ("Data Type:", arr.dtype),
    ]
    print(*(item for pair in stats for item in pair))

for path in subset_png_files:  # Change this variable to your list of PNG file paths
    # Read the PNG as a single-channel image.
    modified_image_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if modified_image_data is None:
        # cv2.imread returns None for a missing/unreadable file; report it and
        # move on instead of failing inside props().
        print("Skipping unreadable image:", path)
        continue

    # Display properties
    props(modified_image_data)

    # Same image twice: grayscale on the left, viridis colour map on the right.
    fig, ax = plt.subplots(1, 2, figsize=(20, 12))
    ax[0].imshow(modified_image_data, cmap="gray")
    ax[0].axis('off')
    ax[1].imshow(modified_image_data, cmap="viridis")
    ax[1].axis('off')

    # Save the figure under the running counter, show it, then release it so
    # figures do not pile up across iterations.
    fig.savefig(str(viz_counter) + ".png", dpi=300)
    viz_counter += 1
    plt.show()
    plt.close(fig)


In [None]:
from keras.models import *
from keras.layers import *
from keras.optimizers import *
from keras import backend as keras
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint, LearningRateScheduler


def dice_coef(y_true, y_pred):
    """Dice score with +1 smoothing, evaluated on the flattened tensors.

    Computes ``(2 * sum(true * pred) + 1) / (sum(true) + sum(pred) + 1)``
    with Keras backend operations.
    """
    truth = keras.flatten(y_true)
    guess = keras.flatten(y_pred)
    numerator = 2. * keras.sum(truth * guess) + 1
    denominator = keras.sum(truth) + keras.sum(guess) + 1
    return numerator / denominator

def dice_coef_loss(y_true, y_pred):
    """Return the negated ``dice_coef`` of the two tensors."""
    dice = dice_coef(y_true, y_pred)
    return -dice

def unet(input_size=(256,256,1)):
    """Assemble the U-Net segmentation network.

    The encoder has four levels (32, 64, 128, 256 filters) of two 3x3 ReLU
    convolutions plus 2x2 max pooling, followed by a 512-filter bridge. The
    decoder mirrors it: a strided transposed convolution, concatenation with
    the encoder output of the same level on axis 3, and two convolutions.
    The output layer is a 1x1 convolution with a sigmoid activation.

    Parameters
    ----------
    input_size : tuple
        Shape passed to ``Input``.

    Returns
    -------
    Model
        The uncompiled Keras model.
    """
    def twin_conv(width, feed):
        # Two consecutive 3x3 'same' ReLU convolutions of equal width.
        first = Conv2D(width, (3, 3), activation='relu', padding='same')(feed)
        return Conv2D(width, (3, 3), activation='relu', padding='same')(first)

    def climb(width, below, skip):
        # Upsample `below`, join it with the encoder output `skip`, convolve twice.
        grown = Conv2DTranspose(width, (2, 2), strides=(2, 2), padding='same')(below)
        return twin_conv(width, concatenate([grown, skip], axis=3))

    net_in = Input(input_size)

    enc1 = twin_conv(32, net_in)
    down1 = MaxPooling2D(pool_size=(2, 2))(enc1)

    enc2 = twin_conv(64, down1)
    down2 = MaxPooling2D(pool_size=(2, 2))(enc2)

    enc3 = twin_conv(128, down2)
    down3 = MaxPooling2D(pool_size=(2, 2))(enc3)

    enc4 = twin_conv(256, down3)
    down4 = MaxPooling2D(pool_size=(2, 2))(enc4)

    bridge = twin_conv(512, down4)

    dec4 = climb(256, bridge, enc4)
    dec3 = climb(128, dec4, enc3)
    dec2 = climb(64, dec3, enc2)
    dec1 = climb(32, dec2, enc1)

    net_out = Conv2D(1, (1, 1), activation='sigmoid')(dec1)

    return Model(inputs=[net_in], outputs=[net_out])

In [None]:
# NOTE(review): the network is built for 512x512 inputs, while the training call uses resize_shape = (256,256) and later cells set Shape_X, Shape_Y = 1024,1024 — confirm the intended inference size.
model = unet(input_size=(512,512,1))
# `learning_rate` is the supported keyword; the older `lr` alias is deprecated
# and rejected by recent Keras releases.
model.compile(optimizer=Adam(learning_rate=1e-5), loss=dice_coef_loss,
                  metrics=[dice_coef, 'binary_accuracy'])
#model.summary()

In [None]:
# Load the checkpointed weights into the model.
model_weights_path = "/kaggle/working/cxr_reg_weights.best.hdf5"
model.load_weights(filepath=model_weights_path)

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

"""
Shapes that you wish to resize to
"""
# Size (rows, columns) that images and masks are displayed and saved at.
Shape_X, Shape_Y = 1024,1024

# Spatial size the network was built for: model.input_shape is
# (batch, height, width, channels). Fall back to the display size when the
# model leaves a dimension unspecified.
net_h, net_w = model.input_shape[1:3]
net_h = net_h or Shape_X
net_w = net_w or Shape_Y

for path in subset_png_files:  # Change this variable to your list of PNG file paths
    # Read PNG file
    modified_image_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if modified_image_data is None:
        # cv2.imread returns None for a missing/unreadable file.
        print("Skipping unreadable image:", path)
        continue

    # Display-size copy of the image (cv2.resize takes (width, height)).
    resized_image_data = cv2.resize(modified_image_data, (Shape_Y, Shape_X))

    # Feed the network at its own input size, scaled by 1/255 like the
    # training images (prepare_train_test divides by 255).
    net_input = cv2.resize(modified_image_data, (net_w, net_h))
    prep_unet_input_img = net_input.reshape(1, net_h, net_w, 1) / 255.

    # Make predictions using the model
    pred_img = model.predict(prep_unet_input_img)
    pred_img_preprocessed_1 = np.squeeze(pred_img)
    # uint8 is the 8-bit mask type OpenCV works with and, unlike int8, can hold 255.
    pred_mask = (pred_img_preprocessed_1 * 255 > 127).astype(np.uint8)
    # Nearest-neighbour scaling keeps the mask strictly 0/1 at display size.
    pred_img_preprocessed = cv2.resize(pred_mask, (Shape_Y, Shape_X), interpolation=cv2.INTER_NEAREST)

    # Apply the mask to the original image
    res = cv2.bitwise_and(resized_image_data, resized_image_data, mask=pred_img_preprocessed)

    # Panels: input image, predicted mask, masked image.
    fig, ax = plt.subplots(1, 3, figsize=(20, 12))
    ax[0].imshow(resized_image_data, cmap="gray")
    ax[0].axis('off')
    ax[1].imshow(pred_img_preprocessed, cmap="gray")
    ax[1].axis('off')
    ax[2].imshow(res, cmap="viridis")
    ax[2].axis('off')
    fig.savefig(str(viz_counter)+".png",dpi=300)
    viz_counter+=1
    # The masked image itself is written under the next counter value.
    cv2.imwrite(str(viz_counter)+".png",res)
    viz_counter+=1
    plt.show()
    # Release the figure so figures do not pile up across iterations.
    plt.close(fig)

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt

"""
Shapes that you wish to resize to
"""
# Size (rows, columns) that images and masks are saved at.
Shape_X, Shape_Y = 1024,1024
out = 'seg_masks'
# Create the output directory if it doesn't exist
os.makedirs(out, exist_ok=True)

# Spatial size the network was built for: model.input_shape is
# (batch, height, width, channels). Fall back to the output size when the
# model leaves a dimension unspecified.
net_h, net_w = model.input_shape[1:3]
net_h = net_h or Shape_X
net_w = net_w or Shape_Y

for path in images_files:  # Change this variable to your list of PNG file paths
    # Read PNG file
    modified_image_data = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
    if modified_image_data is None:
        # cv2.imread returns None for a missing/unreadable file.
        print("Skipping unreadable image:", path)
        continue

    # Output-size copy of the image (cv2.resize takes (width, height)).
    resized_image_data = cv2.resize(modified_image_data, (Shape_Y, Shape_X))

    # Feed the network at its own input size, scaled by 1/255 like the
    # training images (prepare_train_test divides by 255).
    net_input = cv2.resize(modified_image_data, (net_w, net_h))
    prep_unet_input_img = net_input.reshape(1, net_h, net_w, 1) / 255.

    # Make predictions using the model
    pred_img = model.predict(prep_unet_input_img)
    pred_img_preprocessed_1 = np.squeeze(pred_img)
    # uint8 rather than int8: the mask is multiplied by 255 before saving,
    # which does not fit in int8.
    pred_mask = (pred_img_preprocessed_1 * 255 > 127).astype(np.uint8)
    # Nearest-neighbour scaling keeps the mask strictly 0/1 at output size.
    pred_img_preprocessed = cv2.resize(pred_mask, (Shape_Y, Shape_X), interpolation=cv2.INTER_NEAREST)

    # Apply the mask to the original image
    res = cv2.bitwise_and(resized_image_data, resized_image_data, mask=pred_img_preprocessed)

    # Draw each mask on its own figure and close it afterwards; drawing on a
    # shared figure would stack every mask on the same axes and never free them.
    fig, ax = plt.subplots()
    ax.imshow(pred_img_preprocessed,cmap='gray')
    fig.savefig(str(viz_counter) + ".png", dpi=300)
    plt.close(fig)

    # Binary mask (0/255) named after the source image. It is written once:
    # a figure saved to the same path would be overwritten by this call.
    save_path = os.path.join(out, f"{os.path.splitext(os.path.basename(path))[0]}_mask.png")
    cv2.imwrite(save_path, pred_img_preprocessed * 255)

    # The counter advances by two per image.
    viz_counter += 2