In [None]:
# Mount Google Drive at /content/drive so the project folder referenced by
# BASE_PATH (defined in the next cell) is reachable from this Colab session.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Project root on the mounted Drive. The dataset archive is read from
# BASE_PATH/datasets and the generated COCO JSON files are written beneath it.
BASE_PATH = '/content/drive/MyDrive/Portfolio/Projects/Course/Computer-Aided-Diagnosis-GI-Tract-Image-Segmentation'

In [None]:
# Extract the dataset archive stored under BASE_PATH into /content/datasets.
import os
import zipfile

zip_path = f'{BASE_PATH}/datasets/uw-madison-gi-tract-image-segmentation.zip'
extract_path = '/content/datasets'

# The context manager closes the archive once every member has been written out.
with zipfile.ZipFile(zip_path, mode='r') as archive:
    archive.extractall(path=extract_path)

print("Unzipped successfully!")

In [None]:
# Work from the project root before the next cell imports from `src.data`
# (assumes the `src` package lives directly under BASE_PATH — TODO confirm).
os.chdir(BASE_PATH)
# Print the working directory as a sanity check.
!pwd

In [None]:
import os
import pycocotools
from pycocotools import mask
import pycocotools.mask as mask_util
import numpy as np
import json
from pycocotools.coco import COCO
from sklearn.model_selection import train_test_split
import random
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec
import matplotlib.patches as mpatches
import matplotlib as mpl
from pathlib import Path
from matplotlib.colors import ListedColormap

from src.data import SegmentationDataset
from src.data import DataGenerator

def np_encoder(object):
    """Fallback serializer for ``json.dump(..., default=np_encoder)``.

    Converts NumPy values that the standard ``json`` encoder cannot handle
    into plain Python equivalents.

    Args:
        object: The value the JSON encoder failed to serialize. The name
            shadows the builtin ``object``; it is kept unchanged so existing
            keyword callers keep working.

    Returns:
        The Python scalar for a NumPy scalar (``np.generic``), or a (nested)
        list for an ``np.ndarray``.

    Raises:
        TypeError: If ``object`` is not a supported NumPy type. Without this,
            the function would return ``None`` and the encoder would silently
            write ``null`` in place of the unsupported value.
    """
    if isinstance(object, np.generic):
        return object.item()
    if isinstance(object, np.ndarray):
        return object.tolist()
    raise TypeError(
        f"Object of type {type(object).__name__} is not JSON serializable"
    )

# Random seed passed as `random_state` to the train/val/test splits below.
SEED = 42

In [None]:
# Switch to /content, where the archive was extracted (/content/datasets), so
# the relative 'datasets/...' paths used in the following cells resolve.
os.chdir('/content')
!pwd

In [None]:
# Organ classes to segment; passed to `create_coco_categories` and `DataGenerator`.
# NOTE(review): the list order presumably fixes the category / mask-channel order — confirm.
CLASSES = ['small_bowel', 'large_bowel', 'stomach']

# Helper Functions

# Load data

In [None]:
# Build the dataset helper from the extracted images and the training CSV.
# Both paths are relative to the current working directory (/content).
sd = SegmentationDataset(
    dataset_dir='datasets/train',
    csv_file_path='datasets/train.csv',
)

In [None]:
# Dataframe exposed by SegmentationDataset; the cells below rely on its
# 'id', 'class' and 'segmentation' columns.
df = sd.processed_df
df.head()

# Split Data

In [None]:
# Create the train/val/test split at slice level.
#
# `temp` holds one row per slice id and one column per class with the number of
# non-null segmentations for that (id, class) pair. 'all 3 organs' is the row-wise
# total of those columns and is used as the stratification key.
# NOTE(review): splitting per slice id may place slices of the same scan in
# different subsets — confirm this is intended.
temp = (
    df.groupby(['id', 'class'])
    .agg({'segmentation': 'count'})
    .reset_index()
    .pivot_table(index='id', columns='class', values='segmentation')
    .reset_index()
)
temp['all 3 organs'] = temp[['large_bowel', 'small_bowel', 'stomach']].sum(axis=1).astype(int)

# First cut: 65% of the slices go to train, 35% are held out.
df_images_train, df_images_temp = train_test_split(
    temp,
    test_size=0.35,
    stratify=temp['all 3 organs'],
    random_state=SEED,
)

# Second cut: 57.14% of the held-out 35% becomes test (~20% overall),
# the remainder becomes validation (~15% overall).
df_images_val, df_images_test = train_test_split(
    df_images_temp,
    test_size=0.5714,
    stratify=df_images_temp['all 3 organs'],
    random_state=SEED,
)

# For each subset keep only the rows whose id belongs to it and that actually
# carry a segmentation, then renumber the index from zero.
train_df, val_df, test_df = (
    df[df['id'].isin(subset['id']) & df['segmentation'].notna()].reset_index(drop=True)
    for subset in (df_images_train, df_images_val, df_images_test)
)

In [None]:
# Inspect the per-slice table (one row per id) that drove the stratified split.
temp

In [None]:
# Inspect the training rows (only rows with a non-null segmentation).
train_df

In [None]:
# Inspect the validation rows (only rows with a non-null segmentation).
val_df

In [None]:
# Inspect the test rows (only rows with a non-null segmentation).
test_df

In [None]:
# Relative frequency of each class among the training rows.
train_df['class'].value_counts(normalize=True)

In [None]:
# Relative frequency of each class among the validation rows.
val_df['class'].value_counts(normalize=True)

In [None]:
# Relative frequency of each class among the test rows.
test_df['class'].value_counts(normalize=True)

# Test 8 Samples

In [None]:
# Draw a small sample of training rows to smoke-test the COCO export and the
# data generator. `random_state=SEED` makes the same rows come back on every
# Restart & Run All, consistent with the seeded splits above.
temp_df = train_df.sample(8, random_state=SEED).reset_index(drop=True)

In [None]:
# Build the entries for the COCO 'categories' section from CLASSES and display them.
categories = sd.create_coco_categories(CLASSES)
categories

In [None]:
# Build the entries for the COCO 'images' section from the sampled rows; show the first one.
temp_images = sd.create_coco_images(temp_df)
temp_images[0]

In [None]:
# Build the entries for the COCO 'annotations' section for the sampled rows,
# using the image entries created above; show the first one.
temp_annotations = sd.create_annotations(temp_df, temp_images)
temp_annotations[0]

In [None]:
# Number of annotation entries produced for the sample.
len(temp_annotations)

In [None]:
# Assemble the three COCO sections for the sample and write them to
# 'temp_json.json' in the current working directory.
temp_json = dict(
    categories=categories,
    images=temp_images,
    annotations=temp_annotations,
)

# `default=np_encoder` handles values the json encoder cannot serialize itself.
with open('temp_json.json', mode='w', encoding='utf-8') as json_file:
    json.dump(temp_json, json_file, ensure_ascii=True, indent=4, default=np_encoder)

In [None]:
# Load the sample annotation file back through pycocotools to check that it
# parses, and collect the image ids it contains.
annFile = Path('temp_json.json')
coco = COCO(annFile)
imgIds = coco.getImgIds()

In [None]:
# Data generator over the sample annotation file; images are read from
# 'datasets/train' relative to the current working directory.
temp_generator_class = DataGenerator(
    dataset_dir='datasets/train',
    subset="train",
    classes=CLASSES,
    input_image_size=(128, 128),
    annFile='temp_json.json',
    shuffle=True,
)

In [None]:


# Fetch item 1 from the sample generator and overlay one mask channel on the image.
# NOTE(review): the indexing below treats X as (H, W, C) and y as (H, W, n_classes);
# confirm against DataGenerator.__getitem__ (it may return a batch instead).
X, y = temp_generator_class[1]

# Two-colour map: value 0 is drawn as 'none' (transparent), value 1 as red.
cmap = ListedColormap(['none', 'red'])  # 'none' is transparent, 'red' for the mask

fig, ax = plt.subplots()
# Display the image (first channel only, scaled by 1/255)
ax.imshow((X/255.)[:,:,0], cmap='gray')  # Use gray scale for the background image
# Display the mask
# The mask is added with 'alpha' for transparency so the image can be seen under the mask
# Channel 1 presumably corresponds to CLASSES[1] ('large_bowel') — TODO confirm.
ax.imshow(y[:,:,1], cmap=cmap, alpha=0.5)  # Adjust alpha for more or less transparency

plt.show()

# Generate COCO data

In [None]:
# Return to the project root on Drive: the COCO JSON files below are written
# to the relative path 'datasets/coco/', i.e. under BASE_PATH.
os.chdir(BASE_PATH)
os.getcwd()

In [None]:
# List the working directory contents (sanity check).
!ls

In [None]:
# Create the output folder for the COCO JSON files. `exist_ok=True` keeps the
# cell idempotent: a plain `mkdir` errors out when the folder is already there
# from a previous run.
os.makedirs('datasets/coco', exist_ok=True)

In [None]:
# Build the COCO sections for the training split and write them to
# 'datasets/coco/train_json.json' (relative to the current working directory).
categories = sd.create_coco_categories(CLASSES)
train_images = sd.create_coco_images(train_df)
train_annotations = sd.create_annotations(train_df, train_images)

train_json = dict(
    categories=categories,
    images=train_images,
    annotations=train_annotations,
)

# `default=np_encoder` handles values the json encoder cannot serialize itself.
with open('datasets/coco/train_json.json', mode='w', encoding='utf-8') as json_file:
    json.dump(train_json, json_file, ensure_ascii=True, indent=4, default=np_encoder)

# Show one annotation as a quick sanity check.
print(train_annotations[0])

In [None]:
# Build the COCO sections for the validation split and write them to
# 'datasets/coco/val_json.json' (relative to the current working directory).
categories = sd.create_coco_categories(CLASSES)
val_images = sd.create_coco_images(val_df)
val_annotations = sd.create_annotations(val_df, val_images)

val_json = dict(
    categories=categories,
    images=val_images,
    annotations=val_annotations,
)

# `default=np_encoder` handles values the json encoder cannot serialize itself.
with open('datasets/coco/val_json.json', mode='w', encoding='utf-8') as json_file:
    json.dump(val_json, json_file, ensure_ascii=True, indent=4, default=np_encoder)

# Show one annotation as a quick sanity check.
print(val_annotations[0])

In [None]:
# Build the COCO sections for the test split and write them to
# 'datasets/coco/test_json.json' (relative to the current working directory).
# `categories` is rebuilt here, as in the train and val cells, so this cell does
# not depend on a variable left over from an earlier cell.
categories = sd.create_coco_categories(CLASSES)
test_images = sd.create_coco_images(test_df)
test_annotations = sd.create_annotations(test_df, test_images)

test_json = {
    'categories': categories,
    'images': test_images,
    'annotations': test_annotations
}

# `default=np_encoder` handles values the json encoder cannot serialize itself.
with open('datasets/coco/test_json.json', 'w', encoding='utf-8') as f:
    json.dump(test_json, f, ensure_ascii=True, indent=4, default=np_encoder)

# Show one annotation as a quick sanity check.
print(test_annotations[0])

In [None]:
# Arguments shared by the three split generators. Images are read from the
# extracted copy under /content (absolute path); the annotation files are given
# relative to the current working directory.
shared_generator_args = dict(
    dataset_dir='/content/datasets/train',
    classes=CLASSES,
    input_image_size=(128, 128),
)

train_generator_class = DataGenerator(
    subset="train",
    annFile='datasets/coco/train_json.json',
    shuffle=True,
    **shared_generator_args,
)

# NOTE(review): the validation generator also uses subset="train" and
# shuffle=True — confirm this is intended and not a copy-paste leftover.
val_generator_class = DataGenerator(
    subset="train",
    annFile='datasets/coco/val_json.json',
    shuffle=True,
    **shared_generator_args,
)

test_generator_class = DataGenerator(
    subset="test",
    annFile='datasets/coco/test_json.json',
    shuffle=False,
    **shared_generator_args,
)

In [None]:
# Shape of the second element (the target) of the first training item.
train_generator_class[0][1].shape

In [None]:
# Switch back to /content and print the working directory.
os.chdir('/content/')
!pwd

In [None]:
# Fetch the first item from the validation generator and overlay one mask
# channel on the image. The original `val_generator_class[]` had no index and
# was a syntax error; index 0 is used because it exists for any non-empty generator.
# NOTE(review): the indexing below treats X as (H, W, C) and y as (H, W, n_classes);
# confirm against DataGenerator.__getitem__ (it may return a batch instead).
X, y = val_generator_class[0]

# Two-colour map: value 0 is drawn as 'none' (transparent), value 1 as red.
cmap = ListedColormap(['none', 'red'])  # 'none' is transparent, 'red' for the mask

fig, ax = plt.subplots()
# Display the image (first channel only, scaled by 1/255)
ax.imshow((X/255.)[:,:,0], cmap='gray')  # Use gray scale for the background image
# Display the mask
# The mask is added with 'alpha' for transparency so the image can be seen under the mask
# Channel 1 presumably corresponds to CLASSES[1] ('large_bowel') — TODO confirm.
ax.imshow(y[:,:,1], cmap=cmap, alpha=0.5)  # Adjust alpha for more or less transparency

plt.show()