<a href="https://colab.research.google.com/github/varun1724/NUCLS-Image-Segmentation/blob/main/Mask_RCNN_Multi_Class_TF1x_Working.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Pin the TensorFlow 1.x / Keras 2.1 stack used by the rest of this notebook.
# The runtime has to be restarted after this cell (see the note below) so the
# freshly installed versions are the ones that get imported.
!pip install tensorflow==1.14
!pip install tensorflow-gpu==1.14
# # %tensorflow_version 1.x
!pip install keras==2.1
# NOTE(review): h5py is presumably pinned because newer releases cannot load
# weight files with this Keras version - confirm before changing the pin.
!pip install 'h5py==2.10.0' --force-reinstall

**Manually restart runtime and then continue below**


In [None]:
# !pip install -q git+https://github.com/matterport/Mask_RCNN.git

In [None]:
# !git clone -q https://github.com/matterport/Mask_RCNN.git

In [None]:
# Mount Google Drive; the Mask_RCNN checkout, the dataset and the weight files
# used below are all read from paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Copy the Mask_RCNN checkout from Drive into /content.
# NOTE(review): presumably this is what makes `import Mask_RCNN.mrcnn...` in the
# cells below resolve - confirm that /content is on sys.path in this runtime.
!cp -r /content/drive/MyDrive/maskrcnn/Mask_RCNN /content

In [None]:
!cd ~Mask_RCNN
!pip install -r /content/Mask_RCNN/requirements.txt

In [None]:
# !cp /content/drive/MyDrive/maskrcnn/mask_rcnn_coco.h5 /content

In [None]:
import os
import glob
import cv2
import pandas as pd
import xml.etree
from numpy import zeros, asarray

import Mask_RCNN.mrcnn.utils
import Mask_RCNN.mrcnn.config
import Mask_RCNN.mrcnn.model


class NuCLSDataset(Mask_RCNN.mrcnn.utils.Dataset):
    """NuCLS nucleus annotations exposed through the Mask R-CNN ``Dataset`` API.

    ``dataset_dir`` is expected to contain:

    * ``train_test_splits/fold_<k>_train.csv`` and ``fold_<k>_test.csv``, each
      with a ``slide_name`` column;
    * ``rgb/`` with the images, whose file names start with a slide name;
    * ``csv/`` with one annotation CSV per image (same base name) providing
      ``group``, ``xmin``, ``ymin``, ``xmax`` and ``ymax`` columns.

    Instance masks are filled bounding-box rectangles, not nucleus outlines.
    """

    # Annotation ``group`` value -> class id registered in ``load_dataset``.
    # Any group missing from this table (e.g. 'apoptotic_body', 'unlabeled' and
    # their 'correction_' variants) is ignored everywhere in this class.
    _GROUP_TO_CLASS_ID = {
        'tumor': 1, 'correction_tumor': 1,
        'mitotic_figure': 2, 'correction_mitotic_figure': 2,
        'fibroblast': 3, 'correction_fibroblast': 3,
        'vascular_endothelium': 3, 'correction_vascular_endothelium': 3,
        'macrophage': 4, 'correction_macrophage': 4,
        'lymphocyte': 5, 'correction_lymphocyte': 5,
        'plasma_cell': 6, 'correction_plasma_cell': 6,
        'neutrophil': 7, 'correction_neutrophil': 7,
        'eosinophil': 7, 'correction_eosinophil': 7,
        'myoepithelium': 7, 'correction_myoepithelium': 7,
        'ductal_epithelium': 7, 'correction_ductal_epithelium': 7,
    }

    def load_dataset(self, dataset_dir, fold_num, is_train=True):
        """Register the seven classes and every usable image of one fold.

        Args:
            dataset_dir: Root directory of the dataset (layout in the class
                docstring).
            fold_num: Fold number used to pick the split CSV.
            is_train: Read ``fold_<k>_train.csv`` when true, otherwise
                ``fold_<k>_test.csv``.

        Images without any annotation that maps to a known class are skipped,
        as are images OpenCV cannot read.
        """
        self.add_class("dataset", 1, "Tumor")
        self.add_class("dataset", 2, "Mitotic_f")
        self.add_class("dataset", 3, "Stromal")
        self.add_class("dataset", 4, "Macroph")
        self.add_class("dataset", 5, "Lymphoc")
        self.add_class("dataset", 6, "Plasma C")
        self.add_class("dataset", 7, "Other")

        split = 'train' if is_train else 'test'
        fold_file = (dataset_dir + '/train_test_splits/fold_'
                     + str(fold_num) + '_' + split + '.csv')
        fold_df = pd.read_csv(fold_file)

        image_dir = dataset_dir + '/rgb/'
        annotation_dir = dataset_dir + '/csv/'

        for slide_name in fold_df['slide_name']:
            # glob order is arbitrary; sorting keeps image ids reproducible.
            for img_path in sorted(glob.glob(image_dir + str(slide_name) + '*')):
                # File name without directory and extension.  (The previous
                # slicing expression always produced an empty string.)
                image_id = os.path.splitext(os.path.basename(img_path))[0]
                # Derived from dataset_dir instead of a hard-coded '/QC/rgb/'
                # substring so any dataset location works.
                ann_path = annotation_dir + image_id + '.csv'

                groups = pd.read_csv(ann_path)['group']
                if not any(group in self._GROUP_TO_CLASS_ID for group in groups):
                    continue

                image = cv2.imread(img_path)
                if image is None:
                    # cv2.imread signals failure by returning None.
                    print('Skipping unreadable image:', img_path)
                    continue
                height, width = image.shape[:2]

                self.add_image('dataset', image_id=image_id, path=img_path,
                               annotation=ann_path, h=height, w=width)

    def extract_boxes(self, filename, width, height):
        """Read one annotation CSV and return its boxes and class ids.

        Args:
            filename: Path of the annotation CSV.
            width: Image width, returned unchanged.
            height: Image height, returned unchanged.

        Returns:
            ``(boxes, width, height, class_ids)`` where ``boxes`` is a list of
            ``[xmin, ymin, xmax, ymax]`` integer lists and ``class_ids`` the
            matching list of class ids.  Rows whose group has no class mapping
            are dropped, so a box can never inherit the previous row's label.
        """
        annotations = pd.read_csv(filename)

        boxes = []
        class_ids = []
        columns = ('group', 'xmin', 'ymin', 'xmax', 'ymax')
        rows = zip(*(annotations[column] for column in columns))
        for group, xmin, ymin, xmax, ymax in rows:
            class_id = self._GROUP_TO_CLASS_ID.get(group)
            if class_id is None:
                continue
            boxes.append([int(xmin), int(ymin), int(xmax), int(ymax)])
            class_ids.append(class_id)

        return boxes, width, height, class_ids

    def load_mask(self, image_id):
        """Build rectangular instance masks for one registered image.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            ``(masks, class_ids)``: a ``uint8`` array of shape
            ``(height, width, n_boxes)`` with ones inside each bounding box,
            and an ``int32`` array holding the class id of every mask.
        """
        info = self.image_info[image_id]
        boxes, w, h, class_ids = self.extract_boxes(info['annotation'],
                                                    info['w'], info['h'])
        masks = zeros([h, w, len(boxes)], dtype='uint8')

        for index, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            # Negative coordinates would be read as "from the end" by the
            # slice, so clamp them to 0; slicing already caps the upper end.
            top, bottom = max(ymin, 0), max(ymax, 0)
            left, right = max(xmin, 0), max(xmax, 0)
            masks[top:bottom, left:right, index] = 1

        return masks, asarray(class_ids, dtype='int32')


In [None]:
class TrainConfig(Mask_RCNN.mrcnn.config.Config):
    """Training configuration: overrides of the Mask R-CNN defaults."""

    NAME = "Train_cfg"

    # Background plus the seven nucleus classes registered by NuCLSDataset.
    NUM_CLASSES = 1 + 7

    # Single GPU, one image per GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1

    # Step counts are scaled by the number of images per GPU.
    VALIDATION_STEPS = 200 // IMAGES_PER_GPU
    STEPS_PER_EPOCH = 1000 // IMAGES_PER_GPU

    # Image size overrides tried earlier and currently disabled:
    # IMAGE_MAX_DIM = 768
    # IMAGE_MIN_DIM = 256


Train_config = TrainConfig()
Train_config.display()



In [None]:
# from matplotlib import pyplot

# for i in range(100000):
#   image_id = i
#   image = valid_dataset.load_image(image_id)
#   print(i, image.shape)

In [None]:
import imgaug as ia
import imgaug.augmenters as iaa
# Geometric-only augmentation pipeline, applied in random order.
# It is not used at the moment: the `augmentation = seq` argument of the
# training call further down is commented out.
#
# Augmenters that were tried and are currently disabled:
#   iaa.Crop(percent=(0, 0.1))
#   iaa.Sometimes(0.23, iaa.GaussianBlur(sigma=(0, 0.5)))
#   iaa.Sometimes(0.23, iaa.LinearContrast((0.75, 1.5)))
#   iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5)
#   iaa.Multiply((0.8, 1.2), per_channel=0.2)
#   Affine translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)} and shear=(-2, 2)
seq = iaa.Sequential(
    [
        iaa.Fliplr(0.1),  # horizontal flip with probability 0.1
        iaa.Flipud(0.1),  # vertical flip with probability 0.1
        # Random zoom per axis and random rotation in degrees.
        iaa.Affine(
            rotate=(-25, 25),
            scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        ),
    ],
    random_order=True,
)

In [None]:
# !cp /content/drive/MyDrive/maskrcnn/NuCLS_mask_rcnn_mclass_r2.h5 /content/drive/MyDrive/maskrcnn/NuCLS_mask_rcnn_mclass_r3.h5

In [None]:
# train_set = NuCLSDataset()
# train_set.load_dataset(dataset_dir='/content/drive/MyDrive/QC', fold_num = 2, is_train=True)
# train_set.prepare()

**TRAIN**

In [None]:
import random

# Run 100 training rounds.  Each round picks one of the five folds at random,
# rebuilds the datasets and the model, resumes from the weight file written by
# the previous round, trains with `epochs=10` and overwrites that same file.
# NOTE(review): if the folds are ordinary cross-validation splits, the test
# slides of one fold are training slides of another, so after several rounds
# the validation numbers are no longer measured on unseen data - confirm.
# NOTE(review): a new MaskRCNN is created on every round without releasing the
# previous one; watch memory usage on long runs.
for i in range(100):
  # Zero-based fold index; fold numbers in the file names are 1..5.
  folds = random.randint(0, 4)
  print('i=',i+1,' fold=',folds+1)
  # folds = 998
  train_set = NuCLSDataset()
  train_set.load_dataset(dataset_dir='/content/drive/MyDrive/QC', fold_num = int(folds+1), is_train=True)
  train_set.prepare()

  valid_dataset = NuCLSDataset()
  valid_dataset.load_dataset(dataset_dir='/content/drive/MyDrive/QC', fold_num = int(folds+1), is_train=False)
  valid_dataset.prepare()

  # if i==0:
  model = Mask_RCNN.mrcnn.model.MaskRCNN(mode='training', 
                            model_dir='/content/drive/MyDrive/maskrcnn/checkpoints/', 
                            config=Train_config)

  # Resume from the weights saved at the end of the previous round.
  model.keras_model.load_weights(filepath='/content/drive/MyDrive/maskrcnn/NuCLS_mask_rcnn_mclass_r2.h5', 
                  by_name=True)

  # Augmentation is currently disabled (see the commented-out argument).
  model.train(train_dataset=train_set, 
        val_dataset=valid_dataset, 
        learning_rate=Train_config.LEARNING_RATE, 
        epochs=10,# augmentation = seq,
        layers='all')
  
  # Same path as the file loaded above, so the next round continues from here.
  model_path = '/content/drive/MyDrive/maskrcnn/NuCLS_mask_rcnn_mclass_r2.h5'
  model.keras_model.save_weights(model_path)


In [None]:
# model.keras_model.save_weights('/content/drive/MyDrive/maskrcnn/NuCLS_mask_rcnn_mclass_r1.h5')

In [None]:
class TestConfig(Mask_RCNN.mrcnn.config.Config):
    """Inference configuration: overrides of the Mask R-CNN defaults."""

    NAME = "Test_cfg"

    # Background plus the seven nucleus classes registered by NuCLSDataset.
    NUM_CLASSES = 1 + 7

    # Single GPU, one image per GPU.
    IMAGES_PER_GPU = 1
    GPU_COUNT = 1


Test_config = TestConfig()
Test_config.display()

In [None]:
from numpy import expand_dims
from numpy import mean
from Mask_RCNN.mrcnn.model import load_image_gt
from Mask_RCNN.mrcnn.utils import compute_ap
from Mask_RCNN.mrcnn.utils import compute_recall
from Mask_RCNN.mrcnn.model import mold_image

def evaluate_model(dataset, model, cfg, threshold=0.5):
    """Return the mean average precision of ``model`` over ``dataset``.

    Args:
        dataset: Prepared dataset; every id in ``dataset.image_ids`` is scored.
        model: Mask R-CNN model in inference mode.
        cfg: Config passed to ``load_image_gt`` when loading ground truth.
        threshold: IoU threshold handed to ``compute_ap``.

    Returns:
        The mean of the per-image AP values.
    """
    APs = []
    for image_id in dataset.image_ids:
        # Load the image together with its ground-truth boxes and masks.
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(
            dataset, cfg, image_id, use_mini_mask=False)
        # detect() molds its inputs itself (mean-pixel subtraction included),
        # so it gets the raw image.  Passing an already molded image, as the
        # previous code did, subtracted the mean pixel twice.
        r = model.detect([image], verbose=0)[0]
        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask,
                                 r["rois"], r["class_ids"], r["scores"], r['masks'],
                                 iou_threshold=threshold)
        APs.append(AP)
    # Mean AP across all images.
    mAP = mean(APs)
    return mAP

def evaluate_model_1(dataset, model, cfg):
    """Return mean average precision and mean recall of ``model`` over ``dataset``.

    Args:
        dataset: Prepared dataset; every id in ``dataset.image_ids`` is scored.
        model: Mask R-CNN model in inference mode.
        cfg: Config passed to ``load_image_gt`` when loading ground truth.

    Returns:
        ``(mAP, mAR)``: the means of the per-image AP and recall values.
        Recall is computed from predicted and ground-truth boxes only (class
        ids are not passed to ``compute_recall``) at an IoU of 0.5.
    """
    APs = []
    ARs = []
    for image_id in dataset.image_ids:
        image, image_meta, gt_class_id, gt_bbox, gt_mask = load_image_gt(
            dataset, cfg, image_id, use_mini_mask=False)
        # detect() molds its inputs itself (mean-pixel subtraction included),
        # so it gets the raw image.  Passing an already molded image, as the
        # previous code did, subtracted the mean pixel twice.
        r = model.detect([image], verbose=0)[0]
        AP, _, _, _ = compute_ap(gt_bbox, gt_class_id, gt_mask,
                                 r["rois"], r["class_ids"], r["scores"], r['masks'])
        AR, _ = compute_recall(r["rois"], gt_bbox, iou=0.5)
        ARs.append(AR)
        APs.append(AP)
    # Means across all images.
    mAP = mean(APs)
    mAR = mean(ARs)
    return mAP, mAR



**Testing on Folds 1 to 5**

In [None]:
from matplotlib import pyplot
from matplotlib.patches import Rectangle
import numpy as np
import Mask_RCNN.mrcnn.visualize as visualize
import Mask_RCNN.mrcnn.utils as utils
from Mask_RCNN.mrcnn.model import log

In [None]:
# Build the inference model once and load the weights written by training.
model = Mask_RCNN.mrcnn.model.MaskRCNN(mode='inference',
                          model_dir='/content/',
                          config=Test_config)

model.keras_model.load_weights(filepath='/content/drive/MyDrive/maskrcnn/NuCLS_mask_rcnn_mclass_r2.h5',
                  by_name=True)

# Score the test split of every fold (fold numbers in the file names are 1..5).
# NOTE(review): the weights were trained on randomly chosen folds; if the folds
# are ordinary cross-validation splits these test images may have been seen
# during training - confirm before reporting the numbers.
for folds in range(5):
  print(' fold=',folds+1)

  valid_dataset = NuCLSDataset()
  valid_dataset.load_dataset(dataset_dir='/content/drive/MyDrive/QC', fold_num = int(folds+1), is_train=False)
  valid_dataset.prepare()

  test_mAP, mARs_test = evaluate_model_1(valid_dataset, model, Test_config)

  # Harmonic mean of mAP and mAR.  When both are zero the formula would
  # divide 0 by 0, so report 0 in that case.
  score_sum = test_mAP + mARs_test
  if score_sum == 0:
    f_score_test = 0.0
  else:
    f_score_test = (2 * test_mAP * mARs_test)/score_sum

  print("Test mAP: %.3f" % test_mAP)
  print('f1-score-test', f_score_test)


In [None]:
pyplot.rcParams['figure.figsize'] = [12, 8]

def plot_actual_vs_predicted(dataset, model, cfg, n_images=5):
    """Plot ground-truth masks next to predicted boxes for the first images.

    NOTE: a later cell defines another function with this same name; once that
    cell has run, this version is no longer the one being called.

    Args:
        dataset: Prepared dataset providing ``load_image`` and ``load_mask``.
        model: Mask R-CNN model in inference mode.
        cfg: Unused; kept so existing calls keep working.
        n_images: Number of images to plot, capped at the dataset size.
    """
    # Never ask for more images than the dataset holds.
    n_images = min(n_images, len(dataset.image_info))
    for i in range(n_images):
        image = dataset.load_image(i)
        mask, _ = dataset.load_mask(i)
        # detect() molds its inputs itself, so it gets the raw image; molding
        # it here first would subtract the mean pixel twice.
        yhat = model.detect([image], verbose=0)[0]

        # Left column: image with every ground-truth mask overlaid.
        pyplot.subplot(n_images, 2, i*2+1)
        pyplot.imshow(image)
        pyplot.title('Actual')
        for j in range(mask.shape[2]):
            pyplot.imshow(mask[:, :, j], cmap='gray', alpha=0.5, interpolation='none')

        # Right column: image with every predicted box outlined.
        pyplot.subplot(n_images, 2, i*2+2)
        pyplot.imshow(image)
        pyplot.title('Predicted')
        ax = pyplot.gca()
        for box in yhat['rois']:
            # rois are (y1, x1, y2, x2); Rectangle wants (x, y), width, height.
            y1, x1, y2, x2 = box
            width, height = x2 - x1, y2 - y1
            rect = Rectangle((x1, y1), width, height, fill=False, color='red')
            ax.add_patch(rect)
    # Show the figure.
    pyplot.show()

In [None]:
pyplot.rcParams['figure.figsize'] = [12, 8]

def plot_actual_vs_predicted(dataset, model, cfg, n_images=5):
    """Show ground truth, GT-vs-prediction differences and predictions.

    For each of the first ``n_images`` images three figures are drawn with the
    Mask R-CNN ``visualize`` helpers: the ground-truth instances, the
    differences between ground truth and detections, and the detections.

    Args:
        dataset: Prepared dataset; its ``class_names`` label the figures.
        model: Mask R-CNN model in inference mode.
        cfg: Unused; kept so existing calls keep working.
        n_images: Number of images to show, capped at the dataset size.
    """
    # Never ask for more images than the dataset holds.
    n_images = min(n_images, len(dataset.image_info))
    for i in range(n_images):
        info = dataset.image_info[i]
        img_path = info['path']
        ann_path = info['annotation']

        # Ground truth: image, masks, class ids and the boxes around the masks.
        image = dataset.load_image(i)
        mask, class_ids = dataset.load_mask(i)
        bbox = utils.extract_bboxes(mask)

        log("image", image)
        log("mask", mask)
        log("class_ids", class_ids)
        log("bbox", bbox)

        # Run detection on the raw image.
        results = model.detect([image], verbose=1)
        r = results[0]

        print(img_path,ann_path)
        # Class names come from the dataset argument rather than from the
        # notebook-global valid_dataset, so the function works for any dataset.
        visualize.display_instances(image, bbox, mask, class_ids,
                                    dataset.class_names, show_mask=False)
        visualize.display_differences(image,
                                      bbox, class_ids, mask,
                                      r['rois'], r['class_ids'], r['scores'], r['masks'],
                                      dataset.class_names, title="image" + str(i), ax=None,
                                      show_mask=False, show_box=True,
                                      iou_threshold=0.5, score_threshold=0.5)
        visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                                    dataset.class_names, show_mask=False)
    # Show the figure.
    pyplot.show()

In [None]:
# Inspect four randomly drawn images of the most recently loaded validation set.
# NOTE(review): np.random.choice samples with replacement by default, so the
# same image can be drawn more than once.
image_ids = np.random.choice(valid_dataset.image_ids, 4)
for image_id in image_ids:
    image = valid_dataset.load_image(image_id)
    mask, class_ids = valid_dataset.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, valid_dataset.class_names)

    # Compute Bounding box
    bbox = utils.extract_bboxes(mask)

    # Display image and additional stats
    print("image_id ", image_id, valid_dataset.image_reference(image_id))
    log("image", image)
    log("mask", mask)
    log("class_ids", class_ids)
    log("bbox", bbox)
    # Display image and instances
    visualize.display_instances(image, bbox, mask, class_ids, valid_dataset.class_names)

In [None]:
# Zero-based fold index; the fold number actually loaded is folds + 1.
folds = 1


print(' fold=',folds+1)

# Reload the test split of that fold and plot its first image.
valid_dataset = NuCLSDataset()
valid_dataset.load_dataset(dataset_dir='/content/drive/MyDrive/QC', fold_num = int(folds+1), is_train=False)
valid_dataset.prepare()

plot_actual_vs_predicted(valid_dataset, model, Test_config, n_images=1)

In [None]:
# Same comparison for the first five images of the validation set loaded above.
plot_actual_vs_predicted(valid_dataset, model, Test_config, n_images=5)