<a href="https://colab.research.google.com/github/EternalSorrrow/bak/blob/master/ra2_feet_joint_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
#@title Install dependencies

# Fetch the Matterport Mask R-CNN implementation from GitHub.
# NOTE(review): the clone is not pinned to a tag or commit, so re-running this
# cell at a later date may install different code.
!git clone https://github.com/matterport/Mask_RCNN.git

# Install the package from inside the checkout, then print the metadata pip
# has recorded for it.
%cd Mask_RCNN
!python setup.py install
!pip show mask-rcnn

# Go back up to the parent directory of the checkout.
%cd ..
#!pip3 install imgaug

In [0]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
from google.colab import drive
drive.mount('/content/drive')

In [0]:
#@title Paths definition

# Both paths are relative to the current working directory and end with '/',
# because some cells build file names by plain string concatenation.
# Folder the dataset class reads image files from.
train_set_path = 'drive/My Drive/Work/ML/RA2/ra2/train/'
# Folder holding the VIA project file (project.json) and the annotated subset.
subset_path = 'drive/My Drive/Work/ML/RA2/ra2/feet_subset/'

In [0]:
#@title Temp set files count

import shutil
import os

# Disabled one-off copy step. NOTE(review): items_to_select and temp_set_path
# are not defined anywhere in this notebook, so it cannot be re-enabled as is.
#for item in items_to_select:
#  shutil.copy(train_set_path + item, temp_set_path)

# Number of entries (files and folders) in the subset directory.
len(os.listdir(subset_path))

In [0]:
#@title Load annotation file

import json
import cv2

# Per-image VIA records; stays None if reading the project file fails.
annotations = None

# project.json is a VIA project export: its '_via_img_metadata' entry maps an
# image key to that image's record. Only the records are kept, as a list.
with open(subset_path + 'project.json') as json_file:
  annotations = json.load(json_file)
  annotations = list(annotations['_via_img_metadata'].values())

In [0]:
#@title Load images and parse annotations

def load_images(anns):
  """Read every annotated image referenced by a list of VIA records.

  Args:
    anns: iterable of VIA image records, each a dict with at least the keys
      'filename' and 'regions'.

  Returns:
    dict mapping file name -> image array as returned by cv2.imread.
    Records without regions are skipped, and so are files that cannot be
    read (a warning is issued for each of those).
  """
  import warnings  # local import: the notebook has no shared import for it

  files = dict()

  for ann in anns:
    if not ann['regions']:  # Skip images with no annotations
      continue

    image_path = subset_path + ann['filename']
    image = cv2.imread(image_path)

    if image is None:
      # cv2.imread reports a missing or undecodable file by returning None
      # instead of raising; storing that None would only fail later, far
      # away from the cause.
      warnings.warn('Could not read image: ' + image_path)
      continue

    files[ann['filename']] = image

  return files

def load_annotations(anns):
  """Convert VIA records into per-file polygon dictionaries.

  Args:
    anns: iterable of VIA image records, each a dict with the keys
      'filename' and 'regions'. 'regions' may be a list (VIA 2.x) or a dict
      keyed by region index (VIA 1.x).

  Returns:
    dict mapping file name -> {joint name: [(x, y), ...]}, where the joint
    name is the region attribute 'joint'. Records without regions are
    skipped. If one file holds several regions with the same joint name,
    the last one wins.
  """
  regions = dict()

  for ann in anns:
    file_regions = ann['regions']

    if not file_regions:  # Skip images with no annotations
      continue

    # VIA 1.x stores regions as {index: region}; only the values matter.
    if isinstance(file_regions, dict):
      file_regions = file_regions.values()

    regions[ann['filename']] = {
        region['region_attributes']['joint']: list(zip(
            region['shape_attributes']['all_points_x'],
            region['shape_attributes']['all_points_y']
        ))
        for region in file_regions
    }

  return regions

# File name -> image array, for annotated images only.
imgs = load_images(annotations)
# File name -> {joint name: polygon points}, for annotated images only.
anns = load_annotations(annotations)

In [0]:
#@title Example image

import matplotlib.pyplot as plt
import numpy as np

print(len(anns.keys()), 'annotation sets found')
# Pick one annotated image at random.
ex_fname = np.random.choice(list(imgs.keys()))

# Draw on a copy so the image cached in imgs stays untouched.
img = imgs[ex_fname].copy()
ans = anns[ex_fname].values()

# Random colour triple; each component is drawn from [0, 255).
randcol = lambda : (np.random.randint(255), np.random.randint(255), np.random.randint(255)) 

# Fill every annotated joint polygon with its own random colour.
for poly in ans:
  poly = np.array(poly, dtype=np.int32)
  cv2.fillPoly(img, [poly], randcol())

# NOTE(review): cv2.imread presumably yields BGR channel order while imshow
# interprets arrays as RGB; harmless for grayscale scans, confirm for colour.
plt.figure(figsize=(10, 10))
plt.imshow(img)

In [0]:
#@title Modular live loss plotter callback

#Modular live loss plotter for Keras models
#Allows to create custom layouts of per-batch or per-epoch plots for different metrics

#Monitor class defines a plot, which either may be batch or epoch-scoped, and may contain several graphs
#Batch monitor plots its values per batch, and refreshes itself on new epoch begin
#Epoch monitor plots its values per epoch, and performs no refresh
#All values/last N values displaying
#Log-scale/Linear scale displaying

#The Plotter callback manages a set of Monitors and is responsible for the actual plotting
#It defines the grid on which the Monitors are drawn, the grid size, and the refresh rate in batches
#at which the Monitors are re-drawn in addition to the per-epoch update
#The Plotter can be silenced to disable plotting and only archive per-epoch data

from IPython.display import clear_output
from keras.callbacks import Callback
import matplotlib.pyplot as plt

class Monitor():
    """One plot, made of one curve per monitored metric, fed from Keras logs.

    Args:
        scope: 'epoch' or 'batch'; decides the x-axis label and which
            Plotter hook feeds this monitor.
        monitors: names of the log entries to plot (matched in lower case).
        plot_last: number of most recent points to draw; values <= 0 draw
            the whole history.
        log_scale: draw the y axis in log scale.
        precision: number of decimals of the value printed next to the last
            point of each curve; values <= 0 disable the printout.
    """

    def __init__(self, scope='epoch', monitors=('loss',), plot_last=-1, log_scale=False, precision=4):
        self.scope = scope.lower()
        self.monitors = [ monitor.lower() for monitor in monitors ]
        # Negative values collapse to 0, and a slice [-0:] is the whole list.
        self.plot_last = max(0, plot_last)
        self.x = []
        self.ys = [ [] for _ in self.monitors ]
        self.log_scale = log_scale
        self.precision = precision

    def reinit(self):
        """Drop all collected points."""
        self.x = []
        self.ys = [ [] for _ in self.monitors ]

    def update(self, iteration, logs=None):
        """Record the metrics of one iteration.

        Args:
            iteration: x value (batch or epoch index) of the new point.
            logs: dict of metric name -> value; may be None.
        """
        logs = logs or {}
        self.x.append(iteration)

        for series, monitor in zip(self.ys, self.monitors):
            value = logs.get(monitor)
            # A missing metric is stored as NaN so that x and y stay the same
            # length; otherwise one missing value would desynchronise the
            # series and the curve would never be drawn again.
            series.append(float('nan') if value is None else value)

    def plot(self, axis):
        """Draw all curves of this monitor on the given matplotlib axis."""
        import math  # local import: only needed for the NaN check below

        x_data = self.x[ -self.plot_last : ]

        if self.log_scale:
            axis.set_yscale('log') #Set up scale

        plotted = False

        for monitor, series in zip(self.monitors, self.ys):
            y_data = series[ -self.plot_last : ]

            if len(x_data) != len(y_data): #Incoherent data, nothing sensible to draw
                continue

            axis.plot(x_data, y_data, label=monitor + '_' + self.scope)
            plotted = True

            #If there's a real last point plotted, print its value
            if self.precision > 0 and len(y_data) > 0 and not math.isnan(y_data[-1]):
                text = str(round(y_data[-1],  self.precision))
                axis.text(x_data[-1], y_data[-1], text)

        x_labels = {'batch' : 'Batches', 'epoch' : 'Epochs'} #Set up x-label
        axis.set_xlabel(x_labels[self.scope])

        # A legend without any labelled curve only produces a warning.
        if plotted:
            axis.legend()


class Plotter(Callback):
    """Keras callback that feeds Monitors from the training logs and draws them.

    Args:
        scale: size of one grid cell, in matplotlib figure units.
        n_cols, n_rows: grid dimensions; the grid must have at least one
            cell per monitor.
        monitors: list of Monitor objects, drawn in order over the grid.
        refresh_rate: redraw every `refresh_rate` batches in addition to the
            redraw at the end of each epoch; values <= 0 disable the
            per-batch redraw.
        silent: when True nothing is drawn, data are only collected.

    Raises:
        ValueError: if the grid has fewer cells than there are monitors.
    """

    def __init__(self, scale=5, n_cols=2, n_rows=1, monitors=None, refresh_rate=-1, silent=False):
        super().__init__()

        monitors = [] if monitors is None else monitors

        if (n_cols * n_rows < len(monitors)):
            raise ValueError('Grid is too small to fit all monitors!')

        self.n_cols = n_cols
        self.n_rows = n_rows
        self.scale = scale

        self.monitors = monitors

        # Monitors with any other scope are drawn but never updated.
        self.batch_monitors = [ m for m in monitors if m.scope == 'batch' ]
        self.epoch_monitors = [ m for m in monitors if m.scope == 'epoch' ]

        self.refresh_rate = refresh_rate
        self.silent = silent  # previously hard-coded to False, ignoring the argument

    def on_train_begin(self, logs=None):
        pass

    def on_epoch_begin(self, epoch, logs=None):
        # Batch-scoped plots start from scratch on every epoch.
        for monitor in self.batch_monitors:
            monitor.reinit()

    def plot(self):
        """Clear the cell output and redraw every monitor on a fresh figure."""
        clear_output(wait=True)

        figsize = ( self.scale * self.n_cols, self.scale * self.n_rows)
        # squeeze=False always yields a 2-D array of axes, so a 1x1 grid
        # needs no special case.
        fig, axes = plt.subplots(figsize=figsize, ncols=self.n_cols,
                                 nrows=self.n_rows, squeeze=False)

        for monitor, axis in zip(self.monitors, axes.flat):
            monitor.plot(axis)

        plt.show()
        plt.close(fig)  # do not keep one open figure per refresh

    def on_batch_end(self, batch, logs=None):
        logs = logs or {}

        for monitor in self.batch_monitors:
            monitor.update(batch, logs)

        if self.silent or batch == 0 or self.refresh_rate <= 0 or batch % self.refresh_rate != 0:
            return

        self.plot()

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}

        for monitor in self.epoch_monitors:
            monitor.update(epoch, logs)

        if self.silent:
            return

        self.plot()

    def reinit(self):
        """Drop the data collected by all monitors."""
        for monitor in self.monitors:
            monitor.reinit()


In [0]:
#@title Import Mask R-CNN dependencies

%cd Mask_RCNN/

from mrcnn.config import Config
from mrcnn import model as modellib
from mrcnn import visualize
import mrcnn
from mrcnn.utils import Dataset
from mrcnn.model import MaskRCNN

from os import listdir
from xml.etree import ElementTree

%cd ..

In [0]:
# Number of annotated images that go into the training split (used as the
# split index in get_data) and the basis for STEPS_PER_EPOCH.
train_samples = 80
# In this notebook only used to derive VALIDATION_STEPS.
val_samples = 8

# Number of joint classes, background excluded.
classes_num = 6
# Upper bound on detections per image: two per class.
max_instances_to_detect = int(classes_num * 2)

In [0]:
#@title Configuration definition

class FeetJointsConfig(Config):
    """Mask R-CNN configuration used to train the feet-joint model."""

    # give the configuration a recognizable name
    NAME = "FeetJoints_config"
 
    # set the number of GPUs to use along with the number of images
    # per GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 4
 
    # number of classes: the joint classes plus one for the background
    NUM_CLASSES = classes_num + 1
   
    # Number of training / validation steps per epoch: samples divided by the
    # images per GPU, but never less than one step
    STEPS_PER_EPOCH = max(1, train_samples // IMAGES_PER_GPU)
    VALIDATION_STEPS = max(1, val_samples // IMAGES_PER_GPU)

    #Select backbone: resnet50 or resnet101
    BACKBONE = "resnet101"

    #Image resizing
    #IMAGE_RESIZE_MODE = "square"
    IMAGE_MIN_DIM = 512
    IMAGE_MAX_DIM = 512
    #IMAGE_MIN_SCALE = 2.0

    #RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128)
    
    # Learning rate
    LEARNING_RATE=0.001
    
    # Confidence threshold of 0.0: no detection is discarded for low confidence
    DETECTION_MIN_CONFIDENCE = 0.0
    
    # Max ground truth instances per image
    # NOTE(review): equals classes_num, i.e. presumably one instance per joint class
    MAX_GT_INSTANCES=6

    # max detected instances
    DETECTION_MAX_INSTANCES = max_instances_to_detect

# Instantiate the configuration and print its values
config = FeetJointsConfig()
config.display()

In [0]:
#@title Dataset class definition

class FeetJoints(Dataset):
  """Mask R-CNN dataset of feet-joint polygons annotated with VGG Image Annotator.

  Annotations are read from 'project.json' in subset_path; image files are
  read from train_set_path.
  """

  def _load_via_annotations(self):
    """Return the list of per-image VIA records stored in project.json."""
    with open(os.path.join(subset_path, "project.json")) as json_file:
      project = json.load(json_file)

    return list(project['_via_img_metadata'].values())  # don't need the dict keys

  def train_val_split(self, split = 32):
    """Randomly split the annotated file names into two groups.

    Args:
      split: number of file names that go into the first group.

    Returns:
      (first `split` names, remaining names) of a random permutation of all
      files that have at least one region.
    """
    files = [ a['filename'] for a in self._load_via_annotations() if a['regions'] ]
    files = np.random.permutation(files)

    return files[:split], files[split:]

  def load_joints(self, subset):
    """Register the joint classes and the images listed in `subset`.

    Args:
      subset: collection of file names to include; every name must belong
        to an annotated image of the VIA project.

    Raises:
      AssertionError: if some name of `subset` has no annotated record.
      FileNotFoundError: if an image file cannot be read.
    """
    #Add Classes
    joint_names = ("mtp_1", "mtp_2", "mtp_3", "mtp_4", "mtp_5", "mtp_ip")
    for class_id, joint_name in enumerate(joint_names, start=1):
      self.add_class("joints", class_id, joint_name)

    # Load annotations
    # VGG Image Annotator (up to version 1.6) saves each image in the form:
    # { 'filename': '28503151_5b5b7ec140_b.jpg',
    #   'regions': {
    #       '0': {
    #           'region_attributes': {},
    #           'shape_attributes': {
    #               'all_points_x': [...],
    #               'all_points_y': [...],
    #               'name': 'polygon'}},
    #       ... more regions ...
    #   },
    #   'size': 100202
    # }
    # We mostly care about the x and y coordinates of each region
    # Note: In VIA 2.0, regions was changed from a dict to a list.

    # The VIA tool saves images in the JSON even if they don't have any
    # annotations. Skip unannotated images and images not included into passed subset
    wanted = set(subset)
    annotations = [ a for a in self._load_via_annotations()
                    if a['regions'] and a['filename'] in wanted ]
    assert len(annotations) == len(subset), \
        "Some files of the subset have no annotated record in project.json"

    # Add images
    for a in annotations:
      # Collect (joint name, shape attributes) for every region; the shape
      # attributes hold the x, y coordinates of the polygon outline.
      # The if condition is needed to support VIA versions 1.x and 2.x.
      if isinstance(a['regions'], dict):
        region_list = a['regions'].values()
      else:
        region_list = a['regions']

      polygons = [ (r['region_attributes']['joint'], r['shape_attributes'])
                   for r in region_list ]

      # load_mask() needs the image size to convert polygons to masks.
      # Unfortunately, VIA doesn't include it in JSON, so we must read
      # the image. This is only manageable since the dataset is tiny.
      image_path = os.path.join(train_set_path, a['filename'])
      image = cv2.imread(image_path)

      if image is None:
        # cv2.imread returns None instead of raising on unreadable files.
        raise FileNotFoundError('Could not read image: ' + image_path)

      height, width = image.shape[:2]

      self.add_image(
          "joints",
          image_id=a['filename'],  # use file name as a unique image id
          path=image_path,
          width=width, height=height,
          polygons=polygons)

  def load_mask(self, image_id):
    """Generate instance masks for an image.

    Returns:
      masks: A bool array of shape [height, width, instance count] with
        one mask per instance.
      class_ids: a 1D int32 array with the class ID of each instance mask,
        in the same order as the mask channels.
    """
    info = self.image_info[image_id]
    if info["source"] != "joints":
      return super().load_mask(image_id)

    polygons = info["polygons"]

    # Convert polygons to a bitmap mask of shape
    # [height, width, instance_count]
    mask = np.zeros([info["height"], info["width"], len(polygons)],
                    dtype=np.uint8)
    class_ids = np.zeros(len(polygons), dtype=np.int32)

    class_id_by_name = { item['name']: item['id'] for item in self.class_info }

    # One channel per polygon, in annotation order. The channel index is the
    # instance index, not the class id, so images with missing or repeated
    # joints still give consistent (mask, class id) pairs.
    for channel, (joint_name, shape) in enumerate(polygons):
      outline = np.array(list(zip(shape['all_points_x'], shape['all_points_y'])),
                         dtype=np.int32)

      # Rasterise into a separate 2-D buffer and copy it into the channel.
      layer = np.zeros(shape=(mask.shape[0], mask.shape[1]), dtype=np.uint8)
      cv2.fillPoly(layer, [ outline ], 1)

      mask[:, :, channel] = layer
      class_ids[channel] = class_id_by_name[joint_name]

    # np.bool was removed from NumPy; the builtin bool is the same dtype.
    return mask.astype(bool), class_ids

  def image_reference(self, image_id):
    """Return the path of the image."""
    info = self.image_info[image_id]
    if info["source"] == "joints":
      return info["path"]

    return super().image_reference(image_id)

def get_data():
    """Build the prepared training and validation datasets.

    The annotated files are split at random; the first `train_samples`
    files form the training set, the remaining ones the validation set.

    Returns:
        (training dataset, validation dataset), both already prepared.
    """
    training_set = FeetJoints()
    validation_set = FeetJoints()

    train_files, val_files = training_set.train_val_split(train_samples)

    # Training data first, validation data second.
    for dataset, files in ((training_set, train_files), (validation_set, val_files)):
        dataset.load_joints(files)
        dataset.prepare()

    return training_set, validation_set

train, val = get_data()

In [0]:
#@title Sample some dataset images

# Sizes and class counts of the training / validation datasets.
print("Image Count: {}, {}".format(len(train.image_ids), len(val.image_ids)))
print("Class Count: {}, {}".format(train.num_classes, val.num_classes))
for i, info in enumerate(train.class_info):
    print("{:3}. {:50}".format(i, info['name']))

# Show the masks of four randomly drawn training images.
# np.random.choice samples with replacement by default, so an image may repeat.
image_ids = np.random.choice(train.image_ids, 4)
for image_id in image_ids:
    image = train.load_image(image_id)
    mask, class_ids = train.load_mask(image_id)
    visualize.display_top_masks(image, mask, class_ids, train.class_names, limit=6)

In [0]:
#@title Display random image with regions and BBs

from mrcnn import utils

# Load a random training image together with its instance masks.
image_id = np.random.choice(train.image_ids)
image = train.load_image(image_id)
mask, class_ids = train.load_mask(image_id)
# Compute the bounding boxes from the masks
bbox = utils.extract_bboxes(mask)

# Print the image id and the path of the image file
print("image_id ", image_id, train.image_reference(image_id))

# Display the image with its instances (masks, boxes, class names)
visualize.display_instances(image, bbox, mask, class_ids, train.class_names)

In [0]:
#@title Create logs folder and get COCO weights

!mkdir logs
!wget https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5

In [0]:
#@title Define the model building function

# Folder for training logs and checkpoints.
MODEL_DIR = 'logs'
# Local file with the weights pre-trained on MS COCO.
COCO_MODEL_PATH = 'mask_rcnn_coco.h5'

def get_model(model_dir,
              init_with = "coco",  # imagenet, coco, last, or None
              ):
  """Create a Mask R-CNN model in training mode and load initial weights.

  Args:
    model_dir: folder where logs and checkpoints are written.
    init_with: source of the initial weights:
      "imagenet" - ImageNet weights obtained through the model,
      "coco"     - COCO weights from COCO_MODEL_PATH, without the head
                   layers whose shape depends on the number of classes,
      "last"     - the most recent checkpoint found under model_dir,
      None       - no weights are loaded.

  Returns:
    The model, built with the module-level `config`.

  Raises:
    ValueError: if init_with is none of the values above.
  """
  model = modellib.MaskRCNN(mode="training", config=config, model_dir=model_dir)

  if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
  elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                "mrcnn_bbox", "mrcnn_mask"])
  elif init_with == "last":
    # Continue from the last trained checkpoint
    model.load_weights(model.find_last(), by_name=True)
  elif init_with is not None:
    # A typo here used to return a randomly initialised model without notice.
    raise ValueError(
        'init_with must be "imagenet", "coco", "last" or None, got %r' % (init_with,))

  return model

In [0]:
#@title Auxilary drawing function

def get_ax(rows=1, cols=1, size=8):
    """Create a figure with a rows x cols grid and return its axes.

    Single place that controls the size of the figures drawn in this
    notebook: every grid cell is `size` x `size`, so the figure measures
    (size * cols) x (size * rows).

    Returns whatever plt.subplots returns as its second element.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

In [0]:
#@title Define and visualize augmentations

import imgaug as ia
import imgaug.augmenters as iaa

# Augmentation pipeline applied to the training images.
# NOTE(review): Mask R-CNN presumably applies only a whitelist of geometric
# augmenters to the masks; confirm that iaa.Crop is covered with the installed
# imgaug version, otherwise cropped images and their masks may not line up.
seq = iaa.Sequential([
    
    iaa.Fliplr(0.5), # horizontal flips
    iaa.Crop(percent=(0, 0.1)), # random crops
    
    # Small gaussian blur with random sigma between 0 and 0.5.
    # But we only blur about 50% of all images.
    iaa.Sometimes(
        0.5,
        iaa.GaussianBlur(sigma=(0, 0.5))
    ),
    
    # Strengthen or weaken the contrast in each image.
    iaa.LinearContrast((0.75, 1.5)),
    
    # Add gaussian noise.
    # For 50% of all images, we sample the noise once per pixel.
    # For the other 50% of all images, we sample the noise per pixel AND
    # channel. This can change the color (not only brightness) of the
    # pixels.
    #iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
    
    # Make some images brighter and some darker.
    # In 20% of all cases, we sample the multiplier once per channel,
    # which can end up changing the color of the images.
    iaa.Multiply((0.8, 1.2), per_channel=0.2),
    
    # Apply affine transformations to each image.
    # Scale/zoom them, translate/move them and rotate them.
    iaa.Affine(
        scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
        # The x range used to be (0.05, 0.05), i.e. a constant shift of 5%
        # to one side for every image; it is now symmetric like the y range.
        translate_percent={"x": (-0.05, 0.05), "y": (-0.05, 0.05)},
        rotate=(-5, 5),
        #shear=(-8, 8)
    )
], random_order=True) # apply augmenters in random order

# Preview: a 3 x 4 grid of randomly chosen, augmented training images.
axes = get_ax(cols=4, rows=3, size=4)

for ax in axes.flat:
  image_id = np.random.choice(train.image_ids)
  image = train.load_image(image_id)
  image = seq(images=[ image ])[0]

  ax.imshow(image)

In [0]:
# Build the training model, initialised from the COCO weights.
model = get_model(MODEL_DIR, init_with = 'coco')

In [0]:
#@title LR scheduler

from keras.callbacks import LearningRateScheduler

def get_schedule(base_lr = 0.001, momentum=0.995):
  """Build an exponentially decaying learning-rate schedule.

  Args:
    base_lr: learning rate at step 0.
    momentum: factor the rate is multiplied by at every step.

  Returns:
    A one-argument function mapping step x to base_lr * momentum ** x.
  """
  def decayed_rate(x):
    return base_lr * momentum ** x

  return decayed_rate

# Schedule with the default base rate (0.001) multiplied by 0.99 per epoch.
schedule = get_schedule(momentum=0.99)
lr_scheduler = LearningRateScheduler(schedule)

# Preview the learning rate over the first 300 epochs.
ax = get_ax()
ax.plot([ schedule(i) for i in range(300) ])

In [0]:
#@title Define plotter monitors

# One per-epoch plot with the training and validation loss, limited to the
# 128 most recent epochs.
monitors = [
    Monitor(scope='epoch', monitors = ['loss', 'val_loss'], plot_last=128),
]

# Single-cell grid; refresh_rate=-1 disables the per-batch redraw, so the plot
# is only refreshed at the end of each epoch.
plotter = Plotter(monitors=monitors, n_rows=1, n_cols=1, scale=6, refresh_rate=-1)

In [0]:
#@title Remove previously saved checkpoints

!rm -r logs

In [0]:
#@title Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
# Number of epochs for the head-only training stage.
head_epochs = 300

# NOTE(review): lr_scheduler is a Keras LearningRateScheduler; it presumably
# sets the learning rate at every epoch and thereby overrides the
# learning_rate passed here - confirm.
model.train(
    train, val, 
    learning_rate=config.LEARNING_RATE, 
    epochs=head_epochs, 
    layers='heads',
    augmentation=seq,
    custom_callbacks = [ plotter, lr_scheduler ]
)

In [0]:
#@title Fine tune all layers
# Passing layers="all" trains all layers. You can also 
# pass a regular expression to select which layers to
# train by name pattern.
# Number of additional epochs for the fine-tuning stage.
# NOTE(review): with 0 the epochs argument below equals head_epochs; if
# train() treats `epochs` as the total epoch count (as it appears to), this
# call trains nothing - confirm.
fine_epochs = 0

# NOTE(review): the same lr_scheduler is reused here, so the reduced
# learning_rate below is presumably overridden by the schedule - confirm.
model.train(
    train, val, 
    learning_rate=config.LEARNING_RATE * 0.1,
    epochs=head_epochs + fine_epochs, 
    layers="all",
    augmentation=seq,
    custom_callbacks = [ plotter, lr_scheduler ]
)

In [0]:
#@title Rebuild the model for inference

class InferenceConfig(FeetJointsConfig):
    """Training configuration with one image per GPU; all other values are inherited."""
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

inference_config = InferenceConfig()

# Recreate the model in inference mode
model = modellib.MaskRCNN(mode="inference", 
                          config=inference_config,
                          model_dir=MODEL_DIR)

# Get path to saved weights
# Either set a specific path or find last trained weights
# model_path = os.path.join(ROOT_DIR, ".h5 file name here")
model_path = model.find_last()

# Load trained weights
print("Loading weights from ", model_path)
model.load_weights(model_path, by_name=True)

In [0]:
#@title Test on random image, display GT first

# Val ground truth
# Pick a random validation image and load it with its ground truth, prepared
# according to the inference configuration.
image_id = np.random.choice(val.image_ids)
original_image, image_meta, gt_class_id, gt_bbox, gt_mask =\
    modellib.load_image_gt(val, inference_config, 
                           image_id, use_mini_mask=False)

# train.class_names is used for an image from val; both datasets register the
# same classes in load_joints.
visualize.display_instances(original_image, gt_bbox, gt_mask, gt_class_id, 
                            train.class_names, figsize=(16, 16))

In [0]:
#@title Prediction

# Run detection on the image loaded in the ground-truth cell above.
results = model.detect([original_image], verbose=1)

# One result per input image; show its boxes, masks, classes and scores.
r = results[0]
visualize.display_instances(original_image, r['rois'], r['masks'], r['class_ids'], 
                            val.class_names, r['scores'], ax=get_ax(size=16))