In [1]:
import torch, detectron2, cv2
!nvcc --version
print("\nTorch CUDA version:", torch.version.cuda)
print("\nTorch version:", torch.__version__)
print("\ndetectron2:", detectron2.__version__)
print("\ncv2 version:",cv2.__version__)


nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Fri_Sep__8_19:56:38_Pacific_Daylight_Time_2023
Cuda compilation tools, release 12.3, V12.3.52
Build cuda_12.3.r12.3/compiler.33281558_0

Torch CUDA version: 12.1

Torch version: 2.1.1

detectron2: 0.6

cv2 version: 4.8.1


In [2]:
print("Python version: ")
!python --version

Python version: 
Python 3.8.18


In [3]:
# Some basic setup:
# Setup detectron2 logger
from detectron2.utils.logger import setup_logger
setup_logger()
import numpy as np
import os, json, cv2, random
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor,DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.utils.visualizer import ColorMode,GenericMask,Visualizer
from detectron2.structures.keypoints import heatmaps_to_keypoints
from detectron2.structures.keypoints_hack import heatmaps_to_keypoints2, heatmaps_to_keypoints_iterative, heatmaps_modify_iterative
from matplotlib import pyplot as plt
import glob
import random
from detectron2.data import build_detection_test_loader
from detectron2.evaluation import (
    CityscapesInstanceEvaluator,
    CityscapesSemSegEvaluator,
    COCOEvaluator,
    COCOPanopticEvaluator,
    LVISEvaluator,
    PascalVOCDetectionEvaluator,
    SemSegEvaluator,
    DatasetEvaluator,
    inference_on_dataset,
    print_csv_format,
    verify_results,
)
import cv2 

In [None]:
cd C:\R_projects\spat_1f_noise\deep_learning\detectron2\custom_training

In [5]:
# Register datasets and populate metadata
register_coco_instances("cat_data_train", {}, ".\coco_train.json", ".\datasets\master_sample")
register_coco_instances("cat_data_test", {}, ".\coco_test.json", ".\datasets\master_sample")
register_coco_instances("cat_data_val", {}, ".\coco_val.json", ".\datasets\master_sample")

MetadataCatalog.get("cat_data_train").set(thing_classes=["cat"])
MetadataCatalog.get("cat_data_train").set(thing_colors=(0,0,255))
MetadataCatalog.get("cat_data_train").set(keypoint_names=["head","middle","tail"])
MetadataCatalog.get("cat_data_train").set(keypoint_flip_map=[])
MetadataCatalog.get("cat_data_test").set(thing_classes=["cat"])
MetadataCatalog.get("cat_data_test").set(thing_colors=(0,0,255))
MetadataCatalog.get("cat_data_test").set(keypoint_names=["head","middle","tail"])
MetadataCatalog.get("cat_data_test").set(keypoint_flip_map=[])
MetadataCatalog.get("cat_data_val").set(thing_classes=["cat"])
MetadataCatalog.get("cat_data_val").set(thing_colors=(0,0,255))
MetadataCatalog.get("cat_data_val").set(keypoint_names=["head","middle","tail"])
MetadataCatalog.get("cat_data_val").set(keypoint_flip_map=[])

namespace(name='cat_data_val',
          json_file='.\\coco_val.json',
          image_root='.\\datasets\\master_sample',
          evaluator_type='coco',
          thing_classes=['cat'],
          thing_colors=(0, 0, 255),
          keypoint_names=['head', 'middle', 'tail'],
          keypoint_flip_map=[])

In [6]:
# Define custom data maper for augmentation and custom validation data loss eval hook

from detectron2.engine.hooks import HookBase
from detectron2.evaluation import inference_context
from detectron2.utils.logger import log_every_n_seconds
from detectron2.data import DatasetMapper, build_detection_test_loader
from detectron2.engine import DefaultTrainer
import detectron2.utils.comm as comm
import torch
import time
import datetime
import logging
from detectron2.data import detection_utils as utils
import detectron2.data.transforms as T
import copy

def custom_mapper(dataset_dict):
    dataset_dict = copy.deepcopy(dataset_dict)  # it will be modified by code below
    image = utils.read_image(dataset_dict["file_name"], format="BGR")
    transform_list = [
        T.Resize((1000,1000)),
        T.RandomBrightness(0.8, 1.8),
        T.RandomContrast(0.6, 1.3),
        T.RandomSaturation(0.8, 1.4),
        T.RandomRotation(angle=[90, 90]),
        T.RandomLighting(0.7),
        T.RandomFlip(prob=0.5, horizontal=False, vertical=True),
        T.RandomFlip(prob=0.5, horizontal=True, vertical=False),
    ]
    image, transforms = T.apply_transform_gens(transform_list, image)
    dataset_dict["image"] = torch.as_tensor(image.transpose(2, 0, 1).astype("float32"))

    annos = [
        utils.transform_instance_annotations(obj, transforms, image.shape[:2], 
                                             keypoint_hflip_indices = [0,1,2])
        for obj in dataset_dict.pop("annotations")
        if obj.get("iscrowd", 0) == 0
    ]
    instances = utils.annotations_to_instances(annos, image.shape[:2])
    dataset_dict["instances"] = instances
    #dataset_dict["instances"] = detectron2.data.detection_utils.filter_empty_instances(instances)
    return dataset_dict

class LossEvalHook(HookBase):
    def __init__(self, eval_period, model, data_loader):
        self._model = model
        self._period = eval_period
        self._data_loader = data_loader
    
    def _do_loss_eval(self):
        # Copying inference_on_dataset from evaluator.py
        total = len(self._data_loader)
        num_warmup = min(5, total - 1)
            
        start_time = time.perf_counter()
        total_compute_time = 0
        losses = []
        for idx, inputs in enumerate(self._data_loader):            
            if idx == num_warmup:
                start_time = time.perf_counter()
                total_compute_time = 0
            start_compute_time = time.perf_counter()
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            total_compute_time += time.perf_counter() - start_compute_time
            iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
            seconds_per_img = total_compute_time / iters_after_start
            if idx >= num_warmup * 2 or seconds_per_img > 5:
                total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
                eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
                log_every_n_seconds(
                    logging.INFO,
                    "Loss on Validation  done {}/{}. {:.4f} s / img. ETA={}".format(
                        idx + 1, total, seconds_per_img, str(eta)
                    ),
                    n=5,
                )
            loss_batch = self._get_loss(inputs)
            losses.append(loss_batch)
        mean_loss = np.mean(losses)
        self.trainer.storage.put_scalar('validation_loss', mean_loss)
        comm.synchronize()

        return losses
            
    def _get_loss(self, data):
        # How loss is calculated on train_loop 
        metrics_dict = self._model(data)
        metrics_dict = {
            k: v.detach().cpu().item() if isinstance(v, torch.Tensor) else float(v)
            for k, v in metrics_dict.items()
        }
        total_losses_reduced = sum(loss for loss in metrics_dict.values())
        return total_losses_reduced
        
        
    def after_step(self):
        next_iter = self.trainer.iter + 1
        is_final = next_iter == self.trainer.max_iter
        if is_final or (self._period > 0 and next_iter % self._period == 0):
            self._do_loss_eval()
        self.trainer.storage.put_scalars(timetest=12)

class CustomTrainer(DefaultTrainer):
    """
    Custom Trainer deriving from the "DefaultTrainer"

    Overloads build_hooks to add a hook to calculate loss on the test set during training.
    """

    def build_hooks(self):
        hooks = super().build_hooks()
        hooks.insert(-1, LossEvalHook(
            200, # Frequency of calculation - every 200 iterations here
            self.model,
            build_detection_test_loader(
                self.cfg,
                self.cfg.DATASETS.TEST[0],
                mapper=custom_mapper
            )
        ))

        return hooks


In [7]:
# Set model configs
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("cat_data_train",)
cfg.DATASETS.TEST = ("cat_data_val", ) # Called test in detectron2, but is actually the validation data
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.DATALOADER.NUM_WORKERS = 2
cfg.DATALOADER.FILTER_EMPTY_ANNOTATIONS = False 
cfg.TEST.EVAL_PERIOD = 0 # Turn off COCO evluator. 
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.0001  
cfg.SOLVER.MAX_ITER = 12000    
cfg.SOLVER.LR_SCHEDULER_NAME = "WarmupCosineLR"
cfg.SOLVER.WARMUP_ITERS = 500
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size"
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1 
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.03 # Set to lower?
cfg.TEST.DETECTIONS_PER_IMAGE = 1
cfg.SOLVER.CHECKPOINT_PERIOD = 500
cfg.MODEL.ROI_KEYPOINT_HEAD.NUM_KEYPOINTS = 3
cfg.TEST.KEYPOINT_OKS_SIGMAS = [0.1, 0.3, 0.1]
# cfg.INPUT.RANDOM_FLIP = "horizontal"
cfg.MODEL.MASK_ON = True
cfg.MODEL.KEYPOINT_ON = True
cfg.MODEL.ROI_KEYPOINT_HEAD.LOSS_WEIGHT = 5
cfg.MODEL.ROI_KEYPOINT_HEAD.NORMALIZE_LOSS_BY_VISIBLE_KEYPOINTS = False
#cfg.MODEL.ROI_KEYPOINT_HEAD.ITERATIVE = True
cfg.INPUT.USE_DIFF = False                      # Include difference in green channel 10 frames ago as the 4th channel if True
cfg.OUTPUT_DIR = "./modelv7_1"
#cfg.MODEL.BACKBONE.FREEZE_AT = 0
cfg.INPUT.MAX_SIZE_TEST = 1000
cfg.INPUT.MAX_SIZE_TRAIN = 1000
cfg.INPUT.MIN_SIZE_TEST = 800
cfg.INPUT.MIN_SIZE_TRAIN = 1000

cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN = 5000
cfg.MODEL.RPN.PRE_NMS_TOPK_TEST = 3000
cfg.MODEL.RPN.POST_NMS_TOPK_TRAIN = 2500
cfg.MODEL.RPN.POST_NMS_TOPK_TEST = 2000
cfg.MODEL.RPN.NMS_THRESH = 0.7


os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)

In [8]:
print(cfg)

CUDNN_BENCHMARK: False
DATALOADER:
  ASPECT_RATIO_GROUPING: True
  FILTER_EMPTY_ANNOTATIONS: False
  NUM_WORKERS: 2
  REPEAT_THRESHOLD: 0.0
  SAMPLER_TRAIN: TrainingSampler
DATASETS:
  PRECOMPUTED_PROPOSAL_TOPK_TEST: 1000
  PRECOMPUTED_PROPOSAL_TOPK_TRAIN: 2000
  PROPOSAL_FILES_TEST: ()
  PROPOSAL_FILES_TRAIN: ()
  TEST: ('cat_data_val',)
  TRAIN: ('cat_data_train',)
GLOBAL:
  HACK: 1.0
INPUT:
  CROP:
    ENABLED: False
    SIZE: [0.9, 0.9]
    TYPE: relative_range
  FORMAT: BGR
  MASK_FORMAT: polygon
  MAX_SIZE_TEST: 1000
  MAX_SIZE_TRAIN: 1000
  MIN_SIZE_TEST: 800
  MIN_SIZE_TRAIN: 1000
  MIN_SIZE_TRAIN_SAMPLING: choice
  RANDOM_FLIP: horizontal
  USE_DIFF: False
MODEL:
  ANCHOR_GENERATOR:
    ANGLES: [[-90, 0, 90]]
    ASPECT_RATIOS: [[0.5, 1.0, 2.0]]
    NAME: DefaultAnchorGenerator
    OFFSET: 0.0
    SIZES: [[32], [64], [128], [256], [512]]
  BACKBONE:
    FREEZE_AT: 2
    NAME: build_resnet_fpn_backbone
  DEVICE: cuda
  FPN:
    FUSE_TYPE: sum
    IN_FEATURES: ['res2', 'res3', '

In [None]:
# Train model
trainer = CustomTrainer(cfg)
trainer.resume_or_load(resume=False)
trainer.train()

In [None]:
# Look at model performance over iterations
%load_ext tensorboard
#%reload_ext tensorboard
%tensorboard --logdir "./modelv7_1"

### Go to directroy to delete temp file if TensorBoard fails to launch
# !del /S C:\Users\vsbpa\AppData\Local\Temp\.tensorboard-info

In [None]:
# Load fitted model as predictor
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_0007999.pth")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.3  # set to 0.3 for now, but actually filtered to 0.7 in R
predictor = DefaultPredictor(cfg)

In [None]:
# For writing ground truths and predictions into a csv file that spat1f can process in R
import csv
from datetime import datetime

# Function for writing csv
def write_csv(path, df):
    keys = df[0].keys()
    with open(path, "w", newline = "") as output_file: 
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(df)

# Extract predicted annotations
def extract_pred_annotations(predictor, fi, data_name):
    inst = predictor(cv2.imread(fi))["instances"].to("cpu")
    data = {}
    data["file_name"] = fi
    data["image_size"] = list(inst.image_size)
    try: data["thing_class"] = MetadataCatalog.get(data_name).get("thing_classes")[inst.pred_classes[0]]
    except: data["thing_class"] = "NA"
    try: data["score"] = inst.scores.tolist()[0]
    except: data["score"] = "NA"
    try: data["keypoints"] = np.array(inst.pred_keypoints.tolist()[0]).ravel().tolist()
    except: data["keypoints"] = "NA"
    try: data["bbox"] = inst.pred_boxes.tensor.tolist()[0]
    except: data["bbox"] = "NA"
    try:
        cont, _ = cv2.findContours(inst.pred_masks.numpy()[0,:,:].astype('uint8'),cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
        data["polygon"] = cont[0].ravel().tolist()
    except: 
        data["polygon"] = "NA"
    data["type"] = "prediction"
    return(data)

# Extract ground truth annotations
def extract_gt_annotations(data_dict, data_name):
    data = {}
    data["file_name"] = data_dict["file_name"]
    data["image_size"] = [data_dict["width"], data_dict["height"]]
    
    try: ann = data_dict["annotations"][0] # One instance for now
    except: ann = {}
    try: data["thing_class"] = MetadataCatalog.get('cat_data_test').get("thing_classes")[ann["category_id"]]
    except: data["thing_class"] = "NA"
    data["score"] = "NA"
    try: data["keypoints"] = ann["keypoints"]
    except: data["keypoints"] = "NA"
    try: data["bbox"] = ann["bbox"]
    except: data["bbox"] = "NA"
    try: data["polygon"] = ann["segmentation"][0]
    except: data["polygon"] = "NA"
    data["type"] = "ground_truth"
    return(data)
    
# Nice wrapper for prediction and ground truth data extraction and formatting
def img_inference2(predictor, data_name, inference_info = ""):
    d = DatasetCatalog.get(data_name)
    dataset = []
    tot = len(d)
    for i in range(tot):
        print(f"{i+1} of {tot}", end = "\r")
        fi = d[i]["file_name"]
        data = extract_pred_annotations(predictor, fi, data_name)
        data["inference_info"] = inference_info
        dataset.append(data)
        data = extract_gt_annotations(d[i], data_name)
        data["inference_info"] = inference_info
        dataset.append(data)
    return(dataset)

# Wrapper for applying img_inference2() to a registered dataset 
def write_img_inference2(predictor, write_path, file_name_ID, data_name, model_ver):
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    inference_info = model_ver + "__" + now
    df = img_inference2(predictor, data_name, inference_info)
    write_csv(os.path.join(write_path, file_name_ID + "_inference.csv"), df)

In [None]:
# For writing predictions

# For performing model inference on all images in a subdirectory corresponding to the supplied rep_ID in a root directory
def img_inference(predictor, rep_ID, root_path, inference_info):
    dataset = []
    f = glob.glob(os.path.join(root_path, rep_ID,"*[!_diff].jpg"))
    tot = len(f)
    for i in range(tot):
        print(f"{i+1} of {tot}", end = "\r")
        fi = f[i]
        inst = predictor(cv2.imread(fi))["instances"].to("cpu")
        data = {}
        data["file_name"] = fi
        data["image_size"] = list(inst.image_size)
        try: data["thing_class"] = MetadataCatalog.get('cat_data_test').get("thing_classes")[inst.pred_classes[0]]
        except: data["thing_class"] = "NA"
        try: data["score"] = inst.scores.tolist()[0]
        except: data["score"] = "NA"
        try: data["keypoints"] = np.array(inst.pred_keypoints.tolist()[0]).ravel().tolist()
        except: data["keypoints"] = "NA"
        try: data["bbox"] = inst.pred_boxes.tensor.tolist()[0]
        except: data["bbox"] = "NA"
        try:
            cont, _ = cv2.findContours(inst.pred_masks.numpy()[0,:,:].astype('uint8'),cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)
            data["polygon"] = cont[0].ravel().tolist()
        except: 
            data["polygon"] = "NA"
        data["inference_info"] = inference_info
        dataset.append(data)
    return(dataset)

# Wrapper for writing model inference on all images in a subdirectory of the root directory 
def write_img_inference(predictor, write_path, rep_ID, read_path, model_ver):
    now = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    inference_info = model_ver + "__" + now
    df = img_inference(predictor, rep_ID, read_path, inference_info)
    write_csv(os.path.join(write_path, rep_ID + "_inference.csv"), df)

In [None]:
# Perform inference on the testing dataset, saving both groundtruths and predictions
write_img_inference2(
    predictor, 
    write_path = "C:/R_projects/deep_learning_playground",
    file_name_ID = "master_modelv7_1_7999iter_test",
    data_name = "cat_data_test", 
    model_ver = "modelv7.1"
    )

In [None]:
# Perform inference on the training dataset, saving both groundtruths and predictions
write_img_inference2(
    predictor, 
    write_path = "C:/R_projects/deep_learning_playground",
    file_name_ID = "master_modelv7_1_7999iter_train",
    data_name = "cat_data_train", 
    model_ver = "modelv7.1"
    )

In [None]:
# Get a list of rep_IDs for looping
rep_IDs = [os.path.basename(x) 
 for x in 
 glob.glob("C:/R_projects/spat_1f_noise/processed_feed/rep*", recursive=False)]

In [None]:
# Make model inference on all rep_IDs
for ID in rep_IDs:
    print(f"Processing {ID}", end = "\n")
    write_img_inference(
    predictor, 
    write_path = "C:/R_projects/deep_learning_playground/inferences",
    rep_ID = ID,
    read_path = "C:/R_projects/spat_1f_noise/processed_feed", 
    model_ver = "modelv7.1"
    )
  