FIXME: this notebook still has to be tested.

# YOLACT
Config as Parameter Fork (+ Python 3.12)

This file shows how you can use this fork.

---
### Installation

1. Install Anaconda
2. Open your bash/terminal and navigate to this folder
3. Create the conda environment:
    ```bash
    conda env create -f environment.yml
    ```

---
### Imports

In [5]:
# add nn-lib
import sys
sys.path.append("./yolact")

# not needed here, but required if this notebook lives outside of this folder

In [6]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

# Utils
import time
import math
import random
from IPython.display import clear_output

# Image Utils
import numpy as np
import cv2
import matplotlib.pyplot as plt

# PyTorch & Torch-Utils
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from torchvision import datasets, transforms
import torchvision

# Import YOLACT - import what you need
from yolact import Yolact
from data import Config, yolact_base_config
from backbone import ResNetBackbone
from utils.augmentations import FastBaseTransform
from utils.logger import Log
from eval import prep_display
# from layers.output_utils import postprocess
from layers.modules import MultiBoxLoss
from utils.functions import MovingAverage
from layers.output_utils import postprocess
from train import train

RuntimeError: No CUDA GPUs are available

---
### Configurations

In [None]:
# Named activation callables; other configs select one by attribute,
# e.g. activation_func.relu.
activation_func = Config({
    'tanh':    torch.tanh,
    'sigmoid': torch.sigmoid,
    # Softmax taken over the last dimension.
    'softmax': lambda t: torch.softmax(t, dim=-1),
    # In-place ReLU: the tensor passed in is overwritten.
    'relu':    lambda t: torch.relu_(t),
    # Identity, i.e. no activation at all.
    'none':    lambda t: t,
})

In [None]:
# Dataset description: a name, the train/validation locations and the class names.
# NOTE(review): all paths start with "~" — confirm the loader expands the home directory.
data_config = Config({
    'name': "WISDOM",

    # Training images and annotations
    'train_images': "~/data/train/images",
    'train_info':   "-",
    'train_img_folder_path': "~/data/train/images/images", 
    'train_mask_folder_path': "~/data/train/images/masks", 

    # Validation images and annotations.
    'valid_images': "~/data/test/images",
    'valid_info':   "-",
    'valid_img_folder_path': "~/data/test/images/images", 
    'valid_mask_folder_path': "~/data/test/images/masks", 

    # Whether or not to load GT. If this is False, eval.py quantitative evaluation won't work.
    'has_gt': True,

    # A list of names, one for each of your classes.
    # NOTE(review): this is the same name "object" repeated 80 times — confirm
    # that it matches the number of classes the dataset really has.
    'class_names': ["object"]*80,
})

In [None]:
# Image pre-processing flags; attached to the backbone config below as its
# 'transform' entry.
resnet_transform = Config({
    'channel_order': 'RGB',
    'normalize': True,
    'subtract_means': False,
    'to_float': False,
})

In [None]:
# Backbone description; the training config references it under 'backbone'.
backbone_config = Config({
    'name': 'ResNet101',
    'path': 'resnet101_reducedfc.pth',
    'type': ResNetBackbone,
    # Positional arguments stored next to the backbone type.
    'args': ([3, 4, 23, 3],),
    'transform': resnet_transform,

    'selected_layers': [1, 2, 3],
    # Five single-entry scale lists, doubling each time: 24, 48, 96, 192, 384.
    'pred_scales': [[24 * 2 ** level] for level in range(5)],
    # The very same ratio list object is repeated five times.
    'pred_aspect_ratios': [[[1, 0.5, 2]]] * 5,

    'use_pixel_scales': True,
    'preapply_sqrt': False,
    'use_square_anchors': True,
})

In [None]:
# FPN settings; the training config references this object under its 'fpn' key.
fpn_config = Config({
    'num_features': 256,
    'interpolation_mode': 'bilinear',
    'num_downsample': 2,
    'use_conv_downsample': True,
    'pad': True,
    'relu_downsample_layers': False,
    'relu_pred_layers': True,
})

In [None]:
# Complete training configuration. It bundles the dataset, backbone and FPN
# configs defined above with the training and detection hyper-parameters.
train_yolact_config = Config({

    'name': "YOLACT Example",

    ############
    ### Data ###
    ############

    'dataset': data_config,
    'num_classes': len(data_config.class_names) + 1, # This should include the background class
    'max_size': 550,

    # Augmentation switches (the original note here only read "in SSD").
    # Randomize hue, vibrance, etc.
    'augment_photometric_distort': True,
    # Have a chance to scale down the image and pad (to emulate smaller detections)
    'augment_expand': True,
    # Potentially sample a random crop from the image and put it in a random place
    'augment_random_sample_crop': True,
    # Mirror the image with a probability of 1/2
    'augment_random_mirror': True,
    # Flip the image vertically with a probability of 1/2
    'augment_random_flip': False,
    # With uniform probability, rotate the image [0,90,180,270] degrees
    'augment_random_rot90': False,



    ########################
    ### Training Details ###
    ########################

    # Evaluates to 200000.
    'max_iter': 20*(50000//5), 
    'lr': 1e-3,
    'momentum': 0.9,
    'freeze_bn': False,
    'fpn': fpn_config,

    'decay': 5e-4,
    'gamma': 0.1,
    # NOTE(review): every step listed here is larger than max_iter (200000), so
    # presumably none of these steps is ever reached — confirm this is intended.
    'lr_steps': (280000, 600000, 700000, 750000),
    'lr_warmup_init': 1e-4,
    'lr_warmup_until': 500,

    # Backbone
    'backbone': backbone_config,

    # Loss scale factors
    'conf_alpha': 1,
    'bbox_alpha': 1.5,
    # Evaluates to roughly 30.625.
    'mask_alpha': 0.4 / 256 * 140 * 140, 

    'use_semantic_segmentation_loss': True,
    'semantic_segmentation_alpha': 1,

    'use_mask_scoring': False,
    'mask_scoring_alpha': 1,

    'use_focal_loss': False,
    'focal_loss_alpha': 0.25,
    'focal_loss_gamma': 2,
    'focal_loss_init_pi': 0.01,



    #################
    ### Detection ###
    #################

    'max_num_detections': 100,
    'eval_mask_branch': True,   # alternative value noted by the author: False
    
    'nms_top_k': 200,
    'nms_conf_thresh': 0.005,
    'nms_thresh': 0.5,

    'mask_type': 1,
    'mask_size': 6.125,
    'masks_to_train': 100,
    'mask_proto_src': 0,
    # Six layer tuples in total: three identical ones, then two more, then a final one.
    'mask_proto_net': [(256, 3, {'padding': 1})] * 3 + [(None, -2, {}), (256, 3, {'padding': 1})] + [(32, 1, {})],
    'mask_proto_bias': False,
    # The three activations below are taken from activation_func defined above.
    'mask_proto_prototype_activation': activation_func.relu,
    'mask_proto_mask_activation': activation_func.sigmoid,
    'mask_proto_coeff_activation': activation_func.tanh,
    'mask_proto_crop': True,
    'mask_proto_crop_expand': 0,
    'mask_proto_loss': None,
    'mask_proto_binarize_downsampled_gt': True,
    'mask_proto_normalize_mask_loss_by_sqrt_area': False,
    'mask_proto_reweight_mask_loss': False,
    'mask_proto_grid_file': 'data/grid.npy',
    'mask_proto_use_grid':  False,
    'mask_proto_coeff_gate': False,
    'mask_proto_prototypes_as_features': False,
    'mask_proto_prototypes_as_features_no_grad': False,
    'mask_proto_remove_empty_masks': False,
    'mask_proto_reweight_coeff': 1,
    'mask_proto_coeff_diversity_loss': False,
    'mask_proto_coeff_diversity_alpha': 1,
    'mask_proto_normalize_emulate_roi_pooling': True,
    'mask_proto_double_loss': False,
    'mask_proto_double_loss_alpha': 1,
    'mask_proto_split_prototypes_by_head': False,
    'mask_proto_crop_with_pred_box': False,
    'mask_proto_debug': False,

    # Both values are 4 divided by 550, the same number used for max_size above.
    'discard_box_width': 4 / 550,
    'discard_box_height': 4 / 550,

    'share_prediction_module': True,
    'ohem_use_most_confident': False,

    'use_class_balanced_conf': False,

    'use_sigmoid_focal_loss': False,

    'use_objectness_score': False,

    'use_class_existence_loss': False,
    'class_existence_alpha': 1,

    'use_change_matching': False,

    'extra_head_net': [(256, 3, {'padding': 1})],

    'head_layer_params': {'kernel_size': 3, 'padding': 1},

    'extra_layers': (0, 0, 0),

    'positive_iou_threshold': 0.5,
    'negative_iou_threshold': 0.4,

    'ohem_negpos_ratio': 3,

    'crowd_iou_threshold': 0.7,
    
    'force_cpu_nms': True,

    'use_coeff_nms': False,

    'use_instance_coeff': False,
    'num_instance_coeffs': 64,

    'train_masks': True,
    'train_boxes': True,
    'use_gt_bboxes': False,

    'preserve_aspect_ratio': False,

    'use_prediction_module': False,

    'use_yolo_regressors': False,
    
    'use_prediction_matching': False,

    'delayed_settings': [],

    'no_jit': False,

    'mask_dim': None,

    'use_maskiou': True, 
    
    # Five layer tuples, each with stride 2; the first value doubles from 8 to 128.
    'maskiou_net': [(8, 3, {'stride': 2}), (16, 3, {'stride': 2}), (32, 3, {'stride': 2}), (64, 3, {'stride': 2}), (128, 3, {'stride': 2})],

    'discard_mask_area': 5*5, # alternative value noted by the author: -1

    'maskiou_alpha': 25, # alternative value noted by the author: 6.125
    'rescore_mask': True,
    'rescore_bbox': False,
    'maskious_to_train': -1,
})

In [None]:
# Configuration object handed to training and inference below.
# Presumably .copy() returns an independent copy so the template above stays
# untouched — verify against the Config class.
cfg = train_yolact_config.copy()

---
### Train

In [None]:
# Start training with the configuration built above. The validation-mAP flag
# is switched off here.
train(configuration=cfg, should_compute_validation_map=False)

---
### Inference

In [None]:
# Load the demo image and prepare it as network input.
img_path = "./res/test.jpg"
image = cv2.imread(img_path)
# cv2.imread does not raise for a missing or unreadable file; it returns None,
# which would otherwise only surface as a cryptic error inside cvtColor.
if image is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")
# OpenCV loads images in BGR channel order; convert to RGB.
# NOTE(review): if FastBaseTransform also swaps channels (the transform config
# above uses channel_order 'RGB'), the network may receive swapped channels —
# confirm against the transform implementation.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
image = torch.from_numpy(image).cuda().float()

# Add a leading batch dimension before applying the transform.
prepared_image = FastBaseTransform()(image.unsqueeze(0))

In [None]:
# Build the network, load the example weights and run one forward pass.
# No gradients are needed for inference, so the model code runs under no_grad.
with torch.no_grad():
    model = Yolact()
    model.eval()
    model.load_weights("./weights/example.pth")
    model.cuda()

    preds = model(prepared_image)

    # Rendered result image that is plotted below.
    img_numpy = prep_display(
        preds, image, None, None,
        undo_transform=False,
        configuration=cfg,
    )

    # Raw detection results; postprocess is given the image width and height.
    h, w, _ = image.shape
    classes, scores, boxes, masks = postprocess(
        preds, w, h,
        batch_idx=0,
        interpolation_mode='bilinear',
        visualize_lincomb=False,
        crop_masks=True,
        score_threshold=0,
        configuration=cfg,
    )

# Plotting does not involve the network, so it sits outside the no_grad block.
plt.figure(figsize=(10, 10))
plt.axis("off")
plt.imshow(img_numpy);

---