In [1]:
import os
import random
import numpy as np
import time
import datetime
import math

import torch
import torch.nn.functional as F 
import torchvision.datasets
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.transforms import v2
from torchvision.ops import nms
from torchmetrics.detection import MeanAveragePrecision as MAP
from PIL import ImageDraw

from torchvision.models.detection.faster_rcnn import FastRCNNPredictor, fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights,\
    fasterrcnn_mobilenet_v3_large_fpn, FasterRCNN_MobileNet_V3_Large_FPN_Weights
from torchvision.models.detection.retinanet import retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights
from torchvision.models.detection import ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights

from fedot.core.pipelines.pipeline_builder import PipelineBuilder

from fedcore.tools.ruler import PerformanceEvaluatorOD
from fedcore.architecture.dataset.object_detection_datasets import YOLODataset, COCODataset, UnlabeledDataset
from fedcore.architecture.comptutaional.devices import default_device
from fedcore.architecture.utils.loader import collate
from fedcore.data.data import CompressionInputData
from fedcore.inference.onnx import ONNXInferenceModel
from fedcore.neural_compressor.config import Torch2ONNXConfig
from fedcore.repository.constanst_repository import FEDOT_TASK
from fedcore.repository.initializer_industrial_models import FedcoreModels
from fedcore.repository.constanst_repository import CROSS_ENTROPY, MSE
from fedcore.architecture.visualisation.visualization import plot_train_test_loss_metric, apply_nms, get_image, filter_boxes
from fedcore.architecture.utils.loader import get_loader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Notebook-wide configuration: everything a reader might want to tune lives here.
device = default_device()
IMG_SIZE = 512  # only referenced by the commented-out v2.Resize in the transform cell
NMS_THRESH = 0.5  # threshold handed to apply_nms() in the inference cells
THRESH = 0.5  # threshold handed to filter_boxes() in the inference cells

# Standard ImageNet channel statistics; only referenced by the commented-out
# v2.Normalize in the transform cell.
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

INIT_LR = 4e-5  # SGD learning rate; weight decay is derived from it as INIT_LR / 2

EPOCHS = 10  # upper bound on training epochs (the loop may stop early)
BATCH_SIZE = 4  # batch size of the training loader

DATASET_NAME = 'african-wildlife'
OUTPUT_PATH = f'datasets/{DATASET_NAME}/output/images/'  # where rendered predictions are saved

In [3]:
# Image preprocessing: convert to a float32 tensor scaled to [0, 1].
# Normalisation / resizing are left disabled here.
transform = v2.Compose([
    v2.ToImage(),
    v2.ToDtype(torch.float32, scale=True),
    # v2.Normalize(mean=MEAN, std=STD),
    # v2.Resize((IMG_SIZE, IMG_SIZE))
])

# Labelled train / test splits in YOLO format.
train_dataset = YOLODataset(dataset_name=DATASET_NAME, transform=transform, train=True, log = True)
# train_dataset = COCODataset(images_path=COCO_PATH + "train2017/",
#                             json_path=COCO_PATH + "annotations/instances_train2017.json",
#                             transform=transform)

test_dataset = YOLODataset(dataset_name=DATASET_NAME, transform=transform, train=False)
# val_dataset = COCODataset(images_path=COCO_PATH + "val2017/",
#                             json_path=COCO_PATH + "annotations/instances_val2017.json",
#                             transform=transform)

# Unlabelled images: used later for bulk inference and as calibration data.
val_dataset = UnlabeledDataset(images_path=f'datasets/{DATASET_NAME}/valid/images/')

train_loader = get_loader(train_dataset, batch_size=BATCH_SIZE, train=True)
test_loader = get_loader(test_dataset)
val_loader = get_loader(val_dataset)

# More accurate, very slow to train
# model = fasterrcnn_resnet50_fpn_v2()

# Less accurate, but faster to train.
# `pretrained=True` is deprecated in torchvision; the explicit weights enum
# selects the same COCO checkpoint without the deprecation warning.
model = fasterrcnn_mobilenet_v3_large_fpn(
    weights=FasterRCNN_MobileNet_V3_Large_FPN_Weights.DEFAULT
)

# test
# model = ssdlite320_mobilenet_v3_large(weights=SSDLite320_MobileNet_V3_Large_Weights.DEFAULT)
# model = retinanet_resnet50_fpn_v2()

# Swap the COCO box-predictor head for one sized to this dataset's classes.
num_classes = len(train_dataset.classes)
in_features = model.roi_heads.box_predictor.cls_score.in_features
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model = nn.DataParallel(model)
model.to(device)  # moves the whole model, including the freshly created head

opt = optim.SGD(model.parameters(), lr=INIT_LR, momentum=0.9, weight_decay=INIT_LR/2)
# mode='max' because the scheduler is stepped with test mAP (higher is better).
scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, mode='max', patience=3, verbose=True)
tr_evaluator = PerformanceEvaluatorOD(model, train_loader, batch_size=BATCH_SIZE)
test_evaluator = PerformanceEvaluatorOD(model, test_loader, batch_size=1)

Data Path:  c:\Users\Kaefsky\Python\Fedcore\FedCore\datasets\african-wildlife


In [4]:
# Per-epoch history; truncated to the epochs actually run if training stops early.
tr_loss = np.zeros(EPOCHS)
test_loss = np.zeros(EPOCHS)
tr_map = np.zeros(EPOCHS)
test_map = np.zeros(EPOCHS)
train_time = np.zeros(EPOCHS)

# Snapshot of the weights that achieved the highest test mAP so far.
best_map = -math.inf
best_state = None

for epoch in range(EPOCHS):
    tStart = time.time()

    # ---- Train the model -------------------------------------------------
    model.train()
    loss_arr = np.zeros(len(train_loader))
    for i, (images, targets) in enumerate(train_loader):
        # forward: in train mode the detection model returns a dict of losses
        loss_dict = model(images, targets)
        loss = sum(loss_dict.values())
        # backward + optimize
        opt.zero_grad()
        loss.backward()
        opt.step()
        # Store a plain Python float: writing the tensor itself into a NumPy
        # array fails for tensors that require grad or live on the GPU.
        loss_arr[i] = loss.item()
    tr_loss[epoch] = loss_arr.mean()

    # ---- Train mAP -------------------------------------------------------
    model.eval()
    target_metric = tr_evaluator.measure_target_metric()
    tr_map[epoch] = float(target_metric["map"])

    # ---- Test loss -------------------------------------------------------
    # torchvision detection models only return losses in train mode, so the
    # model is switched back to train(); no_grad() keeps this pass from
    # building autograd graphs since no update is performed.
    model.train()
    loss_arr = np.zeros(len(test_loader))
    with torch.no_grad():
        for i, (images, targets) in enumerate(test_loader):
            loss_dict = model(images, targets)
            loss_arr[i] = sum(loss_dict.values()).item()
    test_loss[epoch] = loss_arr.mean()

    # ---- Test mAP --------------------------------------------------------
    model.eval()
    target_metric = test_evaluator.measure_target_metric()
    test_map[epoch] = float(target_metric["map"])

    # Optimize learning rate (scheduler was created with mode='max')
    scheduler.step(test_map[epoch])

    tEnd = time.time()
    train_time[epoch] = float(tEnd - tStart)

    # ---- Print metrics ---------------------------------------------------
    # Extra rule width for multi-digit epoch numbers; digit counting avoids
    # the float rounding of math.log(n, 10) at exact powers of ten.
    p = len(str(epoch + 1)) - 1
    print('-' * (40 + p))
    print('| %d | TRAIN | Loss: %.3f | mAP: %.3f |' %
            (epoch + 1, tr_loss[epoch], tr_map[epoch]))
    print('| %d | TEST  | Loss: %.3f | mAP: %.3f |' %
            (epoch + 1, test_loss[epoch], test_map[epoch]))
    print('-' * (13 + p),
            'Time: %.2f' % train_time[epoch],
            '-' * 14)

    # ---- Keep the best weights --------------------------------------------
    # Copy the tensors: `best_model = model` would only alias the live model,
    # which keeps changing in later epochs.
    if test_map[epoch] > best_map:
        best_map = test_map[epoch]
        best_state = {name: tensor.detach().clone()
                      for name, tensor in model.state_dict().items()}

    # Release cached GPU memory between epochs (no-op without CUDA).
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    # ---- Early stopping ---------------------------------------------------
    # Stop when test mAP has not improved relative to five epochs earlier.
    if epoch > 5 and test_map[epoch] <= test_map[epoch - 5]:
        tr_loss = tr_loss[:epoch + 1]
        test_loss = test_loss[:epoch + 1]
        tr_map = tr_map[:epoch + 1]
        test_map = test_map[:epoch + 1]
        train_time = train_time[:epoch + 1]
        print("Early stopping")
        break

# Final evaluating: restore the best weights in place so that `model` (and
# the evaluators that were constructed with it) all see the same parameters.
if best_state is not None:
    model.load_state_dict(best_state)
model.eval()
best_model = model
performance = test_evaluator.eval()
print('Before quantization')
print(performance)

Measuring target metric: 202batch [00:10, 18.78batch/s]
Measuring target metric: 202batch [00:07, 25.54batch/s]


[1] [TRAIN] Loss: 2.362 | mAP: 0.012
[1] [TEST] Loss: 2.368 | mAP: 0.012


Measuring target metric: 202batch [00:08, 24.35batch/s]
Measuring target metric: 202batch [00:07, 25.27batch/s]


[2] [TRAIN] Loss: 2.099 | mAP: 0.027
[2] [TEST] Loss: 2.248 | mAP: 0.027


Measuring target metric: 202batch [00:08, 25.12batch/s]
Measuring target metric: 202batch [00:07, 27.51batch/s]


[3] [TRAIN] Loss: 2.017 | mAP: 0.057
[3] [TEST] Loss: 2.219 | mAP: 0.057
Early stopping


Measuring latency: 100%|██████████| 50/50 [00:01<00:00, 28.72rep/s]
Measuring throughput: 100%|██████████| 5/5 [00:00<00:00, 28.57batch/s]
Measuring target metric: 202batch [00:07, 26.19batch/s]


Latency: 0.17066 ms/sample with batch_size 1
Throughput: 84746.0 samples/s with batch_size 1
Model size: 72.458 MB
Before quantization
{'latency': 0.17066, 'throughput': 84746.0, 'model_size': 72.458, 'target_metrics': {'map': tensor(0.0569), 'map_50': tensor(0.1756), 'map_75': tensor(0.0116), 'map_small': tensor(0.0002), 'map_medium': tensor(0.0188), 'map_large': tensor(0.0803), 'mar_1': tensor(0.1879), 'mar_10': tensor(0.4449), 'mar_100': tensor(0.4746), 'mar_small': tensor(0.0333), 'mar_medium': tensor(0.3502), 'mar_large': tensor(0.4955), 'classes': tensor([1, 2, 3, 4], dtype=torch.int32)}}


In [None]:
# Plot the per-epoch train/test loss and mAP histories recorded by the training cell.
plot_train_test_loss_metric(tr_loss, test_loss, tr_map, test_map)

In [None]:
# Date stamp as YY-MM-DD. strftime is used instead of slicing str(now), whose
# length changes when the microsecond field happens to be zero.
now = datetime.datetime.now().strftime('%y-%m-%d')
# Save the underlying detector rather than the DataParallel wrapper so the
# file is named after the real architecture and loads without the wrapper.
to_save = model.module if isinstance(model, nn.DataParallel) else model
torch.save(to_save, f'{to_save._get_name()}_{DATASET_NAME}_{now}.pt')

In [4]:
# Load a previously saved, fully pickled model.
# NOTE(security): weights_only=False unpickles arbitrary objects and can run
# code embedded in the file -- only load checkpoints you created yourself.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only host;
# the following cells run inference on the CPU anyway.
model = torch.load('FasterRCNN_african-wildlife_24-07-07.pt',
                   map_location='cpu',
                   weights_only=False)

In [5]:
# Run the detector on one labelled test image and show predictions next to
# the ground truth.
model.cpu()
model.eval()  # inference mode: the model returns detections instead of losses

# Draw the index from the dataset that is actually indexed below (the
# original used len(val_dataset), which is a different dataset).
sample_idx = random.randint(0, len(test_loader.dataset) - 1)  # random or int
img, target = test_loader.dataset[sample_idx]

with torch.no_grad():  # no gradients needed for inference
    pred = model(torch.unsqueeze(img, dim=0))
pred = apply_nms(pred[0], NMS_THRESH)
pred = filter_boxes(pred, THRESH)

# Show inference image
to_pil = v2.ToPILImage()
img = to_pil(img)
inference_img = get_image(img, pred, train_dataset.classes, target)
inference_img.show()

In [6]:
# Predicting all inference images and saving the rendered results.
os.makedirs(OUTPUT_PATH, exist_ok=True)  # create the output folder once
to_pil = v2.ToPILImage()
model.eval()  # inference mode: the model returns detections instead of losses

with torch.no_grad():  # no gradients needed for inference
    for data in val_loader:
        # data[0] holds the images and data[1] the matching metadata dicts;
        # iterate over the whole batch so nothing is skipped when the loader
        # yields more than one image per batch.
        for image, meta in zip(data[0], data[1]):
            image = image.cpu()
            pred = model(torch.unsqueeze(image, dim=0))
            pred = apply_nms(pred[0], NMS_THRESH)
            pred = filter_boxes(pred, THRESH)
            inference_img = get_image(to_pil(image), pred, train_dataset.classes)
            inference_img.save(os.path.join(OUTPUT_PATH, meta['name']))

In [None]:
# Post-training quantization of the trained detector through a FedCore pipeline.
model = model.cpu()
# NOTE(review): setup_repository() presumably registers FedCore's operations so
# that the 'post_training_quant' node can be resolved below -- confirm.
repo = FedcoreModels().setup_repository()
compression_pipeline = PipelineBuilder().add_node('post_training_quant').build()

# The model to compress is passed as `target`; the unlabelled loader is passed
# as the calibration data. `features` looks like a placeholder array -- TODO confirm.
input_data = CompressionInputData(features=np.zeros((2, 2)),
                                    idx=None,
                                    calib_dataloader=val_loader,
                                    task=FEDOT_TASK['regression'],
                                    data_type=None,
                                    target=model
)

input_data.supplementary_data.is_auto_preprocessed = True
compression_pipeline.fit(input_data)
# The quantized model is read from the `.predict` attribute of the pipeline output.
quant_model = compression_pipeline.predict(input_data).predict

In [None]:
# Export the quantized model to ONNX and wrap the exported file for inference.
onnx_path = "int8-model.onnx"

# A single training image with a leading batch axis serves as the example input.
example_input = train_dataset[0][0].unsqueeze(0)

# Axis 0 of both the input and the output is exported as a dynamic batch axis.
dynamic_batch_axes = {
    'input': {0: 'batch_size'},
    'output': {0: 'batch_size'},
}

int8_onnx_config = Torch2ONNXConfig(
    dtype="int8",
    opset_version=18,
    quant_format="QDQ",  # or "QLinear"
    example_inputs=example_input,
    input_names=["input"],
    output_names=["output"],
    dynamic_axes=dynamic_batch_axes,
)

quant_model.export(onnx_path, int8_onnx_config)
onnx_model = ONNXInferenceModel(onnx_path)

In [None]:
# Benchmark the exported int8 ONNX model. The original passed the
# un-quantized `model` here, so the "after quantization" figures merely
# repeated the pre-quantization measurement.
evaluator = PerformanceEvaluatorOD(onnx_model, test_loader, batch_size=1)
performance = evaluator.eval()
print('after quantization')
print(performance)