In [1]:
from flash.core.data.utils import download_data
from flash.image import ObjectDetectionData, ObjectDetector
from flash.image.detection.output import FiftyOneDetectionLabelsOutput
from sklearn.model_selection import train_test_split
from tqdm.auto import tqdm
from PIL import Image
from glob import glob

import torch
import io, base64
import numpy as np
import flash
import json
import utils

In [2]:
# Log the versions of the two core libraries this notebook was executed with.
for template, module in (
    ("flash version: {}", flash),
    ("torch version: {}", torch),
):
    print(template.format(module.__version__))

flash version: 0.7.0rc0
torch version: 1.10.1


In [3]:
# Folder read by utils.create_df_from_dir.
data_path = "./monkey-opencv"
# Output folders handed to utils.create_dataset for the train / test splits.
train_dir, test_dir = "./train", "./test"

In [4]:
# Build the dataframe for everything found under data_path.
# NOTE(review): create_df_from_dir lives in the project-local `utils` module;
# the column schema it returns is not visible from this notebook.
df = utils.create_df_from_dir(data_path)

In [5]:
# Hold out 15% of the rows as the test split; the fixed random_state keeps the
# split identical between runs.
df_train, df_test = train_test_split(df, test_size=0.15, random_state=42)

In [6]:
# Hand each split to utils.create_dataset together with its own output folder
# (train first, then test — same order as before).
for split_df, split_dir in ((df_train, train_dir), (df_test, test_dir)):
    utils.create_dataset(split_df, output_path=split_dir)

  0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/59 [00:00<?, ?it/s]

## Create COCO annotation files

In [7]:
# Destination files for the COCO-format annotations, one per split.
test_coco_annotation_path = "test_coco_monkey_annotation.json"  # test split
coco_annotation_path = "coco_monkey_annotation.json"  # training split

In [8]:
# Gather every *.json file in the training folder and convert them into a single
# COCO dictionary via utils.create_coco_data_dict.
# glob() returns files in arbitrary order, so sort for a reproducible result.
paths = sorted(glob("{}/*.json".format(train_dir)))
coco_data_dict = utils.create_coco_data_dict(paths)

# Write through a context manager so the file is flushed and closed before any
# later cell reads it (the bare open() used previously was never closed).
with open(coco_annotation_path, "w") as annotation_file:
    json.dump(coco_data_dict, annotation_file, indent=2)

0it [00:00, ?it/s]

In [9]:
# Same conversion for the test split: collect the *.json files in the test
# folder and turn them into one COCO dictionary.
# glob() returns files in arbitrary order, so sort for a reproducible result.
paths = sorted(glob("{}/*.json".format(test_dir)))
test_coco_data_dict = utils.create_coco_data_dict(paths)

# Write through a context manager so the file is flushed and closed before any
# later cell reads it (the bare open() used previously was never closed).
with open(test_coco_annotation_path, "w") as annotation_file:
    json.dump(test_coco_data_dict, annotation_file, indent=2)

0it [00:00, ?it/s]

In [10]:
# Training parameters

In [16]:
# --- Architecture: passed as `head` / `backbone` to ObjectDetector ---
model_head = "efficientdet"
model_backbone = "d0"

# --- Input: used for the data transforms and for the model's image_size ---
image_size = 256

# --- Optimisation ---
learning_rate = 0.005
batch_size = 16
max_epochs = 20

In [13]:
# Image folders and COCO annotation files for the train and test splits.
coco_sources = dict(
    train_folder=train_dir,
    train_ann_file=coco_annotation_path,
    test_folder=test_dir,
    test_ann_file=test_coco_annotation_path,
)

# Build the Flash datamodule; val_split=0.1 is forwarded so a validation subset
# is carved out of the training data.
datamodule = ObjectDetectionData.from_coco(
    **coco_sources,
    val_split=0.1,
    batch_size=batch_size,
    transform_kwargs={"image_size": image_size},
)

  0%|          | 0/663 [00:00<?, ?it/s]

[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m122[0m


  0%|          | 0/331 [00:00<?, ?it/s]

  0%|          | 0/118 [00:00<?, ?it/s]

[1m[1mINFO    [0m[1m[0m - [1m[34m[1mAutofixing records[0m[1m[34m[0m[1m[0m | [36micevision.parsers.parser[0m:[36mparse[0m:[36m122[0m


  0%|          | 0/59 [00:00<?, ?it/s]

  rank_zero_deprecation(


## Train

In [None]:
# Reference only: an alternative detector configuration (RetinaNet head with a
# resnet18_fpn backbone, learning rate 0.0001, pretrained weights).
# It is wrapped in a string literal, so running this cell builds no model —
# the cell only evaluates the string.
"""
model = ObjectDetector(
    head="retinanet", 
    backbone="resnet18_fpn", 
    num_classes=datamodule.num_classes, 
    image_size=image_size,
    output=FiftyOneDetectionLabelsOutput(return_filepath=True),
    learning_rate=0.0001,
    pretrained=True
)
"""

In [17]:
# Output formatter: predictions come back as FiftyOne detection labels that
# also carry the source file path.
detection_output = FiftyOneDetectionLabelsOutput(return_filepath=True)

# Detector built from the head/backbone chosen in the parameter cell; the
# number of classes is taken from the datamodule.
model = ObjectDetector(
    head=model_head,
    backbone=model_backbone,
    num_classes=datamodule.num_classes,
    image_size=image_size,
    learning_rate=learning_rate,
    output=detection_output,
)

Using 'efficientdet' provided by airctic/IceVision (https://github.com/airctic/icevision) and rwightman/efficientdet-pytorch (https://github.com/rwightman/efficientdet-pytorch).


In [18]:
# Train on one GPU when CUDA is available and fall back to CPU otherwise, so the
# notebook also runs on machines without a GPU (previously hard-coded gpus=1).
num_gpus = 1 if torch.cuda.is_available() else 0
trainer = flash.Trainer(max_epochs=max_epochs, gpus=num_gpus)

# "no_freeze" fine-tunes the whole network: the model summary reports
# 0 non-trainable parameters with this strategy.
trainer.finetune(model, datamodule=datamodule, strategy="no_freeze")

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name          | Type                            | Params
------------------------------------------------------------------
0 | train_metrics | ModuleDict                      | 0     
1 | val_metrics   | ModuleDict                      | 0     
2 | test_metrics  | ModuleDict                      | 0     
3 | adapter       | IceVisionObjectDetectionAdapter | 3.8 M 
------------------------------------------------------------------
3.8 M     Trainable params
0         Non-trainable params
3.8 M     Total params
15.303    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

  rank_zero_warn(
  indices_all = cls_topk_indices_all // num_classes
  rank_zero_warn(
  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]



Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

In [19]:
# Checkpoint filename encodes head, backbone, image size and epoch count so the
# file identifies the configuration that produced it.
check_point_path = f"finetuned_{model_head}_{model_backbone}_{image_size}_{max_epochs}.ckpt"

In [20]:
# Persist the fine-tuned model state to check_point_path.
trainer.save_checkpoint(check_point_path)

In [22]:
# Evaluate on the test split registered in the datamodule
# (test_folder / test_ann_file); the metrics dict is echoed as the cell output.
trainer.test(model, datamodule=datamodule)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]



--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'Precision (IoU=0.50:0.95,area=all)': 0.47619298100471497,
 'Recall (IoU=0.50:0.95,area=all,maxDets=100)': 0.5220338702201843,
 'box_loss': 0.00354116247035563,
 'class_loss': 0.27890318632125854,
 'loss': 0.45596131682395935}
--------------------------------------------------------------------------------


[{'loss': 0.45596131682395935,
  'class_loss': 0.27890318632125854,
  'box_loss': 0.00354116247035563,
  'Precision (IoU=0.50:0.95,area=all)': 0.47619298100471497,
  'Recall (IoU=0.50:0.95,area=all,maxDets=100)': 0.5220338702201843}]

## Prediction

In [23]:
from torch.utils.data import DataLoader

In [24]:
# Every JPEG in the test folder is used as a prediction input.
# glob() returns files in arbitrary order; sorting keeps the order of the
# prediction inputs (and therefore of the outputs) stable between runs.
predict_files = sorted(glob(f"{test_dir}/*.jpg"))
len(predict_files)

59

In [29]:
# Prediction-only datamodule over the collected image files: one image per
# batch, resized with the same image_size used for training.
predict_transform_kwargs = {"image_size": image_size}
predict_dataset = ObjectDetectionData.from_files(
    predict_files=predict_files,
    transform_kwargs=predict_transform_kwargs,
    batch_size=1,
)

In [30]:
# Run inference over the prediction datamodule. The result is consumed below as
# an iterable of per-batch prediction lists.
predictions = trainer.predict(model, datamodule=predict_dataset)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Predicting: 18it [00:00, ?it/s]

## Visualize with fiftyone

- https://voxel51.com/docs/fiftyone/getting_started/install.html
- Install FiftyOne, then restart the Jupyter kernel so the new package is picked up:
```sh
pip install fiftyone
```

In [31]:
from flash.core.integrations.fiftyone import visualize
from itertools import chain

In [32]:
# Flatten the per-batch prediction lists into one flat list of predictions.
_predictions = [prediction for batch in predictions for prediction in batch]

In [33]:
# Hand the flattened predictions to Flash's FiftyOne integration and keep the
# returned object in `session`.
session = visualize(_predictions)

 100% |███████████████████| 59/59 [38.6ms elapsed, 0s remaining, 1.5K samples/s]      
