In [254]:
import cv2
import json
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import wandb
import ultralytics

from glob import glob
from torch.utils.data import Dataset
from tqdm import tqdm
from ultralytics import YOLO
from ultralytics.yolo.v8.detect.train import Loss

import params

In [253]:
# Reload imported modules automatically so edits to local modules (e.g. `params`)
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [14]:
# Print the Ultralytics environment summary (library/Python/torch versions, hardware).
# NOTE(review): device='mps' is requested, yet the recorded output reports "CPU" — confirm MPS is actually available.
ultralytics.checks(device='mps')

Ultralytics YOLOv8.0.78 🚀 Python-3.9.16 torch-1.13.1 CPU
Setup complete ✅ (10 CPUs, 16.0 GB RAM, 530.5/926.4 GB disk)


In [5]:
# Parse the annotation JSON of both splits; the resulting dicts are passed to create_dataset below.
with open('datasets/data/annotations/train.json', 'r') as f:
    train_data = json.load(f)
with open('datasets/data/annotations/val.json', 'r') as f:
    val_data = json.load(f)

In [3]:
import os

In [35]:
### Create YOLO labels

def create_dataset(coco_json_file, mode='train'):
    """
    Write one YOLO-format label file per image listed in a COCO-style dict.

    For every entry of ``coco_json_file['images']`` a text file is written to
    ``datasets/data/labels/{mode}/<image stem>.txt``. Each annotation of that
    image becomes one line ``<category_id> <x_center> <y_center> <width> <height>``.
    The box is read from ``anno['bbox']`` as ``x, y, w, h`` (``x, y`` being the
    top-left corner) and normalised by the image width and height. Images
    without annotations get an empty label file.

    The path of every image that is missing from ``datasets/data/images/{mode}/``
    is printed; its label file is still written.

    Args:
        coco_json_file: dict with an ``'images'`` list (entries providing ``id``,
            ``file_name``, ``width``, ``height``) and an ``'annotations'`` list
            (entries providing ``image_id``, ``category_id``, ``bbox``).
        mode: name of the split sub-directory, e.g. ``'train'`` or ``'val'``.
    """

    images_data = coco_json_file['images']
    annotations = coco_json_file['annotations']

    labels_dir = f'datasets/data/labels/{mode}'
    images_dir = f'datasets/data/images/{mode}'
    # Create the output directory up front so a fresh checkout does not fail on open().
    os.makedirs(labels_dir, exist_ok=True)

    # Index annotations by image once instead of rescanning the whole list for every image.
    annos_by_image = {}
    for anno in annotations:
        annos_by_image.setdefault(anno['image_id'], []).append(anno)

    for image_data in tqdm(images_data):
        file_name = image_data['file_name']
        # splitext drops only the final extension, so dotted names such as
        # 'clip.0001.png' keep their full stem ('clip.0001').
        label_file_name = os.path.splitext(file_name)[0] + '.txt'
        label_file_path = f'{labels_dir}/{label_file_name}'
        image_path = f'{images_dir}/{file_name}'
        if not os.path.exists(image_path):
            print(image_path)

        image_width = image_data['width']
        image_height = image_data['height']

        with open(label_file_path, 'w') as f:
            for anno in annos_by_image.get(image_data['id'], []):
                category = anno['category_id']
                x, y, w, h = anno['bbox']
                # Convert top-left + size to a normalised centre + size.
                x_center = (x + w / 2) / image_width
                y_center = (y + h / 2) / image_height
                w /= image_width
                h /= image_height

                f.write(f'{int(category)} {x_center} {y_center} {w} {h}\n')

# Generate the label files for both splits (validation first, then training).
create_dataset(val_data, 'val')
create_dataset(train_data, 'train')

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1190/1190 [00:00<00:00, 3100.93it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4760/4760 [00:05<00:00, 895.95it/s]


In [19]:
# Training hyper-parameters. The individual names are read directly by later
# cells (e.g. the `model.train(...)` call), so they stay module-level variables.
seed = 42
image_size = 640
batch_size = 8
epochs = 10
warmup_epochs = 1
lr = 1e-3
pretrained = True
augment = True

# Snapshot of the settings above, passed to `wandb.init(config=...)`.
config = {
    'framework': 'ultralytics',
    'image_size': (image_size, image_size),
    'batch_size': batch_size,
    'pretrained': pretrained,
    'epochs': epochs,
    'warmup_epochs': warmup_epochs,
    'lr': lr,
    'augment': augment,
    'seed': seed,
}

In [6]:
# Start the W&B run that the training callbacks and the summary cells below write to.
run = wandb.init(project=params.WANDB_PROJECT, entity=params.ENTITY, job_type='training', config=config)

[34m[1mwandb[0m: Currently logged in as: [33mmatt-zak[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [20]:
# Build the detector from the 'yolov8m.pt' checkpoint; this is the model that is trained below.
model = YOLO('yolov8m.pt')

In [9]:
trainers = []  # not referenced by any visible cell; kept in case other code relies on the name


def _wandb_metrics(metrics, prefix):
    """
    Build a W&B log payload from a trainer metrics mapping.

    Every key loses its 'metrics/' prefix, has '/' replaced by a space and is
    prefixed with ``prefix`` (e.g. 'metrics/mAP50(B)' -> 'epoch mAP50(B)').
    An empty or ``None`` mapping yields an empty dict.
    """
    payload = {}
    for name, value in (metrics or {}).items():
        name = ' '.join(name.replace('metrics/', '').split('/'))
        payload[f'{prefix} {name}'] = value
    return payload


def on_fit_epoch_end(trainer):
    """
    Log the trainer's current metrics to W&B under 'epoch <metric>' keys.

    The epoch number is logged as an 'epoch' field rather than as the W&B step:
    the per-batch callback advances W&B's internal step on every call, so an
    explicit ``step=trainer.epoch`` would fall behind it and be rejected as
    non-monotonic. All metrics go out in a single ``run.log`` call.
    """
    payload = _wandb_metrics(trainer.metrics, 'epoch')
    if payload:
        payload['epoch'] = trainer.epoch
        run.log(payload)


def on_train_batch_end(trainer):
    """
    Log the trainer's current metrics to W&B under 'running <metric>' keys.

    NOTE(review): `trainer.metrics` looks like the epoch-level validation
    metrics, so the values logged here presumably only change once per epoch —
    confirm whether the running training loss was intended instead.
    """
    payload = _wandb_metrics(trainer.metrics, 'running')
    if payload:
        run.log(payload)
    
# Attach the W&B logging hooks to the current `model` object.
# NOTE(review): the recorded execution counts show this cell ran (In [9]) before `model`
# was re-created (In [20]) and trained (In [22]); re-run this cell whenever `model` is rebuilt.
model.add_callback('on_fit_epoch_end', on_fit_epoch_end)
model.add_callback('on_train_batch_end', on_train_batch_end)

In [22]:
# Train the model on the dataset described by ./dataset.yaml using the
# hyper-parameters defined in the config cell above.
# NOTE(review): device='mps' is requested, but the recorded log header reports "CPU" —
# confirm which device the run actually used.
results = model.train(
    data='./dataset.yaml',
    epochs=epochs,
    warmup_epochs=warmup_epochs,
    imgsz=image_size,
    batch=batch_size,
    augment=augment,
    workers=0,
    pretrained=pretrained,
    cache=True,
    save=True, 
    verbose=False,
    name='test_run',
    optimizer='AdamW',
    lr0=lr,
    device='mps',
    seed=seed
)

New https://pypi.org/project/ultralytics/8.0.83 available 😃 Update with 'pip install -U ultralytics'
Ultralytics YOLOv8.0.78 🚀 Python-3.9.16 torch-1.13.1 CPU
[34m[1myolo/engine/trainer: [0mtask=detect, mode=train, model=yolov8m.pt, data=./dataset.yaml, epochs=10, patience=50, batch=8, imgsz=640, save=True, save_period=-1, cache=True, device=mps, workers=0, project=None, name=test_run, exist_ok=False, pretrained=True, optimizer=AdamW, verbose=False, seed=42, deterministic=True, single_cls=False, image_weights=False, rect=False, cos_lr=False, close_mosaic=0, resume=False, amp=True, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, line_thickness=3, visualize=False, augment=True, agnostic_nms=False, classes=None, retina_masks=False, boxes=True, f

[]

In [125]:
# Persist only the network weights (the state_dict of the wrapped torch module).
torch.save(model.model.state_dict(), 'yolov8_trained.pt')

In [23]:
# Evaluate the model; the recorded output shows this scored the 1190-image validation split.
val_results = model.val()

Ultralytics YOLOv8.0.78 🚀 Python-3.9.16 torch-1.13.1 CPU
Model summary (fused): 218 layers, 25916767 parameters, 0 gradients, 79.1 GFLOPs
[34m[1mval: [0mScanning /Users/matthewzak/projects/fanthomnet/experiments/datasets/data/labels/val.cache... 1190 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1190/1190 [00:00<?, ?it/s][0m
[34m[1mval: [0mCaching images (0.8GB True): 100%|██████████| 1190/1190 [00:03<00:00, 364.24it/s][0m
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 149/149 [02:20<00:00,  1.06it/s]
                   all       1190       4561      0.615      0.281      0.301      0.187
Speed: 0.3ms preprocess, 115.7ms inference, 0.0ms loss, 0.6ms postprocess per image
Results saved to [1mruns/detect/test_run36[0m


In [76]:
# Pull the box-detection metrics off the validation result so later cells can log them.
# AP, AP50, F1 and P are per-class arrays: later cells zip them with `ap_class_index`.
# NOTE(review): `all_AP` and `nr_classes` are not read by any visible cell.
all_AP = val_results.box.all_ap
AP = val_results.box.ap
AP50 = val_results.box.ap50
F1 = val_results.box.f1
MAP = val_results.box.map
MAP50 = val_results.box.map50
MAP75 = val_results.box.map75
MP = val_results.box.mp
MR = val_results.box.mr
nr_classes = val_results.box.nc
P = val_results.box.p
# Displayed as the cell output: number of classes that have per-class metrics.
val_results.box.ap_class_index.shape

(85,)

In [122]:
# Sanity check: the precision array has one entry per class in `ap_class_index` (both are (85,)).
P.shape

(85,)

In [81]:
# Only 85 of the 133 object classes are observed in the validation set.

In [111]:
# Class ids that the per-class metric arrays are paired with in the summary cells below.
val_results.box.ap_class_index

array([  0,   1,   2,   3,   4,   5,   6,  10,  11,  13,  14,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,  28,  32,  38,  40,  41,  42,  43,  44,  45,  46,  47,  49,  50,  52,  53,  57,  59,  61,  62,  63,  65,  66,  67,  68,  69,  73,  75,  76,  79,  80,  82,  83,  84,  85,  86,  87,  88,  91,  92,  93,  99,
       100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 113, 114, 115, 116, 117, 118, 119, 120, 122, 123, 127, 129])

In [98]:
# Lookup from class index to class name, used to build readable metric keys.
class_names = val_results.names

In [123]:
# Record every per-class metric in the W&B run summary as '<class name>_<metric>'.
# One data-driven loop replaces four copy-pasted ones; the write order
# (all avg_precision, then avg_precision50, f1, precision) is unchanged.
per_class_metrics = (
    ('avg_precision', AP),
    ('avg_precision50', AP50),
    ('f1', F1),
    ('precision', P),
)
for metric_suffix, per_class_values in per_class_metrics:
    # Each array is aligned with `ap_class_index`, which holds the class id of every entry.
    for label_idx, class_value in zip(val_results.box.ap_class_index, per_class_values):
        run.summary[f'{class_names[label_idx]}_{metric_suffix}'] = class_value

In [258]:
# Inspect the aggregate metrics (precision, recall, mAP50, mAP50-95, fitness) before logging them.
val_results.results_dict.items()

dict_items([('metrics/precision(B)', 0.6152105212684763), ('metrics/recall(B)', 0.2814127332609715), ('metrics/mAP50(B)', 0.3007012718597112), ('metrics/mAP50-95(B)', 0.18677930400459652), ('fitness', 0.198171500790108)])

In [100]:
# Copy the aggregate validation metrics into the run summary under their original keys.
for metric, value in val_results.results_dict.items():
    run.summary[metric] = value

In [109]:
# Add the mean metrics extracted from `val_results.box` under short, readable keys.
run.summary['mAP'] = MAP
run.summary['mAP50'] = MAP50
run.summary['mAP75'] = MAP75
run.summary['mP'] = MP
run.summary['mR'] = MR

In [151]:
def _read_pixel_boxes(labels_path, width, height):
    """
    Parse a YOLO label file into pixel-space boxes.

    Each non-blank line is read as ``category x_center y_center w h`` with the
    four box values normalised to [0, 1]; they are scaled by ``width`` /
    ``height`` and returned as ``[category_id, x1, y1, x2, y2]`` lists of ints
    (corners rounded half-up via ``int(v + 0.5)``).
    """
    boxes = []
    with open(labels_path, 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines instead of failing on fields[0]
            category_id = int(fields[0])
            x_c, y_c, w, h = (float(value) for value in fields[1:5])
            x_c, w = x_c * width, w * width
            y_c, h = y_c * height, h * height
            boxes.append(
                [
                    category_id,
                    int(x_c - w / 2 + 0.5),
                    int(y_c - h / 2 + 0.5),
                    int(x_c + w / 2 + 0.5),
                    int(y_c + h / 2 + 0.5),
                ]
            )
    return boxes


# Load every validation image (as RGB) together with its ground-truth boxes.
# sorted(): glob order depends on the filesystem, which would make the sampled
# subset logged further down differ between machines/runs.
val_images = sorted(glob('datasets/data/images/val/*.png'))
images, labels = [], []
for image_path in val_images:
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None; fail with a readable message
        # instead of a cryptic error from cvtColor.
        raise ValueError(f'Could not read image: {image_path}')
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    height, width = image.shape[:2]

    # Derive the label path from the file stem rather than str.replace('images', 'labels'),
    # which would also rewrite an 'images' substring inside the file name itself.
    stem = os.path.splitext(os.path.basename(image_path))[0]
    labels_path = os.path.join('datasets/data/labels/val', stem + '.txt')

    # Append both together so `images` and `labels` stay aligned even if label parsing fails.
    image_boxes = _read_pixel_boxes(labels_path, width, height)
    images.append(image)
    labels.append(image_boxes)

In [162]:
# Smoke-test inference on the first ten validation images.
batch_pred = model.predict(images[:10])

In [165]:
# Inspect the Boxes object of one prediction to see which attributes (xyxy, cls, conf, ...) are available.
batch_pred[3].boxes



ultralytics.yolo.engine.results.Boxes object with attributes:

boxes: tensor([[5.6778e+02, 1.0019e+02, 6.0532e+02, 1.2613e+02, 2.9373e-01, 7.5000e+01],
        [4.8712e+02, 1.0600e+02, 5.2796e+02, 1.3626e+02, 2.5055e-01, 7.5000e+01]])
cls: tensor([75., 75.])
conf: tensor([0.2937, 0.2506])
data: tensor([[5.6778e+02, 1.0019e+02, 6.0532e+02, 1.2613e+02, 2.9373e-01, 7.5000e+01],
        [4.8712e+02, 1.0600e+02, 5.2796e+02, 1.3626e+02, 2.5055e-01, 7.5000e+01]])
id: None
is_track: False
orig_shape: tensor([368, 720])
shape: torch.Size([2, 6])
xywh: tensor([[586.5499, 113.1585,  37.5368,  25.9386],
        [507.5367, 121.1315,  40.8382,  30.2598]])
xywhn: tensor([[0.8147, 0.3075, 0.0521, 0.0705],
        [0.7049, 0.3292, 0.0567, 0.0822]])
xyxy: tensor([[567.7816, 100.1892, 605.3184, 126.1278],
        [487.1176, 106.0017, 527.9557, 136.2614]])
xyxyn: tensor([[0.7886, 0.2723, 0.8407, 0.3427],
        [0.6766, 0.2880, 0.7333, 0.3703]])

In [188]:
# Print class id and rounded xyxy corners for each box of one sample prediction.
sample_boxes = batch_pred[3].boxes
# .cpu() first: Tensor.numpy() only works on CPU tensors, and the model is
# configured for an accelerator device elsewhere in this notebook.
for class_id, box in zip(sample_boxes.cls.cpu().numpy(), sample_boxes.xyxy.cpu().numpy()):
    print(class_id, *box.round())

75.0 568.0 100.0 605.0 126.0
75.0 487.0 106.0 528.0 136.0


In [161]:
# Cap the number of validation images used for the prediction table at 600.
max_samples = min(len(images), 600)

In [221]:
# Run inference over the first `max_samples` validation images in mini-batches and
# collect, per image, a list of [category_id, confidence, x1, y1, x2, y2] rows.
# A dedicated name is used so the training `batch_size` from the config cell is not
# silently overwritten (re-running the training cell would otherwise pick up 10).
predict_batch_size = 10
pred_results = []
for start in tqdm(range(0, max_samples, predict_batch_size)):
    # Clamp the slice so a final partial batch never runs past `max_samples`.
    stop = min(start + predict_batch_size, max_samples)
    curr_results = model.predict(images[start:stop])
    for result in curr_results:
        # .cpu() before .numpy(): Tensor.numpy() fails for tensors on an accelerator device.
        boxes = result.boxes.xyxy.cpu().numpy()
        label_ids = result.boxes.cls.cpu().numpy()
        confidence = result.boxes.conf.cpu().numpy()
        pred_results.append(
            [
                [category_id, conf, *bbox]
                for category_id, conf, bbox in zip(label_ids, confidence, boxes)
            ]
        )

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 60/60 [05:01<00:00,  5.02s/it]


In [225]:
# Re-read the validation annotations to get at the category list.
# NOTE(review): this parses the same file as the annotation-loading cell near the top;
# `val_data` from that cell could be reused instead.
with open('datasets/data/annotations/val.json', 'r') as f:
    val_data = json.load(f)

In [229]:
# Map category id -> category name; used for box captions and as the W&B class-label lookup.
categories = val_data['categories']
id2cat = {cat['id']: cat['name'] for cat in categories}

In [241]:
# Single-column table; each row will hold one image with its ground-truth and predicted boxes.
table = wandb.Table(columns=['Image'])

In [242]:
def _wandb_box(class_id, x1, y1, x2, y2, score=None):
    """
    Build one W&B bounding-box dict in pixel coordinates.

    The caption is looked up in the notebook-level `id2cat`; when `score` is
    given it is attached under ``'scores': {'score': ...}``.
    """
    box = {
        'position': {
            'minX': x1,
            'minY': y1,
            'maxX': x2,
            'maxY': y2
        },
        'domain': 'pixel',
        'class_id': class_id,
        'box_caption': id2cat[class_id]
    }
    if score is not None:
        box['scores'] = {'score': score}
    return box


# Add one table row per image, overlaying ground-truth and predicted boxes.
# zip() stops at the shortest input, so that length is given to tqdm as `total`
# to get a real progress bar instead of a bare iteration counter.
n_rows = min(max_samples, len(pred_results))
for image, gt, pred in tqdm(zip(images[:max_samples], labels[:max_samples], pred_results), total=n_rows):
    # Ground-truth rows are already [class_id, x1, y1, x2, y2] integers.
    curr_gt = [_wandb_box(*true_label) for true_label in gt]

    # Prediction rows are [class_id, conf, x1, y1, x2, y2] numpy scalars; convert them
    # to built-in types and round the corners half-up to whole pixels.
    curr_predictions = [
        _wandb_box(
            int(class_id),
            int(x1 + 0.5),
            int(y1 + 0.5),
            int(x2 + 0.5),
            int(y2 + 0.5),
            score=float(conf),
        )
        for class_id, conf, x1, y1, x2, y2 in pred
    ]

    row = wandb.Image(
        image,
        boxes={
            'predictions': {
                'box_data': curr_predictions,
                'class_labels': id2cat
            },
            'ground_truth': {
                'box_data': curr_gt,
                'class_labels': id2cat
            }
        }
    )
    table.add_data(row)

600it [03:30,  2.85it/s]


In [243]:
# Upload the prediction table to the W&B run under the key 'pred_table'.
run.log({'pred_table': table})

In [245]:
# Close the W&B run; the `run` object must not be logged to after this point.
run.finish()

In [61]:
# Read a single image and convert it from OpenCV's BGR order to RGB.
# NOTE(review): `file_name` is not defined at notebook level by any visible cell (it is only a
# local inside create_dataset), so this cell presumably relies on leftover kernel state — confirm or remove.
image = cv2.imread(file_name)
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

In [65]:
# Draw a rectangle on `image` for each entry of `pred`.
# NOTE(review): this expects `pred` to hold result objects exposing `.boxes`; the table-building
# loop leaves `pred` as a plain list of rows, so this cell presumably relies on state from an
# earlier/deleted cell — confirm before re-running.
for detected_object in pred:
    detected_object = detected_object.boxes  # Boxes object for bbox outputs
    # NOTE(review): category_id and confidence are read but not used below.
    category_id = detected_object.cls
    confidence = detected_object.conf
    # Only the first box ([0]) of each entry is drawn.
    x1, y1, x2, y2 = detected_object.xyxy.cpu().numpy()[0].astype(int)
    
    cv2.rectangle(image, (x1, y1), (x2, y2), (255, 0, 0))

In [255]:
# Open a second W&B run (job_type 'logging_baseline') with the same config.
# NOTE(review): the recorded output shows this call timing out with a CommError after 60 s,
# so `run` may not refer to a live run after this cell.
run = wandb.init(
    project=params.WANDB_PROJECT,
    entity=params.ENTITY,
    job_type='logging_baseline',
    config=config,
    settings=wandb.Settings(start_method="fork")
)

VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016743429166672284, max=1.0…

Problem at: /var/folders/y6/hrf78r2d3pn5xzgwhhlhb0fm0000gn/T/ipykernel_14090/2854979015.py 1 <module>


CommError: Run initialization has timed out after 60.0 sec. 
Please refer to the documentation for additional information: https://docs.wandb.ai/guides/track/tracking-faq#initstarterror-error-communicating-with-wandb-process-

In [256]:
# Collect the validation metrics into a plain dict for JSON export.
# Values are cast to built-in float so serialisation does not depend on the numpy
# dtype of the metric arrays (json cannot encode e.g. numpy.float32).
metrics_dict = {}

# Per-class metrics, keyed '<class name>_<metric>'. Each array is aligned with
# `ap_class_index`, which holds the class id of every entry.
per_class_metrics = (
    ('avg_precision', AP),
    ('avg_precision50', AP50),
    ('f1', F1),
    ('precision', P),
)
for metric_suffix, per_class_values in per_class_metrics:
    for label_idx, class_value in zip(val_results.box.ap_class_index, per_class_values):
        metrics_dict[f'{class_names[label_idx]}_{metric_suffix}'] = float(class_value)

# Aggregate metrics under their original keys.
for metric, value in val_results.results_dict.items():
    metrics_dict[metric] = float(value)

# Mean metrics under short, readable keys.
metrics_dict['mAP'] = float(MAP)
metrics_dict['mAP50'] = float(MAP50)
metrics_dict['mAP75'] = float(MAP75)
metrics_dict['mP'] = float(MP)
metrics_dict['mR'] = float(MR)

In [260]:
# Write the collected metrics to metrics.json in the working directory.
with open('metrics.json', 'w') as f:
    json.dump(metrics_dict, f)