In [None]:
from Repsycle.interact_utils import InteractAPI
import torchvision
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import DataLoader
from torch import nn, optim
from torchvision import models, transforms
from torch.utils.data import Dataset
from torch.optim import lr_scheduler
import random
import numpy as np
from io import BytesIO
import matplotlib.pyplot as plt
import base64
import PIL
from PIL import Image
import json
import os
import cv2
from sklearn.model_selection import StratifiedShuffleSplit
import time
import copy
from tqdm import tqdm
import pandas as pd
import torch
import random
import numpy as np
import yaml
import shutil
from IPython.display import JSON

# Set variables

In [None]:
# Interact connection parameters.
# Each placeholder left at None must be filled in before running the notebook;
# the assert right below it stops the run early otherwise.
username = None
assert username is not None, 'Set username'
password = None
assert password is not None, 'Set password'
api_root = "https://interact-api.psycle.io/api/v1"
project_id = None  # Project name
# Guard the project id itself (the password is already checked above)
assert project_id is not None, 'Set project_id'
analysis_id = None  # Analysis name
assert analysis_id is not None, 'Set analysis_id'

# Keyword filters passed to interactAPI.get_datas when downloading the data
filters = {'annotations__isnull': False}

# Set file structure (everything lives under the current working directory)
project_path = f'{os.getcwd()}'

image_folder = f"{project_path}/images"
datas_folder = f"{project_path}/annotations"
weights_folder = f'{project_path}/weights'
augmented_image_folder = f'{project_path}/augmented_images'

datas_path = f'{datas_folder}/datas.json'
train_annotations_path = f'{datas_folder}/train_annotations.json'
val_annotations_path = f'{datas_folder}/val_annotations.json'


# Extension used for the image files saved under image_folder
image_extension = 'png'

# Random state used for the train/validation split
seed = 42

# Create file structure; exist_ok makes the cell safe to re-run
for folder in (image_folder, datas_folder, weights_folder, augmented_image_folder):
    os.makedirs(folder, exist_ok=True)

In [None]:
def save_json(path, file_to_save):
    """Write ``file_to_save`` as JSON to the file at ``path``, replacing its content."""
    with open(path, 'w') as handle:
        json.dump(obj=file_to_save, fp=handle)
        
def open_json(path):
    """Read the JSON file at ``path`` and return the decoded object."""
    with open(path, 'r') as handle:
        return json.load(handle)

def save_yaml(path, file_to_save):
    """Dump ``file_to_save`` as YAML into the file at ``path``.

    Returns whatever ``yaml.dump`` returns for a stream target.
    """
    with open(path, 'w') as stream:
        return yaml.dump(file_to_save, stream)

def open_yaml(path):
    """Parse the YAML file at ``path`` with ``yaml.FullLoader`` and return the result."""
    with open(path, 'r') as stream:
        return yaml.load(stream, Loader=yaml.FullLoader)

def convert(size, box):
    """Turn a corner-style box into a normalized centre/size box.

    Args:
        size: ``(width, height)`` used to normalize the box.
        box: ``(x_min, x_max, y_min, y_max)``.

    Returns:
        ``(x_center, y_center, width, height)``, each scaled by the matching
        entry of ``size``. The centre is shifted by -1 before scaling.
    """
    x_min, x_max, y_min, y_max = box[0], box[1], box[2], box[3]
    scale_x = 1./(size[0])
    scale_y = 1./(size[1])

    center_x = (x_min + x_max)/2.0 - 1
    center_y = (y_min + y_max)/2.0 - 1
    box_w = x_max - x_min
    box_h = y_max - y_min

    return (center_x*scale_x, center_y*scale_y, box_w*scale_x, box_h*scale_y)

def get_image_path(data_id: str):
    """Return the path of the image file for ``data_id`` inside ``image_folder``."""
    image_path = f'{image_folder}/{data_id}.{image_extension}'
    return image_path

# Get API

In [None]:
# Interact API client built from the username, password and API root configured earlier
interactAPI = InteractAPI(username, password, api_root)

### Download all annotations

In [None]:
# Query the project's data with the configured filters and cache the result as JSON at datas_path
datas = interactAPI.get_datas(project_id, **filters)
save_json(datas_path, datas)

In [None]:
# Reload the cached data from disk and report how many entries it contains
datas = open_json(datas_path)
print(f'Number of images: {len(datas)}')

### Set labels

In [None]:
# Build the label <-> integer mappings from the label list attached to the
# analysis of the first annotation of the first data entry.
analysis_labels = datas[0]['annotations'][0]['analysis']['labels']
label_to_int = {label: idx for idx, label in enumerate(analysis_labels)}
# 'background' takes the index right after the last label. Deriving it from the
# list length (instead of a leftover loop variable) also works for an empty list.
label_to_int['background'] = len(analysis_labels)
int_to_label = {v: k for k, v in label_to_int.items()}

print(f'label_to_int: {label_to_int}')
print(f'int_to_label: {int_to_label}')

### Download all images 

In [None]:
# Download every image referenced by the cached data that is not on disk yet
datas = open_json(datas_path)

for data in tqdm(datas):
    data_id = data['id']
    image_path = get_image_path(data_id)
    if os.path.exists(image_path):
        # Already downloaded by a previous run
        continue
    encoded_image = interactAPI.get_image(project_id, data_id, base64=True)
    image_bytes = base64.b64decode(encoded_image)
    image = Image.open(BytesIO(image_bytes))
    image.save(image_path)

### Filter datas

In [None]:
# Build filtered_datas: for every viewed data entry, its labels, its normalized
# boxes (see convert) and the shape of its image.
datas = open_json(datas_path)
filtered_datas = {}

for data in datas:
    # Only viewed entries are kept, so skip the others before reading their image
    if not data['viewed']:
        continue

    data_id = data['id']
    image_path = get_image_path(data_id)
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread returns None instead of raising when the file is missing or unreadable
        raise FileNotFoundError(f'Could not read image for data {data_id}: {image_path}')
    shape = image.shape
    annotations = data['annotations']

    labels = []
    coordinates = []
    if len(annotations) > 0:
        for annotation in annotations:
            label = annotation['label']
            x1, y1, x2, y2 = annotation['coordinates']
            # shape is (rows, cols, ...): convert expects (width, height) and (x_min, x_max, y_min, y_max)
            coordinate = convert(size=(shape[1], shape[0]), box=(x1, x2, y1, y2))

            labels.append(label)
            coordinates.append(coordinate)
    else:
        # An entry without annotations is recorded as a single 'background' label with no box
        labels = ['background']

    filtered_datas[data_id] = {
        'label': labels,
        'coordinates': coordinates,
        'shape': shape
    }

### Visualize some images

In [None]:
# Draw a 4x4 grid of randomly picked images with their annotated boxes
width, height = 4, 4
fig, axs = plt.subplots(width, height, figsize=(15, 15))
fig.subplots_adjust(hspace=.5, wspace=.001)
axs = axs.ravel()

keys = list(filtered_datas.keys())
annotations = list(filtered_datas.values())
# Sampling is with replacement, so the same image can appear more than once
idxs = np.random.randint(0, len(keys), width * height)

for plot_idx, i in enumerate(idxs):
    data_id = keys[i]
    img = cv2.imread(get_image_path(data_id))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_h, img_w, _ = img.shape
    coordinates = annotations[i]['coordinates']
    classes = annotations[i]['label']

    # Boxes are stored as normalized (x_center, y_center, width, height): scale back to pixels
    for x, y, w, h in coordinates:
        p1 = (int((x - w/2) * img_w), int((y - h/2) * img_h))
        p2 = (int((x + w/2) * img_w), int((y + h/2) * img_h))
        img = cv2.rectangle(img, p1, p2, color=(255, 0, 0), thickness=3)

    axs[plot_idx].imshow(img)
    # Title each subplot with the distinct labels of the image it shows
    # (not with a variable left over from another cell)
    axs[plot_idx].set_title(', '.join(str(c) for c in dict.fromkeys(classes)))

### Labels distribution

In [None]:
# Flatten the per-image label lists into one list of label occurrences
labels = [label for value in filtered_datas.values() for label in value['label']]

# The figure size has to be given when the figure is created:
# plt.plot(figsize=...) draws nothing and leaves the default size in place
plt.figure(figsize=(20, 20))
plt.hist(labels)
plt.show()

### Coordinates distribution

In [None]:
# Scatter the normalized box centres (first two columns of each stored box) of all images
coordinates = np.asarray([
    coordinate
    for value in filtered_datas.values()
    for coordinate in value['coordinates']
])
plt.figure(figsize = (5, 5))
plt.scatter(coordinates[:, 0], coordinates[:, 1], s = 10)

### Split train and validation

In [None]:
# Per-image ids, label lists and box lists, all in the iteration order of filtered_datas
image_ids = list(filtered_datas.keys())
image_classes = [value['label'] for value in filtered_datas.values()]
image_coordinates = [value['coordinates'] for value in filtered_datas.values()]

# The split is stratified on the first label of each image
X = np.asarray(image_ids)
y = np.asarray([classes[0] for classes in image_classes])

# 20% of the images (rounded down) go to validation
val_size = int(0.2*len(X))

sss = StratifiedShuffleSplit(n_splits=1, test_size=val_size, random_state=seed)
train_index, test_index = next(sss.split(X, y))


train_annotations = {
    image_ids[idx]: {
        'label': image_classes[idx],
        'coordinates': image_coordinates[idx]
    }
    for idx in train_index
}

val_annotations = {
    image_ids[idx]: {
        'label': image_classes[idx],
        'coordinates': image_coordinates[idx]
    }
    for idx in test_index
}

# Persist both splits so later cells can reload them from disk
save_json(train_annotations_path, train_annotations)
save_json(val_annotations_path, val_annotations)

print(f'Number of annotations for training: {len(train_annotations)}')
print(f'Number of annotations for validation: {len(val_annotations)}')

In [None]:
# Flatten the label lists of the training split into one list of label occurrences
labels = [label for value in train_annotations.values() for label in value['label']]

# The figure size has to be given when the figure is created:
# plt.plot(figsize=...) draws nothing and leaves the default size in place
plt.figure(figsize=(20, 20))
plt.hist(labels)
plt.show()
print('Training distribution')

In [None]:
# Flatten the label lists of the validation split into one list of label occurrences
labels = [label for value in val_annotations.values() for label in value['label']]

# The figure size has to be given when the figure is created:
# plt.plot(figsize=...) draws nothing and leaves the default size in place
plt.figure(figsize=(20, 20))
plt.hist(labels)
plt.show()
print('Validation distribution')

# Load on yolov5

In [None]:
# Clone the PsycleResearch yolov5 repository over SSH (creates ./yolov5 by default).
# NOTE(review): the following cells write under ./yolov5/yolov5/data — confirm this matches the repository layout.
!git clone git@github.com:PsycleResearch/yolov5.git

In [None]:
# Data directory inside the cloned yolov5 repository; every path below hangs off it
yolov5_data_folder = f'{os.getcwd()}/yolov5/yolov5/data'

# Trailing slashes are kept: the export cell builds file paths by plain string concatenation
train_annotations_folder = f'{yolov5_data_folder}/labels/train/'
val_annotations_folder = f'{yolov5_data_folder}/labels/val/'

train_images_folder = f'{yolov5_data_folder}/images/train/'
val_images_folder = f'{yolov5_data_folder}/images/val/'

hyperparameters_scratch = f'{yolov5_data_folder}/hyp.scratch.yaml'
hyperparameters_finetune = f'{yolov5_data_folder}/hyp.finetune.yaml'

yolov5s = f'{yolov5_data_folder}/yolov5s.yaml'

dataset_path = f'{yolov5_data_folder}/dataset.yaml'

nb_classes = len(int_to_label) - 1  # remove background class

# Create file structure; exist_ok makes the cell safe to re-run
for folder in (
    train_annotations_folder,
    val_annotations_folder,
    train_images_folder,
    val_images_folder,
):
    os.makedirs(folder, exist_ok=True)

In [None]:
# Class names in label_to_int order, without the 'background' pseudo-class
names = [name for name in label_to_int]
names.remove('background')

# Dataset description written to dataset_path: image folders, class count and class names
dataset = dict(
    train=f'{os.getcwd()}/yolov5/yolov5/data/images/train/',
    val=f'{os.getcwd()}/yolov5/yolov5/data/images/val/',
    nc=nb_classes,
    names=names,
)

save_yaml(dataset_path, dataset)

In [None]:
# Model configuration for the yolov5s architecture, written as YAML to the path held in `yolov5s`.
# NOTE(review): each layer row looks like [from, number, module, args], the layout used by
# yolov5 model files — confirm against the fork's model parser.
yolov5s_parameters = {
    
    # Number of classes, excluding the 'background' pseudo-class (see nb_classes)
    'nc': nb_classes, 
    'depth_multiple': 0.33,
    'width_multiple': 0.5,
    # Three groups of six numbers; presumably three (width, height) anchors per detection scale — TODO confirm
    'anchors': [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]],
    
    'backbone':
    
    [[-1, 1, 'Focus', [64, 3]],
     [-1, 1, 'Conv', [128, 3, 2]],
     [-1, 3, 'BottleneckCSP', [128]],
     [-1, 1, 'Conv', [256, 3, 2]],
     [-1, 9, 'BottleneckCSP', [256]],
     [-1, 1, 'Conv', [512, 3, 2]],
     [-1, 9, 'BottleneckCSP', [512]],
     [-1, 1, 'Conv', [1024, 3, 2]],
     [-1, 1, 'SPP', [1024, [5, 9, 13]]],
     [-1, 3, 'BottleneckCSP', [1024, False]]],
    
    'head':
    
    # The string arguments 'None', 'nc' and 'anchors' are written to the YAML as plain strings;
    # presumably the yolov5 parser resolves them when building the model — TODO confirm
    [[-1, 1, 'Conv', [512, 1, 1]],
     [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']],
     [[-1, 6], 1, 'Concat', [1]],
     [-1, 3, 'BottleneckCSP', [512, False]],
     [-1, 1, 'Conv', [256, 1, 1]],
     [-1, 1, 'nn.Upsample', ['None', 2, 'nearest']],
     [[-1, 4], 1, 'Concat', [1]],
     [-1, 3, 'BottleneckCSP', [256, False]],
     [-1, 1, 'Conv', [256, 3, 2]],
     [[-1, 14], 1, 'Concat', [1]],
     [-1, 3, 'BottleneckCSP', [512, False]],
     [-1, 1, 'Conv', [512, 3, 2]],
     [[-1, 10], 1, 'Concat', [1]],
     [-1, 3, 'BottleneckCSP', [1024, False]],
     [[17, 20, 23], 1, 'Detect', ['nc', 'anchors']]]
}

save_yaml(yolov5s, yolov5s_parameters)

In [None]:
# Fine-tuning hyperparameters, written as YAML to hyperparameters_finetune and later
# posted with the training record (post_dict['hyperparameters']).
# NOTE(review): key meanings follow the yolov5 hyp.*.yaml files — confirm against the fork in use.
yolov5_finetune_hyperparameters = {
    
    'lr0': 0.001,
    'lrf': 0.001,
    'momentum': 0.843,
    'weight_decay': 0.00036,
    'warmup_epochs': 2.0,
    'warmup_momentum': 0.5,
    'warmup_bias_lr': 0.05,
    'box': 0.0296,
    'cls': 0.243,
    'cls_pw': 0.631,
    'obj': 0.301,
    'obj_pw': 0.911,
    'iou_t': 0.2,
    'anchor_t': 2.91,
    'fl_gamma': 0.0,
    # All three hsv_* entries are zero; presumably this turns colour augmentation off — TODO confirm
    'hsv_h': 0,
    'hsv_s': 0,
    'hsv_v': 0,
    'degrees': 0.373,
    'translate': 0.245,
    'scale': 0.1,
    'shear': 0.0,
    'perspective': 0.0,
    'flipud': 0.5,
    'fliplr': 0.5,
    'mixup': 0.0,
    'mosaic': 1.0,
    'copy_paste': 0.0
}

save_yaml(hyperparameters_finetune, yolov5_finetune_hyperparameters)

In [None]:
# Convert datas for yolo

In [None]:
# Copy images/annotations to yolo folders

In [None]:
# Reload both splits from their JSON files (keys come back as strings after the JSON round trip)
train_annotations = open_json(train_annotations_path)
val_annotations = open_json(val_annotations_path)

In [None]:
# Export both splits to the yolov5 folders: one image copy and one label file per data id.
# Each label line is "<class index> <x_center> <y_center> <width> <height>".
export_sets = [
    (train_annotations, train_images_folder, train_annotations_folder),
    (val_annotations, val_images_folder, val_annotations_folder),
]

for annotations_set in export_sets:
    set_annotations, images_folder, labels_folder = annotations_set
    for data_id, annotation in set_annotations.items():
        # Copy the image once per data id, not once per label.
        # NOTE(review): the copy is named .bmp although the source files are saved with
        # image_extension — kept as is; confirm which extension the training code expects.
        shutil.copyfile(
            get_image_path(data_id),
            images_folder + data_id + '.bmp'
        )

        # The context manager closes the label file even if a lookup below fails
        with open(labels_folder + data_id + '.txt', 'w') as label_file:
            for idx, label_name in enumerate(annotation['label']):
                if label_name == 'background':
                    # Background entries carry no box: their label file stays empty
                    continue
                c = label_to_int[label_name]
                x, y, w, h = annotation['coordinates'][idx]
                label_file.write(f'{c} {x} {y} {w} {h}\n')

# Train (from yolov5 Psycle lib)

In [None]:
# python yolov5/train.py --weights yolov5/weights/yolov5s.pt --cfg yolov5/data/yolov5s.yaml --data yolov5/data/dataset.yaml --hyp yolov5/data/hyp.finetune.yaml --epochs 200 --batch-size 8 --imgsz 640 --adam



# Post weights

In [None]:
# Placeholder: left empty and not referenced elsewhere in the visible notebook
preprocessing = {}

In [None]:
# Placeholder: left empty and not referenced elsewhere in the visible notebook
augmentations = {}

In [None]:
# Placeholder: left empty and not referenced elsewhere in the visible notebook
order = []

In [None]:
# Records the seed used for the split.
# NOTE(review): this dict is not what gets posted — post_dict sends yolov5_finetune_hyperparameters instead.
hyperparameters = {
    'seed':seed
}

In [None]:
# Ids of the training images, read back from the saved training split
train_annotations = open_json(train_annotations_path)
training_set = [*train_annotations]

In [None]:
# Ids of the validation images, read back from the saved validation split
val_annotations = open_json(val_annotations_path)
val_set = [*val_annotations]

In [None]:
# Payload describing the training run, sent with interactAPI.post_training
post_dict = {
    'name':'',   # TODO
    'analysis_id':analysis_id, 
    'hyperparameters': yolov5_finetune_hyperparameters, 
    'labels_output_mapping': label_to_int, 
    'train_set': training_set, 
    'validation_set':val_set,
    'metadata':{
        'model':'YOLOV5S', 
    }
}

# Path of the trained weights file to upload
weights_path = None  # TODO
assert weights_path is not None, 'Set weights_path'
weights_choice = ""  # TODO
# The placeholder is an empty string, so check for a non-empty value:
# an `is not None` check could never fail here
assert weights_choice, 'Set weights_choice'

In [None]:
# Register the training, upload its weights, then record the weights choice
training_id = interactAPI.post_training(post_dict)
print(training_id)
upload_status = interactAPI.post_weights(weights_path, training_id['id'])
if upload_status == 204:
    print('Success')
else:
    # Report anything other than the expected 204 instead of staying silent
    print(f'Weights upload did not return 204 (got {upload_status!r})')
# Pass the configured choice rather than a hard-coded empty string
interactAPI.put_training_weights_choice(weights_choice, training_id['id'])  # TODO