In [1]:
import os
import shutil
import sys

from pathbook.pathbook import *
labels = ['klikun', 'maliy', 'shipun']

import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

### annotating initial dataset for detection and segmentation

In [15]:
# Clear the previously generated train/val datasets.
# -r is needed because in classification mode the images are stored in
# per-label sub-directories, which a plain `rm -f` refuses to remove and
# would therefore leave behind with stale images inside.

!rm -rf ../data/train/*
!rm -rf ../data/val/*

In [16]:
# Prepare the per-label directories used by the classification layout.
# -p makes the cell re-runnable: it creates missing parent directories and
# does not fail when a directory already exists.

!mkdir -p ../data/train/klikun ../data/train/maliy ../data/train/shipun
!mkdir -p ../data/val/klikun ../data/val/maliy ../data/val/shipun

In [19]:
# Task the dataset is being prepared for. Must be one of:
#   'cls' - classification: images are sorted into per-label directories
#   'det' - detection: one bounding box per mask object is written
#   'seg' - segmentation: one contour polygon per mask spot is written
mode = 'cls'

# Fail early on a typo: an unknown value would otherwise be treated as a
# non-classification mode and silently produce empty label files.
assert mode in ('cls', 'det', 'seg'), f'unknown mode: {mode!r}'

In [21]:
# Sort the flat .jpg files of each dataset into per-label sub-directories
# (classification layout). File names follow "<source>-<label>-<index>.jpg",
# so the label is the second dash-separated field.
for dataset_dir in [path_val_dataset, path_train_dataset]:
    for img in tqdm(os.listdir(dataset_dir)):
        if not img.endswith('.jpg'):
            continue

        name_parts = img.split('-')
        if len(name_parts) < 2 or name_parts[1] not in labels:
            continue  # name does not follow the dataset format; nowhere to sort it

        label_dir = os.path.join(dataset_dir, name_parts[1])
        os.makedirs(label_dir, exist_ok=True)

        # Plain file copy instead of cv2.imread + cv2.imwrite: decoding and
        # re-encoding recompresses the JPEG and is slower than copying bytes.
        shutil.copyfile(os.path.join(dataset_dir, img), os.path.join(label_dir, img))

100%|██████████| 2711/2711 [00:17<00:00, 152.22it/s]
100%|██████████| 15357/15357 [01:44<00:00, 147.12it/s]


In [18]:
columns=['set','path','label','x_min','y_min','x_max','y_max']
train_array = []
val_array = []

np.random.seed(17)
val_p = 0.15  # probability that an image is assigned to the validation set


def _object_outlines(mask_gray, width, height):
    """Extract normalized contours for every object in a grayscale mask.

    Each distinct non-zero gray value is treated as one object (0 is taken
    to be the background).

    Returns a list with one entry per object; each entry is a list of
    contours, and each contour is a flat ``[x0, y0, x1, y1, ...]`` list whose
    x values are divided by ``width`` and y values by ``height``.
    """
    outlines = []
    values = np.unique(mask_gray)
    # Filter zeros explicitly instead of dropping the first unique value, so
    # no object is lost when the mask happens to contain no background pixels.
    for value in values[values != 0]:
        # Isolate the pixels of this object; findContours treats every
        # non-zero pixel as foreground.
        object_mask = (mask_gray == value).astype(np.uint8)
        contours, _ = cv2.findContours(object_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
        outlines.append([(contour[:, 0] / [width, height]).flatten().tolist()
                         for contour in contours])
    return outlines


# Iterate through the initial dataset, which is laid out per label as
# <dataset>/<label>/images and <dataset>/<label>/masks.
dataset = path_initial_train_dataset
for label_idx, label in enumerate(labels):
    mask_dir = os.path.join(dataset, label, 'masks')
    image_dir = os.path.join(dataset, label, 'images')

    # os.listdir returns entries in arbitrary order; sorting keeps the file
    # indices (and therefore the seeded train/val split) reproducible.
    for file_idx, mask_name in enumerate(tqdm(sorted(os.listdir(mask_dir)))):
        impath = os.path.join(image_dir, mask_name.replace('.png','.jpg'))
        if not os.path.exists(impath):
            continue # drop masks without images

        image = cv2.imread(impath)
        mask = cv2.imread(os.path.join(mask_dir, mask_name))
        if image is None or mask is None:
            continue # drop files OpenCV cannot decode

        if image.shape[:2] != mask.shape[:2]:
            continue # drop images with incorrect (differently sized) masks
        h, w = image.shape[:2]

        val = (np.random.random() < val_p)
        save_dir = path_val_dataset if val else path_train_dataset

        # name format: "[source: initial|extra|augmented]-[label: klikun|maliy|shipun]-[in-group index]"
        name = f'initial-{label}-{file_idx}'
        if mode == 'cls': # classification keeps images in per-label directories
            path = os.path.join(save_dir, label, name)
        else:
            path = os.path.join(save_dir, name)

        # cv2.imwrite does not create directories, so make sure the target exists.
        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
        # Skip images that were already written. This relies on the stable
        # (sorted) naming above; clear the datasets if the source data changes.
        if not os.path.exists(path+'.jpg'):
            cv2.imwrite(path+'.jpg', image)

        if mode == 'cls':
            continue # no label files are required for classification

        set_name = 'val' if val else 'train'
        annotation_rows = val_array if val else train_array

        with open(path+'.txt','w') as labelfile:
            mask_gray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            for object_contours in _object_outlines(mask_gray, w, h):
                if mode=='seg':
                    for seg in object_contours:
                        # NOTE(review): class id is always 0 here, not label_idx —
                        # presumably a single-class segmentation task; confirm.
                        print(0,*seg,file=labelfile) # save each spot for the segmentation task

                # all contour coordinates of this object, interleaved x, y
                coords = [value for seg in object_contours for value in seg]
                if not coords:
                    continue
                x = coords[0::2]
                y = coords[1::2]
                bbox = [min(x), min(y), max(x), max(y)]

                if mode=='det':
                    # write the box to the label file (it used to go to stdout)
                    print(label_idx,*bbox,file=labelfile) # save bboxes for detection

                annotation_rows.append([set_name, path+'.jpg', label, *bbox])

if mode!='cls': # save annotations for detection / segmentation
    pd.DataFrame(train_array, columns=columns).to_csv(path_train_annotation,index=False)
    pd.DataFrame(val_array, columns=columns).to_csv(path_val_annotation,index=False)

 71%|███████   | 2152/3025 [00:51<00:16, 52.99it/s]

/Users/samedi/Desktop/Минприроды/klikun/images/original (60).jpg


100%|██████████| 3025/3025 [01:11<00:00, 42.41it/s]
100%|██████████| 3002/3002 [00:46<00:00, 63.93it/s]
100%|██████████| 3011/3011 [01:44<00:00, 28.84it/s]
