In [1]:
import os
import sys

from pathbook.pathbook import *
labels = ['klikun', 'maliy', 'shipun']
labeldict = {'klikun':0, 'maliy':1, 'shipun':2}

import numpy as np
import pandas as pd
import cv2
from tqdm import tqdm
import matplotlib.pyplot as plt

### annotating initial dataset for detection and segmentation

In [14]:
# prepare directories for classification
!mkdir ../data/train
!mkdir ../data/train/klikun
!mkdir ../data/train/maliy
!mkdir ../data/train/shipun

!mkdir ../data/val
!mkdir ../data/val/klikun
!mkdir ../data/val/maliy
!mkdir ../data/val/shipun

mkdir: ../data/train: File exists
mkdir: ../data/val: File exists


In [12]:
df = pd.read_csv(path_test_annotation)
for img in df['path'].unique():
    with open(os.path.join(path_test_dataset,img.replace('.jpg','.txt')),'w') as txt:
        info = df[df['path']==img].copy()
        info['x']=(info.x_min+info.x_max)/2
        info['y']=(info.y_min+info.y_max)/2
        info['w']=(-info.x_min+info.x_max)
        info['h']=(-info.y_min+info.y_max)
        for idx, row in info.iterrows():
            print(labeldict[row.label],row.x,row.y,row.w,row.h,file=txt)

In [13]:
!rm -f ../data/test/*.xml

In [21]:
for _ in [path_val_dataset, path_train_dataset]:
    for img in tqdm(os.listdir(_)):
        if img[-4:]=='.jpg':
            label = img.split('-')[1]
            cv2.imwrite(os.path.join(_,label,img), cv2.imread(os.path.join(_,img)))

100%|██████████| 2711/2711 [00:17<00:00, 152.22it/s]
100%|██████████| 15357/15357 [01:44<00:00, 147.12it/s]


In [5]:
columns=['set','path','class_name','class_id','x_min','y_min','x_max','y_max']
train_array = []
val_array = []

np.random.seed(17)
val_p = 0.15 

# iterate through the initial formated for classification dataset
dataset = path_initial_train_dataset
for label_idx, label in enumerate(labels):
    mask_dir = os.path.join(dataset, label, 'masks')
    image_dir = os.path.join(dataset, label, 'images')

    for file_idx, mask in enumerate(tqdm(os.listdir(mask_dir))):
        impath = os.path.join(image_dir, mask.replace('.png','.jpg'))
        if not os.path.exists(impath):
            # print(impath)
            continue # drop masks without images

        image = cv2.imread(impath)
        mask = cv2.imread(os.path.join(mask_dir, mask))

        h, w, _ = image.shape
        h_, w_, _ = mask.shape
        if not (h == h_ and w == w_):
            continue # drop images with uncorrest masks

        save_dir=''
        save_cls_dir=''
        val = (np.random.random() < val_p)
        if val:
            save_dir = path_val_dataset
            save_cls_dir = path_cls_val_dataset
        else:
            save_dir = path_train_dataset
            save_cls_dir = path_cls_train_dataset

        # name format: "[scource: initial|extra|augmented]-[label: klikun|maliy|shipun]-[ingroup index]]"
        name = f'initial-{label}-{file_idx}'
        path = os.path.join(save_dir, label, name)

        # saving image and annotation for segmentation task
        cv2.imwrite(path+'.jpg', image)
        with open(path+'.txt','w') as labelfile:

            imgray = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY)
            colors = np.unique(imgray)[1:]
            for color_idx, color in enumerate(colors):
                #choose one color (one object) from mask and find contours
                _, thresh = cv2.threshold(imgray, color, color, type=cv2.THRESH_TOZERO_INV)
                _, thresh = cv2.threshold(thresh, color-1, color, type=cv2.THRESH_TOZERO)
                contours, _ = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
                
                superseg = []
                for contour in contours:
                    seg = (contour[:,0]/[w,h]).flatten().tolist()
                    superseg += seg
                    # if mode=='seg':
                    print(0,*seg,file=labelfile) # save each spot for segmentation task

                # contours coords
                x = superseg[0::2]
                x_min = min(x)
                x_max = max(x)

                y = superseg[1::2]
                y_min = min(y)
                y_max = max(y)

                w_ = x_max-x_min
                h_ = y_max-y_min

                # saving different crops for classification task
                cls_path = os.path.join(save_cls_dir, label, name+f"-{color_idx}")
                cls_a_path = os.path.join(save_cls_dir, label, f'augmented-{label}-{file_idx}-{color_idx}')
                cv2.imwrite(cls_a_path+'-1'+'.jpg', 
                            image[int(y_min*h) : int(y_max*h), 
                                  int(x_min*w) : int(x_max*w)])
                if w_ < 0.8 and h_ < 0.8:
                    cv2.imwrite(cls_path+'-1.2'+'.jpg', 
                            image[int(max((y_min-0.1*h_),0)*h) : int(min((y_max+0.1*h_),1)*h), 
                                  int(max((x_min-0.1*w_),0)*w) : int(min((x_max+0.1*w_),1)*w)])
                if w_ < 0.6 and h_ < 0.6:
                    cv2.imwrite(cls_a_path+'-1.4'+'.jpg', 
                            image[int(max((y_min-0.2*h_),0)*h) : int(min((y_max+0.2*h_),1)*h), 
                                  int(max((x_min-0.2*w_),0)*w) : int(min((x_max+0.2*w_),1)*w)])

                # if mode=='det':
                #     print(label_idx,min(x),min(y),max(x),max(y)) # save bboxes for detection

                if val:
                    val_array.append(['val',path+'.jpg',label,labeldict[label],x_min,y_min,x_max,y_max])
                else:
                    train_array.append(['train',path+'.jpg',label,labeldict[label],x_min,y_min,x_max,y_max])

# if mode!='cls': #save annotations for detection
pd.DataFrame(train_array, columns=columns).to_csv(path_train_annotation,index=False)
pd.DataFrame(val_array, columns=columns).to_csv(path_val_annotation,index=False)

100%|██████████| 3025/3025 [01:22<00:00, 36.70it/s]
100%|██████████| 3002/3002 [00:54<00:00, 54.61it/s]
100%|██████████| 3011/3011 [01:57<00:00, 25.70it/s]
