In [3]:
import sys
import pickle
import glob
import shutil
import numpy as np
import pandas as pd

import torch
from torch import nn
# from torchvision.models.vgg.V import VGG19_Weights
import warnings 
import os
warnings.filterwarnings("ignore")
import cv2
from mat4py import loadmat

import matplotlib.pyplot as plt

%matplotlib inline

### define different paths

In [7]:
# All dataset locations are derived from the notebook's working directory.
# '__file__' is a quoted literal here, so abspath() resolves it against the
# current working directory and dirname() strips it off again.
base_path = os.path.dirname(os.path.abspath('__file__'))

# Folder that holds the label files (despite the "_file" suffix it is a directory).
labels_file = os.path.join(base_path, 'data')

# Augmented VGG dataset; use 'data_for_vgg' instead for the non-augmented copy.
data_path = os.path.join(labels_file, 'data_for_vgg_augmented')
train_path, test_path = (os.path.join(data_path, split) for split in ('train', 'test'))

### load additional data (classes names, labels, pre-defined data splits)

In [8]:
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling;
# only load a classes.txt file that you produced yourself.
with open('classes.txt', 'rb') as f:
    classes = pickle.load(f)

# Build the paths with os.path.join instead of a hard-coded '\\' so they also
# resolve on Linux/macOS (on Windows the resulting paths are unchanged).
labels = loadmat(os.path.join(labels_file, 'imagelabels.mat'))
data_splits = loadmat(os.path.join(data_path, 'setid.mat'))

# Cell output: sizes of the pre-defined train / validation / test id lists.
len(data_splits['trnid']), len(data_splits['valid']), len(data_splits['tstid'])

## set the data

In [None]:
# Integer labels read from labels.csv (a file created from the original
# "imagelabels.mat"). This rebinds `labels`, replacing the loadmat() result.
labels = np.loadtxt('labels.csv', dtype=int)
labels

In [None]:
# Map 0-based class index -> class name, in the order the names appear in `classes`.
label_name_dict = dict(enumerate(classes))

### split images into per-class folders (by class name) - as prep for the PyTorch ImageFolder dataset

In [None]:
# Copy every source image into a folder named after its class.
#
# NOTE(review): `vgg_data_folder` is not defined in any cell visible in this
# notebook; it must be set (to the ImageFolder root) before running this cell.
# NOTE(review): the images are assumed to live in <base_path>/data/images, which
# replaces the previous absolute, user-specific Windows path - confirm.
image_files = sorted(glob.glob(os.path.join(base_path, 'data', 'images', '*.jpg')))

# glob() returns files in arbitrary order; sorting keeps the i-th file paired
# with the i-th entry of `labels`.
for label, file_path in zip(labels, image_files):
    file_name = os.path.basename(file_path)  # portable, unlike split('\\')
    class_dir = os.path.join(vgg_data_folder, label_name_dict[label - 1])  # labels are 1-based
    os.makedirs(class_dir, exist_ok=True)  # copyfile() does not create folders
    shutil.copyfile(file_path, os.path.join(class_dir, file_name))
    

### train-test split - 50%

In [None]:
def split_data(train_path, test_path):
    """Move a random half of each class's original images from train to test.

    Only files whose name starts with 'image' are candidates, so augmented
    copies (which carry a prefix) stay in the training folder.

    NOTE: a parameterless `split_data()` defined later in this notebook
    rebinds this name once its cell is executed.

    Args:
        train_path: folder containing one sub-folder per class.
        test_path: destination root; class sub-folders are created on demand.
    """
    print('splitting the data')
    test_ratio = 0.5
    for f_class in os.listdir(train_path):
        class_path = os.path.join(train_path, f_class)
        if not os.path.isdir(class_path):
            continue  # ignore stray files next to the class folders

        files = [entry for entry in os.listdir(class_path) if entry.startswith('image')]
        num_of_files_to_be_moved = int(np.round(len(files) * test_ratio))
        files_to_be_moved = np.random.choice(files, size=num_of_files_to_be_moved, replace=False)
        if len(files_to_be_moved) == 0:
            continue

        dst_path = os.path.join(test_path, f_class)
        os.makedirs(dst_path, exist_ok=True)

        # Never fill the test folder beyond the quota, so an accidental
        # second run does not keep draining the training set.
        free_slots = max(num_of_files_to_be_moved - len(os.listdir(dst_path)), 0)
        for f in files_to_be_moved[:free_slots]:
            # The file name belongs in the destination path; it used to be
            # passed as shutil.move's third argument (copy_function).
            shutil.move(os.path.join(class_path, f), os.path.join(dst_path, f))


In [None]:
def reset_data(train_path, test_path):
    """Undo `split_data`: move every file under test_path back into train_path.

    NOTE: a parameterless `reset_data()` defined later in this notebook
    rebinds this name once its cell is executed.

    Args:
        train_path: root of the training set (one sub-folder per class).
        test_path: root of the test set whose class folders are emptied.
    """
    print('re-setting the data')
    for f_class in os.listdir(test_path):
        class_path = os.path.join(test_path, f_class)
        if not os.path.isdir(class_path):
            continue  # ignore stray files next to the class folders

        dst_dir = os.path.join(train_path, f_class)
        os.makedirs(dst_dir, exist_ok=True)
        for f in os.listdir(class_path):
            # Pass only (src, dst): the file name used to be handed to
            # shutil.move as its third argument (copy_function).
            shutil.move(os.path.join(class_path, f), os.path.join(dst_dir, f))
            

### Augmenting the data

In [None]:
import skimage.io as io
from skimage.transform import rotate, AffineTransform, warp
from skimage.util import random_noise
from skimage.filters import gaussian

### utility functions

In [None]:
def cut_out(img, patch_size=70):
    """Return a copy of `img` with 2-9 randomly placed square patches zeroed.

    The input array is left untouched, so the caller can keep using the
    original image for further augmentations.

    Args:
        img: image array indexed as [row, col, ...].
        patch_size: side length of each patch in pixels; clamped to the image
            size so images smaller than the patch no longer raise.

    Returns:
        A new array with the same shape and dtype as `img`.
    """
    out = img.copy()
    rows, cols = out.shape[0], out.shape[1]
    patch_h = min(patch_size, rows)
    patch_w = min(patch_size, cols)

    num_of_cutoff_cubes = np.random.randint(2, 10)
    for _ in range(num_of_cutoff_cubes):
        # randint(0, 0) raises, so fall back to offset 0 when the patch
        # spans the whole side.
        row_start = np.random.randint(0, rows - patch_h) if rows > patch_h else 0
        col_start = np.random.randint(0, cols - patch_w) if cols > patch_w else 0
        out[row_start:row_start + patch_h, col_start:col_start + patch_w] = 0
    return out

In [None]:
def rotate_image(img):
    """Rotate `img` by a random whole-degree angle drawn from [30, 90).

    Areas uncovered by the rotation are filled using skimage's 'wrap' mode,
    and the result is cast back to uint8.
    """
    degrees = np.random.randint(30, 90)
    rotated = rotate(img, angle=degrees, mode='wrap', preserve_range=True)
    return rotated.astype('uint8')

In [None]:
def tranlation(img):
    """Shift `img` by a random 10-29% of its size along each axis, wrapping around.

    Args:
        img: image array indexed as [row, col, ...].

    Returns:
        The translated image as a uint8 array.
    """
    percent_of_x = np.random.randint(10, 30)
    percent_of_y = np.random.randint(10, 30)
    # AffineTransform takes the translation as (x, y): x runs along the
    # columns (shape[1]) and y along the rows (shape[0]).
    num_of_pixs_x = int(img.shape[1] * percent_of_x / 100)
    num_of_pixs_y = int(img.shape[0] * percent_of_y / 100)
    transform = AffineTransform(translation=(num_of_pixs_x, num_of_pixs_y))
    # Warp the argument itself; this used to read the global `cv_img`.
    return warp(img, transform, mode='wrap', preserve_range=True).astype('uint8')

In [None]:
def save_img_with_prefix(file_name, prefix, path, img):
    """Write `img` into `path` as '<prefix>_<file_name>' using cv2.imwrite."""
    target = os.path.join(path, '_'.join((prefix, file_name)))
    cv2.imwrite(target, img)
    

### create augment script (produces 6 additional images from each image)
* I could have mixed different changes, but I chose to perform them separately

In [None]:
def augment_images(train_path):
    """Write six augmented variants next to every original training image.

    For each file whose name starts with 'image' inside every class folder of
    `train_path`, the following copies are saved in the same folder with a
    prefix: rotated, wrapShift, flipLR, flipUD, gaussian_noise, cut_out.

    Only originals are processed, so running the function again does not
    augment previously generated files (it overwrites them with new draws).
    """
    for class_name in os.listdir(train_path):
        class_path = os.path.join(train_path, class_name)
        if not os.path.isdir(class_path):
            continue

        for file_name in os.listdir(class_path):
            # Same convention as delete_all_augmented: originals start with 'image'.
            if not file_name.startswith('image'):
                continue

            # load original train image; cv2.imread returns None when it cannot decode
            cv_img = cv2.imread(os.path.join(class_path, file_name))
            if cv_img is None:
                continue

            # rotate image
            rotated = rotate_image(cv_img)
            save_img_with_prefix(file_name, 'rotated', class_path, rotated)

            # translation
            wrapShift = tranlation(cv_img)
            save_img_with_prefix(file_name, 'wrapShift', class_path, wrapShift)

            # flip image left right
            flipLR = np.fliplr(cv_img)
            save_img_with_prefix(file_name, 'flipLR', class_path, flipLR)

            # flip image upside down
            flipUD = np.flipud(cv_img)
            save_img_with_prefix(file_name, 'flipUD', class_path, flipUD)

            # add random noise to the image; the result is scaled by 255
            # before the cast back to uint8
            sigma = 0.155
            noisyRandom = random_noise(cv_img, var=sigma ** 2)
            noisyRandom = (noisyRandom * 255).astype('uint8')
            save_img_with_prefix(file_name, 'gaussian_noise', class_path, noisyRandom)

            # cutout - hand over a copy so the loaded image is never modified
            cutout_img = cut_out(cv_img.copy())
            save_img_with_prefix(file_name, 'cut_out', class_path, cutout_img)

### function to delete augmented images

In [None]:
def delete_all_augmented(train_path):
    """Remove every file in the class folders whose name does not start with 'image'.

    Entries of `train_path` that are not directories are left alone.
    """
    print('deleting all augmented files')
    class_dirs = (os.path.join(train_path, entry) for entry in os.listdir(train_path))
    for class_dir in filter(os.path.isdir, class_dirs):
        for entry in os.listdir(class_dir):
            if entry.startswith('image'):
                continue  # original image - keep it
            os.remove(os.path.join(class_dir, entry))

## prepare the data for yolo "detection" format (with Bounding Box)

In [None]:
# From here on `data_path` points at the YOLO copy of the dataset (this rebinds
# the VGG path set earlier). `seg_path` is the folder the bounding-box cell reads.
data_path = os.path.join(base_path, 'data', 'data_for_yolo')
seg_path = os.path.join(data_path, 'segmim')

### utility functions

In [None]:
def draw_rect(img, cords):
    """Draw a green rectangle on `img` and show it in an OpenCV window.

    `cords` holds the two opposite corners as (x1, y1, x2, y2). The call
    blocks until a key is pressed, then closes all OpenCV windows.
    """
    first_corner = (cords[0], cords[1])
    second_corner = (cords[2], cords[3])
    annotated = cv2.rectangle(img, first_corner, second_corner, (0, 255, 0))
    cv2.imshow('image', annotated)
    cv2.waitKey(0)
    cv2.destroyAllWindows()
    
def get_file_name_file_num(f):
    """Derive the image number and label-file name from a '<prefix>_<digits>.<ext>' name.

    Returns:
        (number as int, 'image_<digits>.txt'), e.g. 'image_00012.jpg' ->
        (12, 'image_00012.txt').
    """
    digits = f.split('_')[1].split('.')[0]
    return int(digits), f'image_{digits}.txt'


def get_rec_cords(img):
    """Bounding box of all pixels that differ from the background colour [254, 0, 0].

    The scan is vectorized with NumPy instead of visiting every pixel in a
    Python loop; the returned values are the same.

    Args:
        img: array of shape (rows, cols, 3) holding the channels in the order
            the image was loaded.

    Returns:
        (j_min, i_min, j_max, i_max) as Python ints, where i indexes rows and
        j indexes columns. When no pixel differs from the background the
        result is (cols, rows, 0, 0), i.e. min > max, which callers can use to
        detect an empty segmentation.
    """
    background = np.array([254, 0, 0], dtype='uint8')
    # A pixel is foreground when any of its channels differs from the background.
    foreground = np.any(img != background, axis=-1)
    rows, cols = np.nonzero(foreground)

    if rows.size == 0:
        return img.shape[1], img.shape[0], 0, 0

    return int(cols.min()), int(rows.min()), int(cols.max()), int(rows.max())

def change_to_yolo_format(img, j_min, i_min, j_max, i_max):
    """Convert pixel box corners into normalized (x_center, y_center, width, height).

    Column values (j) are divided by img.shape[1] and row values (i) by
    img.shape[0], so all four results are fractions of the image size.
    """
    n_rows, n_cols = img.shape[0], img.shape[1]

    left, right = j_min / n_cols, j_max / n_cols
    top, bottom = i_min / n_rows, i_max / n_rows

    return (right + left) / 2, (bottom + top) / 2, right - left, bottom - top

### create label txt files (with BB coordinates) in yolo format
* while recording the list of files without useful segments (no BB)

#### this phase was eventually dropped because there is a "yolov5 classification only" option

#### create label txt files with Bounding Box

In [None]:
# Write one YOLO label file per segmentation image and remember the images
# whose segmentation yields no usable bounding box.

# Defined here as well, so this cell does not rely on a later cell having run.
labels_path = os.path.join(data_path, 'labels')
os.makedirs(labels_path, exist_ok=True)

bad_files_list = []
for seg_file in os.listdir(seg_path):

    f_num, f_name = get_file_name_file_num(seg_file)

    img = cv2.imread(os.path.join(seg_path, seg_file))
    if img is None:
        continue  # cv2.imread returns None for files it cannot decode

    j_min, i_min, j_max, i_max = get_rec_cords(img)

    j_center, i_center, witdh, height = change_to_yolo_format(img, j_min, i_min, j_max, i_max)

    # A non-positive extent means no foreground pixel was found.
    if witdh <= 0 or height <= 0:
        bad_files_list.append(f_name)

    # File numbers start at 1 while `labels` is a 0-indexed array, hence
    # f_num - 1; the trailing "- 1" turns the 1-based class id into YOLO's
    # 0-based id.
    # NOTE(review): assumes labels.csv keeps the image order of imagelabels.mat - confirm.
    line = f"{labels[f_num - 1] - 1} {j_center} {i_center} {witdh} {height}"

    # Use a dedicated handle name: `as f` used to shadow the loop variable,
    # which made the print below fail on a file object.
    with open(os.path.join(labels_path, f_name), 'w') as label_file:
        label_file.write(line)

    print('image_' + seg_file.split('_')[1])


### remove "bad" images (with no segments, and therefore no BB)

In [None]:
# Set the dataset root first so the paths below never pick up a stale
# `data_path` left over from another cell.
data_path = os.path.join(base_path, 'data', 'data_for_yolo')

images_path = os.path.join(data_path, 'images')
labels_path = os.path.join(data_path, 'labels')

# Destination for images/labels without a usable segmentation; created up
# front because shutil.move does not create missing parent folders.
no_seg_path = os.path.join(data_path, 'images_with_no_segments')
os.makedirs(no_seg_path, exist_ok=True)

In [None]:
# Move the label file and the matching .jpg of every "bad" image out of the dataset.
# NOTE(review): this loop used to start at bad_files_list[1:], silently leaving
# the first bad image in place; all entries are handled now and files that were
# already moved are skipped instead of raising - confirm this is intended.
for label_name in bad_files_list:
    image_name = label_name.split('.')[0] + '.jpg'
    for folder, name in ((labels_path, label_name), (images_path, image_name)):
        src = os.path.join(folder, name)
        if os.path.exists(src):
            shutil.move(src, os.path.join(no_seg_path, name))

### split the data to train/test

In [None]:
def split_data():
    """Move a random half of the YOLO training images (and their labels) to test.

    Paths are resolved relative to the working directory under
    data/data_for_yolo/{train,test}/{images,labels}. Nothing is moved when the
    test image folder already holds at least as many files as the quota.

    NOTE: this definition rebinds the name `split_data` that an earlier cell
    defines with (train_path, test_path) parameters.
    """
    print('splitting the data')
    test_ratio = 0.5

    yolo_root = os.path.join(os.path.dirname(os.path.abspath('__file__')), 'data', 'data_for_yolo')
    train_images = os.path.join(yolo_root, 'train', 'images')
    train_labels = os.path.join(yolo_root, 'train', 'labels')
    test_images = os.path.join(yolo_root, 'test', 'images')
    test_labels = os.path.join(yolo_root, 'test', 'labels')

    candidates = os.listdir(train_images)
    quota = int(np.round(len(candidates) * test_ratio))
    chosen = np.random.choice(candidates, size=quota, replace=False)
    # Label-file name for each chosen image, in the same order as `chosen`.
    chosen_labels = [get_file_name_file_num(name)[1] for name in chosen]

    if len(os.listdir(test_images)) >= quota:
        return

    for image_name, label_name in zip(chosen, chosen_labels):
        shutil.move(os.path.join(train_images, image_name), os.path.join(test_images, image_name))
        shutil.move(os.path.join(train_labels, label_name), os.path.join(test_labels, label_name))

In [None]:
def reset_data():
    """Move every YOLO test image and its label file back into the train folders.

    Paths are resolved relative to the working directory under
    data/data_for_yolo/{train,test}/{images,labels}.

    NOTE: this definition rebinds the name `reset_data` that an earlier cell
    defines with (train_path, test_path) parameters.
    """
    print('resetting the data')
    yolo_root = os.path.join(os.path.dirname(os.path.abspath('__file__')), 'data', 'data_for_yolo')
    train_root = os.path.join(yolo_root, 'train')
    test_root = os.path.join(yolo_root, 'test')

    for image_name in os.listdir(os.path.join(test_root, 'images')):
        label_name = get_file_name_file_num(image_name)[1]
        # Image first, then its label - one (sub-folder, file) pair per move.
        for kind, name in (('images', image_name), ('labels', label_name)):
            shutil.move(os.path.join(test_root, kind, name), os.path.join(train_root, kind, name))
    
    

### making list of classes for yolo yaml format (to be added to yaml file)

In [9]:
# Print the {index: class name} mapping with quotes stripped and one entry per
# line, ready to paste into the YOLO yaml file.
print(
    str(dict(enumerate(classes, start=0)))
    .replace("'", "")
    .replace(",", "\n")
)

{0: pink primrose
 1: hard-leaved pocket orchid
 2: canterbury bells
 3: sweet pea
 4: english marigold
 5: tiger lily
 6: moon orchid
 7: bird of paradise
 8: monkshood
 9: globe thistle
 10: snapdragon
 11: "colts foot"
 12: king protea
 13: spear thistle
 14: yellow iris
 15: globe-flower
 16: purple coneflower
 17: peruvian lily
 18: balloon flower
 19: giant white arum lily
 20: fire lily
 21: pincushion flower
 22: fritillary
 23: red ginger
 24: grape hyacinth
 25: corn poppy
 26: prince of wales feathers
 27: stemless gentian
 28: artichoke
 29: sweet william
 30: carnation
 31: garden phlox
 32: love in the mist
 33: mexican aster
 34: alpine sea holly
 35: ruby-lipped cattleya
 36: cape flower
 37: great masterwort
 38: siam tulip
 39: lenten rose
 40: barbeton daisy
 41: daffodil
 42: sword lily
 43: poinsettia
 44: bolero deep blue
 45: wallflower
 46: marigold
 47: buttercup
 48: oxeye daisy
 49: common dandelion
 50: petunia
 51: wild pansy
 52: primula
 53: sunflower
 54