In [2]:
import os

import pandas as pd

from prepare_data.statistics import calculate_stats
from prepare_data.utils import create_folder

from sklearn.model_selection import train_test_split
import shutil
from PIL import Image


In [22]:
# Source images, their label files, and the root folder for the generated dataset.
data_dir, labels_dir, save_dir = (
    '../data/prepare_data/images',
    '../data/prepare_data/labels',
    '../data/yolov_data',
)

## Calculate statistics for the train / test / val split

In [23]:
# Build the file table: one row per file found under data_dir, with the image path
# and the path of the matching label file in labels_dir.
# Rows are collected in a list and turned into a frame once; concatenating inside
# the loop copies the whole frame on every iteration.
records = []
for root, dirs, files in os.walk(data_dir):
    for file in files:
        # Drop the '_label' marker that may sit right before the extension.
        image_name = file.replace('_label.', '.')
        # Swap only the extension for '.txt', so a "jpg" inside the stem is left alone
        # and other image extensions also get a proper label name.
        label_name = os.path.splitext(image_name)[0] + '.txt'
        records.append({'file_name': file.split('.')[0],
                        'path_to_image': os.path.join(root, image_name),
                        'path_to_label': os.path.join(labels_dir, label_name)})

# Passing the columns explicitly keeps the schema even when no files were found.
df = pd.DataFrame(records, columns=['file_name', 'path_to_image', 'path_to_label'])

In [24]:
# Inspect the collected file table (one row per file found under data_dir).
df

Unnamed: 0,file_name,path_to_image,path_to_label
0,5 (102),../data/prepare_data/images/5 (102).jpg,../data/prepare_data/labels/5 (102).txt
1,7 (34),../data/prepare_data/images/7 (34).jpg,../data/prepare_data/labels/7 (34).txt
2,12 (22),../data/prepare_data/images/12 (22).jpg,../data/prepare_data/labels/12 (22).txt
3,6 (27),../data/prepare_data/images/6 (27).jpg,../data/prepare_data/labels/6 (27).txt
4,8 (30),../data/prepare_data/images/8 (30).jpg,../data/prepare_data/labels/8 (30).txt
...,...,...,...
1157,11 (21),../data/prepare_data/images/11 (21).jpg,../data/prepare_data/labels/11 (21).txt
1158,7 (17),../data/prepare_data/images/7 (17).jpg,../data/prepare_data/labels/7 (17).txt
1159,7 (143),../data/prepare_data/images/7 (143).jpg,../data/prepare_data/labels/7 (143).txt
1160,8 (47),../data/prepare_data/images/8 (47).jpg,../data/prepare_data/labels/8 (47).txt


In [25]:
# Compute per-image statistics with the project helper. The frame displayed further down
# carries video_id, frame_id and pat0..pat4 columns next to the paths
# (pat0..pat4 are presumably per-class object counts - TODO confirm in calculate_stats).
stat = calculate_stats(df)

In [26]:
# Per-video totals of each pat* column. The columns are selected before aggregating
# so the string columns (paths, names) are not summed along the way.
stat.groupby('video_id')[['pat0', 'pat1', 'pat2', 'pat3', 'pat4']].sum()

Unnamed: 0_level_0,pat0,pat1,pat2,pat3,pat4
video_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,87,2,7,0,0
2,62,0,0,0,0
3,21,0,18,9,10
4,351,56,124,45,26
5,393,72,198,48,51
6,0,11,97,19,0
7,568,134,299,68,104
8,0,0,183,72,0
9,0,19,53,26,0
11,0,0,84,0,0


Tube («Труба») videos: 12, 11, 9, 8, 6

In [27]:
# Video ids per scene group; each group is split into train/test separately.
# NOTE(review): video 2 appears in the per-video statistics table but is in neither
# list - confirm which group it belongs to.
tube, plane = [12, 11, 9, 8, 6], [1, 3, 4, 5, 7]

In [28]:
# Hold out 20% of the videos of each group with a fixed seed, then pool the
# per-group parts into the overall train and test video lists.
split_options = {'test_size': 0.2, 'random_state': 43, 'shuffle': True}
train_plane, test_plane = train_test_split(plane, **split_options)
train_tube, test_tube = train_test_split(tube, **split_options)

train = train_plane + train_tube
test = test_plane + test_tube


In [29]:
# Video ids held out for testing (the plane part followed by the tube part).
test

[5, 8]

In [30]:
# Total of each pat* column over the images of the test videos. Only a grand total
# is needed here, so the rows are filtered and summed directly, without grouping.
stat.loc[stat['video_id'].isin(test), ['pat0', 'pat1', 'pat2', 'pat3', 'pat4']].sum()


pat0    393
pat1     72
pat2    381
pat3    120
pat4     51
dtype: object

In [31]:
# Total of each pat* column over the images of the train videos. Only a grand total
# is needed here, so the rows are filtered and summed directly, without grouping.
stat.loc[stat['video_id'].isin(train), ['pat0', 'pat1', 'pat2', 'pat3', 'pat4']].sum()
    

pat0    1027
pat1     316
pat2     682
pat3     167
pat4     154
dtype: object

In [32]:
# Label every image with the split of its video. Videos listed in neither `train` nor
# `test` still fall back to 'test' (unchanged behaviour), but they are reported here:
# they are not counted in the per-split totals, which filter on the `test` list.
unassigned_ids = stat.loc[~stat['video_id'].isin(train + test), 'video_id'].unique().tolist()
if unassigned_ids:
    print(f"video_id(s) in neither split, labelled 'test' by default: {unassigned_ids}")

stat['train_test'] = stat['video_id'].map(lambda x: 'train' if x in train else 'test')

In [33]:
# Show the per-image table, now including the train_test column.
stat

Unnamed: 0,path_to_label,path_to_image,name,video_id,frame_id,pat0,pat1,pat2,pat3,pat4,train_test
0,../data/prepare_data/labels/5 (102).txt,../data/prepare_data/images/5 (102).jpg,5 (102),5,102,0,1,0,0,0,test
1,../data/prepare_data/labels/7 (34).txt,../data/prepare_data/images/7 (34).jpg,7 (34),7,34,4,0,1,3,0,train
2,../data/prepare_data/labels/12 (22).txt,../data/prepare_data/images/12 (22).jpg,12 (22),12,22,0,1,0,0,0,train
3,../data/prepare_data/labels/6 (27).txt,../data/prepare_data/images/6 (27).jpg,6 (27),6,27,0,1,0,0,0,train
4,../data/prepare_data/labels/8 (30).txt,../data/prepare_data/images/8 (30).jpg,8 (30),8,30,0,0,1,0,0,test
...,...,...,...,...,...,...,...,...,...,...,...
1157,../data/prepare_data/labels/11 (21).txt,../data/prepare_data/images/11 (21).jpg,11 (21),11,21,0,0,1,0,0,train
1158,../data/prepare_data/labels/7 (17).txt,../data/prepare_data/images/7 (17).jpg,7 (17),7,17,7,1,0,2,0,train
1159,../data/prepare_data/labels/7 (143).txt,../data/prepare_data/images/7 (143).jpg,7 (143),7,143,4,0,2,0,0,train
1160,../data/prepare_data/labels/8 (47).txt,../data/prepare_data/images/8 (47).jpg,8 (47),8,47,0,0,1,2,0,test


In [38]:
path_to_save_yolov = r'../data/yolov_data/data_all_resize'

# Dataset root first, then labels/ and images/, each with train, test and val subfolders.
create_folder(path_to_save_yolov)
for content_kind in ("labels", "images"):
    create_folder(os.path.join(path_to_save_yolov, content_kind))
    for split_name in ("train", "test", "val"):
        create_folder(os.path.join(path_to_save_yolov, content_kind, split_name))


In [39]:
def resize_image(input_path, output_path, width=640):
    """Resize an image to the given width, keeping its aspect ratio, and save it.

    Args:
        input_path: Path of the image to read.
        output_path: Path the resized image is written to.
        width: Target width in pixels (default 640); must be positive.

    Raises:
        ValueError: If ``width`` is not positive.
    """
    if width <= 0:
        raise ValueError(f"width must be positive, got {width}")

    with Image.open(input_path) as img:
        src_width, src_height = img.size

        # Scale the height by the same factor as the width. The value is truncated as
        # before, but kept at 1 px or more: a very wide source would otherwise yield
        # a zero height, which resize rejects.
        ratio = width / float(src_width)
        height = max(1, int(src_height * ratio))

        resized_img = img.resize((width, height))
        resized_img.save(output_path)


In [40]:
def create_yolov_dataset(data: pd.DataFrame, path_to_save: str) -> None:
    """Write resized images and their label files into a YOLO-style folder layout.

    Each row goes to ``<path_to_save>/images/<split>/<name>.jpg`` and
    ``<path_to_save>/labels/<split>/<name>.txt``, where ``<split>`` is the row's
    ``train_test`` value. Rows of the 'test' split are additionally duplicated
    into the 'val' folders.

    Args:
        data: Frame with ``train_test``, ``name`` and ``path_to_label`` columns.
            If a ``path_to_image`` column is present it is used as the image source;
            otherwise the image path is derived from the label path.
        path_to_save: Root folder of the dataset to create.
    """
    # Make sure every destination folder exists, so the function does not depend
    # on the folders having been created beforehand.
    splits = set(data['train_test'].unique())
    if 'test' in splits:
        splits.add('val')
    for split in splits:
        for kind in ('images', 'labels'):
            os.makedirs(os.path.join(path_to_save, kind, split), exist_ok=True)

    has_image_column = 'path_to_image' in data.columns

    for _, row in data.iterrows():
        train_test = row['train_test']
        name = row['name']
        path_to_label = row['path_to_label']

        if has_image_column:
            # Use the recorded image path rather than guessing it from the label path.
            path_to_image = row['path_to_image']
        else:
            path_to_image = path_to_label.replace('labels', 'images').replace('.txt', '.jpg')

        image_dst = os.path.join(path_to_save, 'images', train_test, name + '.jpg')
        label_dst = os.path.join(path_to_save, 'labels', train_test, name + '.txt')

        resize_image(path_to_image, image_dst)
        shutil.copy(path_to_label, label_dst)

        if train_test == 'test':
            # The validation set mirrors the test set: copy the already resized image
            # instead of decoding and resizing the source a second time.
            shutil.copy(image_dst, os.path.join(path_to_save, 'images', 'val', name + '.jpg'))
            shutil.copy(path_to_label, os.path.join(path_to_save, 'labels', 'val', name + '.txt'))



In [41]:
# Write the resized images and their labels for every row of stat under path_to_save_yolov.
create_yolov_dataset(stat, path_to_save_yolov)