In [4]:
import os
import random
import math
from datetime import datetime
from collections import Counter
import pandas as pd
import numpy as np


# import cv2
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from sklearn.model_selection import train_test_split
import xml.etree.ElementTree as ET

import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

In [12]:
import cv2

ModuleNotFoundError: No module named 'cv2'

In [6]:
# Dataset locations, given relative to the notebook's working directory.
images_path = Path('../images')  # image files named by each annotation's <filename>
anno_path = Path('../annotations')  # directory searched for .xml annotation files


def filelist(root, file_type):
    """Collect the paths of all files below ``root`` whose name ends with ``file_type``.

    The tree is traversed with ``os.walk``; every returned path is the
    directory reported by the walk joined with the file name.
    """
    matches = []
    for directory_path, _subdirectories, file_names in os.walk(root):
        for file_name in file_names:
            if file_name.endswith(file_type):
                matches.append(os.path.join(directory_path, file_name))
    return matches


def generate_train_df(anno_path):
    """Build a DataFrame with one row per annotation XML file under ``anno_path``.

    Every ``.xml`` file returned by ``filelist`` is parsed with ElementTree.
    Only the first ``<object>`` element of a file is read (``Element.find``
    returns the first match), so a file contributes exactly one row.

    Args:
        anno_path: Directory searched recursively for ``.xml`` annotation files.

    Returns:
        pandas.DataFrame with the columns ``filename``, ``width``, ``height``,
        ``class``, ``xmin``, ``ymin``, ``xmax``, ``ymax`` in that order.
        ``filename`` is a ``Path`` under the module-level ``images_path``;
        all size and box columns are integers. The columns are present even
        when no annotation file is found.
    """
    # Column order is part of the contract: create_bb_array reads positions 4-7.
    columns = ['filename', 'width', 'height', 'class',
               'xmin', 'ymin', 'xmax', 'ymax']
    records = []
    # A dedicated loop name keeps the ``anno_path`` argument from being shadowed.
    for xml_file in filelist(anno_path, '.xml'):
        root = ET.parse(xml_file).getroot()
        records.append({
            'filename': Path(str(images_path) + '/' + root.find("./filename").text),
            # Sizes are cast to int like the box corners so they can be used
            # in arithmetic without a later conversion.
            'width': int(root.find("./size/width").text),
            'height': int(root.find("./size/height").text),
            'class': root.find("./object/name").text,
            'xmin': int(root.find("./object/bndbox/xmin").text),
            'ymin': int(root.find("./object/bndbox/ymin").text),
            'xmax': int(root.find("./object/bndbox/xmax").text),
            'ymax': int(root.find("./object/bndbox/ymax").text),
        })
    return pd.DataFrame(records, columns=columns)

In [7]:
# Parse every annotation file under anno_path into one DataFrame and display it.
df_train = generate_train_df(anno_path)
df_train

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,../images/road712.png,300,400,speedlimit,98,140,139,182
1,../images/road706.png,300,400,speedlimit,136,92,177,135
2,../images/road289.png,300,400,stop,61,140,146,227
3,../images/road538.png,300,400,speedlimit,115,169,149,205
4,../images/road510.png,300,400,speedlimit,89,201,133,245
...,...,...,...,...,...,...,...,...
872,../images/road535.png,300,400,speedlimit,100,254,180,334
873,../images/road284.png,300,400,speedlimit,111,133,165,187
874,../images/road290.png,300,400,speedlimit,105,157,171,224
875,../images/road723.png,300,400,speedlimit,115,185,160,230


In [8]:
# Count the rows per class label to see how the classes are distributed.
df_train['class'].value_counts()

speedlimit      652
crosswalk        88
stop             76
trafficlight     61
Name: class, dtype: int64

In [9]:
# Mapping from class name to the integer id stored in df_train['class'].
class_dict = {'speedlimit': 0, 'stop': 1, 'crosswalk': 2, 'trafficlight': 3}

# Encode the class names as integer ids. Values that are already ids pass
# through unchanged, so re-running this cell is a no-op instead of a KeyError;
# an unknown class name still raises KeyError.
encoded_ids = set(class_dict.values())
df_train['class'] = df_train['class'].apply(
    lambda x: x if x in encoded_ids else class_dict[x])

print(df_train.shape)
df_train.head()

(877, 8)


Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,../images/road712.png,300,400,0,98,140,139,182
1,../images/road706.png,300,400,0,136,92,177,135
2,../images/road289.png,300,400,1,61,140,146,227
3,../images/road538.png,300,400,0,115,169,149,205
4,../images/road510.png,300,400,0,89,201,133,245


In [10]:
"""
Purpose: Reads an image from the given file path and converts it from BGR to RGB color space.
How it works: Uses OpenCV's imread function to load the image and cvtColor to convert the color space. 
OpenCV loads images in BGR format by default, but for many applications (and especially in Python), 
RGB is the preferred format.
"""
def read_image(path):
    """Load the image at ``path`` with OpenCV and return it converted from BGR to RGB.

    Args:
        path: Location of the image file; converted with ``str`` before
            being handed to ``cv2.imread``.

    Returns:
        The result of ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)`` on the loaded image.

    Raises:
        FileNotFoundError: If ``cv2.imread`` returns ``None``, which is how
            it reports a missing or unreadable file.
    """
    image_bgr = cv2.imread(str(path))
    # imread does not raise on failure; without this check the error would
    # surface as an opaque cv2.error inside cvtColor.
    if image_bgr is None:
        raise FileNotFoundError(f"Could not read image: {path}")
    return cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)


def create_mask(bb, x):
    """Create a bounding-box mask with the same height and width as the image.

    Args:
        bb: Box as ``[row_start, col_start, row_end, col_end]``. Values are
            cast to integers (truncating); the end indices are exclusive
            because they are used as slice bounds.
        x: Image array; only the first two entries of ``x.shape`` are used.

    Returns:
        A float array of shape ``(rows, cols)`` holding 1.0 inside the box
        and 0.0 everywhere else.
    """
    rows, cols, *_ = x.shape
    Y = np.zeros((rows, cols))
    # ``np.int`` was removed in NumPy 1.24; the builtin ``int`` is the same
    # dtype. ``asarray`` additionally lets plain lists/tuples be passed.
    bb = np.asarray(bb).astype(int)
    Y[bb[0]:bb[2], bb[1]:bb[3]] = 1.
    return Y


def mask_to_bb(Y):
    """Convert mask ``Y`` to a bounding box, treating zeros as background.

    Returns a float32 array ``[min_row, min_col, max_row, max_col]`` over the
    non-zero entries of ``Y`` (the maxima are inclusive indices), or four
    zeros when the mask has no non-zero entry.
    """
    # np.nonzero yields the row indices first, then the column indices.
    row_idx, col_idx = np.nonzero(Y)
    if row_idx.size == 0:
        return np.zeros(4, dtype=np.float32)
    bounds = [row_idx.min(), col_idx.min(), row_idx.max(), col_idx.max()]
    return np.array(bounds, dtype=np.float32)


def create_bb_array(x):
    """Build the bounding-box array from one record of the training frame.

    Picks the entries at positions 5, 4, 7 and 6 of ``x``, in that order.
    """
    # presumably positions 4-7 are xmin, ymin, xmax, ymax as in the columns
    # produced by generate_train_df - verify against the caller
    first, second, third, fourth = x[5], x[4], x[7], x[6]
    return np.array([first, second, third, fourth])


def resize_image_bb(read_path, write_path, bb, sz):
    """Resize an image and its bounding box to ``sz`` x ``sz`` and save the image.

    The box is resized by rasterising it into a mask with ``create_mask``,
    resizing that mask alongside the image, and reading the box back with
    ``mask_to_bb``.

    Args:
        read_path: Path of the source image (``Path`` or ``str``).
        write_path: Directory the resized image is written to (``Path`` or
            ``str``); the file keeps the source file's name.
        bb: Bounding box in the layout expected by ``create_mask``.
        sz: Side length in pixels of the square output.

    Returns:
        Tuple ``(new_path, bb_resized)``: the written file's path as a string
        and the resized box as returned by ``mask_to_bb``.

    Raises:
        IOError: If ``cv2.imwrite`` reports that the image was not written.
    """
    im = read_image(read_path)
    im_resized = cv2.resize(im, (sz, sz))
    Y_resized = cv2.resize(create_mask(bb, im), (sz, sz))
    new_path = str(Path(write_path) / Path(read_path).name)
    # imwrite signals failure (e.g. a missing target directory) only through
    # its return value; fail here instead of returning a path to a file that
    # does not exist.
    written = cv2.imwrite(new_path, cv2.cvtColor(im_resized, cv2.COLOR_RGB2BGR))
    if not written:
        raise IOError(f"Could not write resized image to {new_path}")
    return new_path, mask_to_bb(Y_resized)