In [1]:
%load_ext watermark
%watermark

Last updated: 2022-08-27T15:17:55.153552+03:00

Python implementation: CPython
Python version       : 3.10.4
IPython version      : 8.4.0

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.15.0-46-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 8
Architecture: 64bit



In [2]:
import time
# Wall-clock timestamp taken when the notebook starts running.
# NOTE(review): nothing in the visible cells reads it back — presumably meant
# for reporting the total run time at the end; confirm or remove.
notebookstart= time.time()

In [3]:
import torch

In [4]:
import os
from typing import List, Tuple, Optional

import pandas as pd
import numpy as np

from PIL import Image
from pillow_heif import register_heif_opener
register_heif_opener() # for using Image.open for .heic without changes

from tqdm.auto import tqdm
tqdm.pandas()

In [5]:
#import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

In [6]:
%matplotlib inline

In [7]:
%watermark --iversions

numpy     : 1.23.2
pandas    : 1.4.3
torch     : 1.12.1
seaborn   : 0.11.2
matplotlib: 3.5.3
PIL       : 9.2.0



In [8]:
#import skimage
#print(skimage.__version__)

Блок для воспроизводимости результата

In [9]:
import random

# --- torch: both settings are intentionally left switched off ---------------
# seed the RNG for all devices (both CPU and CUDA)
#torch.manual_seed(1984)
# disabling cuDNN benchmarking makes cuDNN pick its algorithm deterministically,
# possibly at the cost of reduced performance
#torch.backends.cudnn.benchmark = False

# --- Python built-in RNG (e.g. for custom operators) ------------------------
random.seed(5986721)

# --- NumPy legacy global RNG -------------------------------------------------
# per the original note, sklearn takes its seed from this NumPy call
np.random.seed(62185)

# --- Seeds kept as constants for later use ----------------------------------
# presumably for CatBoost / XGBoost / LightGBM; not used in the visible cells
CB_RANDOMSEED = 309487
XGB_RANDOMSEED = 56
LGB_RANDOMSEED = 874256

In [10]:
# Project folders, all resolved against the current working directory.
DIR_DATA = os.path.join(os.getcwd(), 'data')
DIR_DATA_TRAIN = os.path.join(DIR_DATA, 'train')
DIR_DATA_TEST = os.path.join(DIR_DATA, 'test')

DIR_SUBM = os.path.join(os.getcwd(), 'subm')
DIR_SUBM_TRAIN = os.path.join(DIR_SUBM, 'train')

# Загружаем и подготавливаем данные

In [11]:
# Names of every entry found in the train / test image folders, held as sets.
train_img_names = set(os.listdir(DIR_DATA_TRAIN))
test_img_names = set(os.listdir(DIR_DATA_TEST))

# shown as (number of test names, number of train names)
(len(test_img_names), len(train_img_names))

(521, 530)

In [12]:
# Training labels; the CSV is ';'-separated and is read without an index column.
train_labels_df = pd.read_csv(
    os.path.join(DIR_DATA, 'train.csv'),
    sep=';',
)

In [13]:
def get_car_center(inp_tensor: torch.Tensor) -> Tuple[int, int]:
    """Return the integer centre ``(x, y)`` of a box.

    The first four entries of ``inp_tensor`` are read as
    ``x_min, y_min, x_max, y_max``. Each is truncated to an integer before the
    midpoint is computed, and the midpoint is truncated to an integer again.
    """
    x_min, y_min, x_max, y_max = (inp_tensor[i].int().item() for i in range(4))

    center_x = int(x_min + (x_max - x_min) / 2)
    center_y = int(y_min + (y_max - y_min) / 2)

    return (center_x, center_y)

In [14]:
def get_center_dist(inp_center: Tuple[int, int], inp_point: Tuple[int, int]) -> float:
    """Euclidean distance between two 2-D points given as ``(x, y)`` pairs."""
    dx = inp_center[0] - inp_point[0]
    dy = inp_center[1] - inp_point[1]

    return np.sqrt(dx ** 2 + dy ** 2)

In [15]:
def determine_targ_car(inp_results, inp_img_cntr: Tuple[int, int],
                       min_size: float = 200, car_class: int = 2) -> int:
    """Pick the detected car whose box centre is closest to a reference point.

    Args:
        inp_results: detector output; only ``inp_results.xyxy[0]`` is read. Each
            row is indexed as ``[x_min, y_min, x_max, y_max, <unused>, class]``.
        inp_img_cntr: reference point ``(x, y)``, e.g. the image centre.
        min_size: boxes narrower OR lower than this are ignored. Without the
            limit a small car (e.g. 27x54) sometimes ends up closest to the
            centre.
        car_class: class id a detection must have to be considered
            (2 is the id the original code treats as "car").

    Returns:
        Row index into ``inp_results.xyxy[0]`` of the selected detection, or
        ``-1`` when no detection passes the class and size filters. On equal
        distances the first detection wins.
    """
    detections = inp_results.xyxy[0]

    # inf instead of a large finite sentinel: any real distance compares smaller
    min_dist = float('inf')
    min_idx = -1

    for el in range(detections.shape[0]):
        box = detections[el]

        # only cars are taken into account
        if box[5].int().item() != car_class:
            continue

        # minimum dimensions of the cars taken into account
        h = box[3] - box[1]
        w = box[2] - box[0]
        if w < min_size or h < min_size:
            continue

        car_cntr = get_car_center(box)
        cur_dist = get_center_dist(inp_img_cntr, car_cntr)
        if cur_dist < min_dist:
            min_dist = cur_dist
            min_idx = el

    return min_idx

In [23]:
def create_car_feeatures_yolo(inp_fnames: List[str], inp_dir: str, inp_model, use_centr: Optional[bool] = False) -> pd.DataFrame:
    """Run a detector over image files and keep one bounding box per image.

    Args:
        inp_fnames: image file names, relative to ``inp_dir``. Rows are produced
            in iteration order.
        inp_dir: directory that holds the images.
        inp_model: callable taking an image array and returning an object whose
            ``xyxy[0]`` is a tensor with one row per detection
            (``x_min, y_min, x_max, y_max, conf, class``).
        use_centr: when true, keep the detection chosen by
            ``determine_targ_car`` for the image centre; otherwise keep the
            first detection (row 0).

    Returns:
        DataFrame with columns ``image_name, car_x_min, car_y_min, car_x_max,
        car_y_max, car_conf, car_class, car_h, car_w``. Images with no detection
        at all, or with no detection accepted by ``determine_targ_car``, are
        reported with ``print`` and get NO row, so that downstream algorithms
        can decide for themselves how to fill the gaps.
    """
    columns = ['image_name', 'car_x_min', 'car_y_min', 'car_x_max', 'car_y_max',
               'car_conf', 'car_class', 'car_h', 'car_w']
    ret_data = []

    for img_name in tqdm(inp_fnames):

        # The context manager closes the underlying file once the pixels have
        # been copied into the array; a bare Image.open() leaves it open.
        with Image.open(os.path.join(inp_dir, img_name)) as pil_img:
            img = np.array(pil_img)

        results = inp_model(img)
        detections = results.xyxy[0]

        # nothing was detected in this image
        if detections.shape[0] == 0:
            print(f'wtf, {img_name}   {detections.shape}')
            continue

        # look for the object closest to the frame centre?
        if use_centr:
            img_cntr = (int(img.shape[1]/2), int(img.shape[0]/2))
            target_goal = determine_targ_car(results, img_cntr)
        else:
            target_goal = 0

        # detections exist, but none of them was accepted as the target car
        if target_goal < 0:
            print(f'wtf2, {img_name}   {detections.shape}')
            continue

        box = detections[target_goal]
        h = box[3] - box[1]
        w = box[2] - box[0]

        # .cpu() returns the tensor itself when it is already on the CPU and
        # makes .numpy() work when the model produced CUDA tensors.
        row = box.cpu().numpy().tolist() + [h.item(), w.item()]
        ret_data.append([img_name] + row)

    return pd.DataFrame(ret_data, columns=columns)

In [24]:
def create_license_plate_feeatures_yolo(inp_df: pd.DataFrame, inp_dir: str, inp_model, use_centr: Optional[bool] = False) -> pd.DataFrame:
    """Placeholder for licence-plate features (not implemented yet).

    Runs ``inp_model`` on every image named in ``inp_df['image_name']`` but does
    not derive any feature from the detections so far.

    Args:
        inp_df: frame with an ``image_name`` column (file names relative to
            ``inp_dir``).
        inp_dir: directory that holds the images.
        inp_model: callable taking an image array and returning an object with
            an ``xyxy`` attribute, used like the car detector's output.
        use_centr: currently unused.

    Returns:
        ``inp_df`` itself, unchanged, so that the declared return type holds
        and a caller re-assigning the result does not lose its frame.
    """
    # The file name comes from the frame; the previous body referenced an
    # undefined ``img_name`` and raised NameError on the first row.
    for img_name in inp_df['image_name']:
        with Image.open(os.path.join(inp_dir, img_name)) as pil_img:
            img = np.array(pil_img)

        results = inp_model(img)

        # at least one object was found
        if results.xyxy[0].shape[0] != 0:
            # TODO: derive the plate features from `results`
            pass

    return inp_df

In [25]:
#model = torch.hub.load('ultralytics/yolov5', 'yolov5x6')
# Load the 'yolov5l' model through torch.hub and move it to the CPU.
model = torch.hub.load('ultralytics/yolov5', 'yolov5l').cpu()


#model_plate = torch.hub.load('ultralytics/yolov5', 'custom', path = './models/best_y5m_full_4e.pt', source='local')
#model_plate = torch.hub.load('ultralytics/yolov5', 'custom', path = './models_weights/best_y5l_full_3e.pt', force_reload=True)
#model_plate = torch.load('./models/last_y5m_full_4e.pt')

Using cache found in /home/v010ch/.cache/torch/hub/ultralytics_yolov5_master
YOLOv5 🚀 2022-8-27 Python-3.10.4 torch-1.12.1+cu102 CUDA:0 (NVIDIA GeForce GTX 1050 Ti, 4037MiB)

Fusing layers... 
YOLOv5l summary: 367 layers, 46533693 parameters, 0 gradients
Adding AutoShape... 


In [26]:
#model

In [27]:
# sorted(): the names are held in a set, whose iteration order for strings can
# change from one interpreter run to the next; sorting makes the processing
# order (and the log output) reproducible.
train_car_df = create_car_feeatures_yolo(sorted(train_img_names), DIR_DATA_TRAIN, model, use_centr = True)

# Left join on the image name keeps every labelled image; images without a
# usable detection get NaN in the car_* columns.
train_df = pd.merge(train_labels_df, train_car_df, how='left', on='image_name')
train_df.shape

  0%|          | 0/530 [00:00<?, ?it/s]

wtf2, img_1890.jpg   torch.Size([2, 6])


(530, 10)

In [29]:
# sorted(): the names are held in a set, whose iteration order for strings can
# change from one interpreter run to the next. Rows of test_df follow the
# processing order, so sorting keeps the frame (and anything saved from it)
# identical between runs.
test_df = create_car_feeatures_yolo(sorted(test_img_names), DIR_DATA_TEST, model, use_centr = True)
test_df.shape

  0%|          | 0/521 [00:00<?, ?it/s]

wtf2, img_1889.jpg   torch.Size([3, 6])
wtf2, img_1888.jpg   torch.Size([2, 6])
wtf, img_2674.heic   torch.Size([0, 6])
wtf2, img_2571.jpg   torch.Size([1, 6])


(517, 9)

Изображения, на которых yolov5 не нашёл подходящей машины:     
train:   
img_1890.jpg (w&h < 200),     

test:   
img_1888.jpg (w&h < 200),   
img_1889.jpg (only person),   
img_2674.heic (нет ни одной детекции),   
img_2571.jpg (w&h < 200)

In [31]:
# Distribution of the selected box height in the train set.
fig, ax = plt.subplots()
sns.histplot(train_df, x='car_h', ax=ax)
ax.set(title='Train: height of the selected car box (car_h)', xlabel='car_h')
plt.show()

  plt.show()


In [32]:
# Distribution of the selected box width in the train set.
fig, ax = plt.subplots()
sns.histplot(train_df, x='car_w', ax=ax)
ax.set(title='Train: width of the selected car box (car_w)', xlabel='car_w')
plt.show()

  plt.show()


In [34]:
# Sanity check: which detector class ids ended up in the train features.
train_df['car_class'].value_counts()

2.0    529
Name: car_class, dtype: int64

In [35]:
# Sanity check: which detector class ids ended up in the test features.
test_df['car_class'].value_counts()

2.0    517
Name: car_class, dtype: int64

In [38]:
# Add a natural-log copy (log_<column>) of every box geometry column to both frames.
# NOTE(review): np.log yields -inf for a value of exactly 0 (e.g. a box that
# starts at the image border) — confirm downstream models tolerate that.
LOG_FEATURE_COLS = ['car_x_min', 'car_y_min', 'car_x_max', 'car_y_max', 'car_h', 'car_w']

for frame in (train_df, test_df):
    for el in LOG_FEATURE_COLS:
        # vectorised ufunc on the whole column instead of a per-element lambda
        frame[f'log_{el}'] = np.log(frame[el])

In [40]:
# Preview of the train frame after the log_* columns were added.
train_df.head(10)

Unnamed: 0,image_name,distance,car_x_min,car_y_min,car_x_max,...,log_car_y_min,log_car_x_max,log_car_y_max,log_car_h,log_car_w
0,img_1596.jpg,4.88,1661.429565,1172.557251,2280.754883,...,7.066942,7.732262,7.448874,6.301465,6.428631
1,img_1600.jpg,1.54,1125.149536,1184.862671,3012.469971,...,7.077382,8.010516,7.907194,7.33426,7.542913
2,img_1601.jpg,3.68,1573.088257,1192.642944,2434.746826,...,7.083927,7.797598,7.610886,6.718317,6.758859
3,img_1603.jpg,2.22,2603.172363,1277.686401,3310.659912,...,7.152806,8.104903,7.522853,6.349404,6.56172
4,img_1605.jpg,3.73,1612.237183,1101.504517,2481.318848,...,7.004432,7.816545,7.562426,6.712962,6.767437
5,img_1606.jpg,4.52,1627.065308,1009.970459,2351.432617,...,6.917676,7.76278,7.44773,6.559604,6.585299
6,img_1607.jpg,5.4,1716.459961,1277.694946,2344.615479,...,7.152813,7.759877,7.541665,6.408974,6.442788
7,img_1608.jpg,6.21,1778.528687,1193.292114,2315.450684,...,7.084471,7.74736,7.449093,6.263421,6.285853
8,img_1612.jpg,7.12,2275.804199,1428.866333,2694.506836,...,7.264637,7.89897,7.457901,5.71913,6.037161
9,img_1613.jpg,5.26,1684.911987,1191.849731,2306.685059,...,7.083262,7.743567,7.474751,6.347587,6.432575


In [41]:
# Write both feature tables into the data folder, without the index column.
for frame, fname in ((train_df, 'train_upd.csv'), (test_df, 'test_upd.csv')):
    frame.to_csv(os.path.join(DIR_DATA, fname), index = False)