In [1]:
%load_ext watermark
%watermark

Last updated: 2022-08-27T15:37:14.231017+03:00

Python implementation: CPython
Python version       : 3.10.4
IPython version      : 8.4.0

Compiler    : GCC 7.5.0
OS          : Linux
Release     : 5.15.0-46-generic
Machine     : x86_64
Processor   : x86_64
CPU cores   : 8
Architecture: 64bit



In [2]:
import time
notebookstart= time.time()

In [3]:
import torch

from torchvision import models
from torchvision import transforms #import (Compose, Normalize, Resize, ToPILImage,
                                   # ToTensor)

In [4]:
import os
from typing import List, Tuple, Optional
from ast import literal_eval

import pandas as pd
import numpy as np

from PIL import Image
from pillow_heif import register_heif_opener
register_heif_opener() # for using Image.open for .heic without changes

from tqdm.auto import tqdm
tqdm.pandas()

In [5]:
import cv2

In [6]:
%watermark --iversions

numpy      : 1.23.2
cv2        : 4.6.0
torch      : 1.12.1
torchvision: 0.13.1
PIL        : 9.2.0
pandas     : 1.4.3



Блок для воспроизводимости результата

In [7]:
# seed the RNG for all devices (both CPU and CUDA)
#torch.manual_seed(1984)

#Disabling the benchmarking feature causes cuDNN to deterministically select an algorithm, 
#possibly at the cost of reduced performance.
#torch.backends.cudnn.benchmark = False

# for custom operators,
import random
random.seed(5986721)

# 
np.random.seed(62185)

#sklearn take seed from a line abowe

CB_RANDOMSEED  = 309487
XGB_RANDOMSEED = 56
LGB_RANDOMSEED = 874256

In [8]:
DIR_DATA = os.path.join(os.getcwd(), 'data')
DIR_SUBM = os.path.join(os.getcwd(), 'subm')
DIR_SUBM_TRAIN = os.path.join(os.getcwd(), 'subm', 'train')
DIR_DATA_TRAIN = os.path.join(DIR_DATA, 'train')
DIR_DATA_TEST  = os.path.join(DIR_DATA, 'test')

# Загрузка данных

In [9]:
train_df = pd.read_csv(os.path.join(DIR_DATA, 'train_upd.csv'))
test_df = pd.read_csv(os.path.join(DIR_DATA, 'test_upd.csv'))

In [10]:
#115 img_1824.jpg - белая машина с белой рамкой

In [13]:
def create_model(outputchannels: Optional[int] = 1, aux_loss: Optional[bool] = False, freeze_backbone: Optional[bool] = False):
    model = models.segmentation.deeplabv3_resnet101(
        pretrained=True, progress=True)#, aux_loss=aux_loss)

    if freeze_backbone is True:
        for p in model.parameters():
            p.requires_grad = False

    #model.classifier = models.segmentation.segmentation.DeepLabHead(
    model.classifier = models.segmentation.deeplabv3.DeepLabHead(
        2048, outputchannels)

    return model

In [24]:
# Prediction pipeline
def pred(inp_image: np.ndarray, inp_model):
    preprocess = transforms.Compose([transforms.ToTensor(),
                                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
                                    ])

    input_tensor = preprocess(inp_image)
    input_batch = input_tensor.unsqueeze(0)

    with torch.no_grad():
        output = inp_model(input_batch)['out'][0]
    
    return output
 

In [20]:
def get_plate_features_tuple(inp_row: str, inp_folder: str, inp_model) -> Tuple[int, int, int, int]:
    
    #print(inp_row)
    #return 0

    x_min = 0
    y_min = 0
    x_max = 0
    y_max = 0
    
    # найдена licence plate
    if inp_row.car_y_min > 0:

        #img = Image.open(os.path.join(DIR_DATA_TRAIN, tmp.image_name))
        img = Image.open(os.path.join(inp_folder, inp_row.image_name))
        img = np.array(img)
        sub_img = img[int(inp_row.car_y_min) : int(inp_row.car_y_max),
                      int(inp_row.car_x_min) : int(inp_row.car_x_max)
                     ]

        # Defining a threshold for predictions
        threshold = 0.1 # 0.1 seems appropriate for the pre-trained model

        # Predict
        output = pred(sub_img, inp_model)


        output = (output > threshold).type(torch.IntTensor)
        output_np = output.cpu().numpy()[0]

        # Extracting coordinates
        result = np.where(output_np > 0)
        coords = list(zip(result[0], result[1]))

        if len(coords) != 0:
            x_min = sorted(coords, key = lambda x: x[0])[0][0]
            y_min = sorted(coords, key = lambda x: x[1])[0][1]
            x_max = sorted(coords, key = lambda x: x[0])[-1][0]
            y_max = sorted(coords, key = lambda x: x[1])[-1][1]
    
    return (x_min, y_min, x_max, y_max)

In [21]:
def get_plate_features(inp_df: pd.DataFrame) -> pd.DataFrame:
    
    #inp_df.tmp = inp_df.tmp.apply(lambda x: (x))
    
    inp_df['plate_x_min'] = inp_df.tmp.apply(lambda x: float(x[0]))
    inp_df['plate_y_min'] = inp_df.tmp.apply(lambda x: float(x[1]))
    inp_df['plate_x_max'] = inp_df.tmp.apply(lambda x: float(x[2]))
    inp_df['plate_y_max'] = inp_df.tmp.apply(lambda x: float(x[3]))
    
    inp_df['plate_w'] = inp_df.plate_x_max - inp_df.plate_x_min
    inp_df['plate_h'] = inp_df.plate_y_max - inp_df.plate_y_min
    
    #inp_df.drop(['tmp'], axis = 0, inplace = True)
    
    return inp_df

In [22]:
#https://github.com/dennisbappert/pytorch-licenseplate-segmentation
# Load the model:
model = create_model()
checkpoint = torch.load('./models_weights/model_v2.pth', map_location='cpu')
model.load_state_dict(checkpoint['model'])
_ = model.eval()
_ = model.to('cpu')

In [25]:
print('before ', train_df.shape, test_df.shape)
train_df['tmp'] = train_df.progress_apply(lambda x: get_plate_features_tuple(x, DIR_DATA_TRAIN, model), axis = 1)
test_df['tmp'] = test_df.progress_apply(lambda x: get_plate_features_tuple(x, DIR_DATA_TEST, model), axis = 1)
print('after  ', train_df.shape, test_df.shape)

before  (530, 16) (517, 15)


  0%|          | 0/530 [00:00<?, ?it/s]

  0%|          | 0/517 [00:00<?, ?it/s]

after   (530, 17) (517, 16)


In [26]:
print('before ', train_df.shape, test_df.shape)
train_df = get_plate_features(train_df)
test_df = get_plate_features(test_df)
print('after  ', train_df.shape, test_df.shape)

before  (530, 17) (517, 16)
after   (530, 23) (517, 22)


In [29]:
for el in ['plate_w', 'plate_h']:
    train_df[f'log_{el}'] = train_df[el].apply(lambda x: np.log(x))
    test_df[f'log_{el}'] = test_df[el].apply(lambda x: np.log(x))

In [30]:
train_df.to_csv(os.path.join(DIR_DATA, 'train_upd.csv'), index = False)
test_df.to_csv(os.path.join(DIR_DATA,  'test_upd.csv'), index = False)

In [28]:
test_df.head()

Unnamed: 0,image_name,car_x_min,car_y_min,car_x_max,car_y_max,car_conf,car_class,car_h,car_w,log_car_x_min,...,log_car_y_max,log_car_h,log_car_w,tmp,plate_x_min,plate_y_min,plate_x_max,plate_y_max,plate_w,plate_h
0,img_2019.jpg,1534.937988,1122.445801,2258.129395,1753.614258,0.922432,2.0,631.168457,723.191406,7.336245,...,7.469434,6.447573,6.583674,"(349, 241, 419, 502)",349.0,241.0,419.0,502.0,70.0,261.0
1,img_2692.jpg,1778.514648,1269.690674,2090.799072,1552.986694,0.809172,2.0,283.296021,312.284424,7.483534,...,7.347935,5.646492,5.743914,"(146, 97, 174, 211)",146.0,97.0,174.0,211.0,28.0,114.0
2,img_2417.jpg,1622.07312,1102.892822,2346.047363,1695.42749,0.922528,2.0,592.534668,723.974243,7.39146,...,7.43569,6.384409,6.584756,"(51, 245, 396, 629)",51.0,245.0,396.0,629.0,345.0,384.0
3,img_2279.jpg,1669.779541,1172.031006,2143.335693,1597.427612,0.918139,2.0,425.396606,473.556152,7.420447,...,7.37615,6.053022,6.16027,"(307, 153, 348, 321)",307.0,153.0,348.0,321.0,41.0,168.0
4,img_2701.jpg,1309.571289,1094.303101,2470.923096,1935.219482,0.925146,2.0,840.916382,1161.351807,7.177455,...,7.567976,6.734492,7.05734,"(444, 392, 617, 1038)",444.0,392.0,617.0,1038.0,173.0,646.0
