In [None]:
!unzip /content/drive/MyDrive/webPages.zip

In [1]:
import pandas as pd
import numpy as np
import cv2
from os.path import join
from random import randint

import tensorflow as tf
from tensorflow import keras
from keras import layers

# Dataset locations, relative to the working directory.
CSV_PATH = 'webPages.csv'
IMG_TRAIN_PATH = 'webPages'

# Size every crop is resized to before it is fed to the model.
IMG_SIZE = (150, 150)

# Number of dataset rows read per batch (each row can yield several crops).
BATCH_SIZE = 32

# Crop side lengths to try, as fractions of the shorter image side.
ROI_SIZES = np.linspace(start=0.05, stop=0.6, num=15)

Считываем данные и переводим bounding boxes в формат (x1, y1, x2, y2)

In [2]:
data = pd.read_csv(CSV_PATH)

# Row index at which the validation part starts (first 80% of the rows come before it).
VALID_START = int(0.8 * len(data))

# Turn (x, y, width, height) into (x1, y1, x2, y2). The column names are kept,
# so after this cell 'bbox_width' / 'bbox_height' hold the x2 / y2 coordinates.
for size_col, origin_col in (('bbox_width', 'bbox_x'), ('bbox_height', 'bbox_y')):
    data[size_col] = data[size_col] + data[origin_col]

Функции для загрузки и преобразования изображений

In [3]:
def batch_generator(dataset, batch_size, steps):
    """Endlessly yield training batches built from ``dataset``.

    Each pass shuffles the rows and then walks over them in consecutive,
    non-overlapping windows of ``batch_size`` rows, delegating the actual
    image loading to ``load_data``.

    Args:
        dataset: DataFrame with one annotated image per row.
        batch_size: Number of dataset rows that go into one batch.
        steps: Requested number of batches per pass. It is capped at the
            number of full batches the dataset can provide, so the generator
            never asks ``load_data`` for rows past the end of the dataset.

    Yields:
        Whatever ``load_data`` returns for the current window of rows.
    """
    # Cap the cycle length at what the dataset can actually supply; always keep
    # at least one step so the generator still yields for tiny inputs.
    full_batches = len(dataset) // batch_size
    steps_per_pass = max(1, min(steps, full_batches))

    while True:
        # Reshuffle on every pass so batches differ between epochs.
        shuffled = dataset.sample(frac=1).reset_index(drop=True)

        for step in range(steps_per_pass):
            # The start index is a row offset, hence the multiplication.
            yield load_data(shuffled, step * batch_size, batch_size)
            
def _get_rois(img, box, roi_size):
    """Cut one negative and one positive square crop out of ``img``.

    Args:
        img: 2-D image array indexed as ``img[y, x]``.
        box: Bounding box of the target as ``(x1, y1, x2, y2)`` in pixels.
        roi_size: Crop side length as a fraction of the shorter image side.

    Returns:
        ``(crop_without_target, crop_with_target)``, both ``roi_side`` x
        ``roi_side``, or ``(None, None)`` when the crop is smaller than the
        box or no valid placement exists.
    """
    img_h, img_w = img.shape[:2]
    roi_side = int(min(img_h, img_w) * roi_size)

    # The positive crop has to contain the whole box.
    if roi_side < box[2] - box[0] or roi_side < box[3] - box[1]:
        return None, None

    # Negative crop: on each axis use the span before the box when the box
    # starts past the image centre, otherwise the span after the box.
    x_range = (0, box[0]) if box[0] > img_w // 2 else (box[2] + 1, img_w)
    y_range = (0, box[1]) if box[1] > img_h // 2 else (box[3] + 1, img_h)

    if x_range[1] - x_range[0] < roi_side or y_range[1] - y_range[0] < roi_side:
        return None, None

    x_left = randint(x_range[0], x_range[1] - roi_side)
    y_top = randint(y_range[0], y_range[1] - roi_side)
    crop_without = img[y_top:y_top + roi_side, x_left:x_left + roi_side]

    # Positive crop: the top-left corner must lie between "box end minus crop
    # side" and "box start" so the box is fully covered, and the crop must
    # stay inside the image. The upper bounds are capped here; the previous
    # code overwrote the lower bound instead, which allowed crops that ran
    # past the right/bottom image edge.
    x_min = max(box[2] - roi_side, 0)
    x_max = min(box[0], img_w - roi_side)
    y_min = max(box[3] - roi_side, 0)
    y_max = min(box[1], img_h - roi_side)

    # A box that sticks out of the image leaves no valid placement.
    if x_min > x_max or y_min > y_max:
        return None, None

    x_left, y_top = randint(x_min, x_max), randint(y_min, y_max)
    crop_with = img[y_top:y_top + roi_side, x_left:x_left + roi_side]

    return crop_without, crop_with


def load_data(dataset, idx, batch_size):
    """Build one batch of positive/negative crops from consecutive dataset rows.

    For every row in ``[idx, idx + batch_size)`` (clipped to the dataset
    length) the image is read as grayscale scaled to ``[0, 1]``, and for every
    fraction in ``ROI_SIZES`` a crop without the target (label 0.0) and a crop
    with the target (label 1.0) are taken and resized to ``IMG_SIZE``.
    Fractions for which no valid crop pair exists are skipped, so the number
    of samples per batch varies.

    Args:
        dataset: DataFrame whose columns 1-4 (by position) hold the box as
            ``(x1, y1, x2, y2)`` and whose column 5 holds the image file name
            relative to ``IMG_TRAIN_PATH``.
        idx: Position of the first row to use.
        batch_size: Number of rows to use.

    Returns:
        ``(x, y)`` tensors with the resized crops and their 0.0 / 1.0 labels.

    Raises:
        FileNotFoundError: If an image cannot be read.
    """
    x_train, y_train = [], []

    last_row = min(idx + batch_size, len(dataset))
    for row in range(idx, last_row):
        img_name = dataset.iloc[row, 5]
        box = np.array(dataset.iloc[row, 1:5], dtype=np.int32)

        img_path = join(IMG_TRAIN_PATH, img_name)
        img_bgr = cv2.imread(img_path)
        # cv2.imread signals failure by returning None instead of raising.
        if img_bgr is None:
            raise FileNotFoundError(f'Cannot read image: {img_path}')
        img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2GRAY) / 255.0

        for roi_size in ROI_SIZES:
            crop_without, crop_with = _get_rois(img, box, roi_size)
            if crop_without is None or crop_with is None:
                continue

            x_train.append(cv2.resize(crop_without, IMG_SIZE))
            y_train.append(0.0)
            x_train.append(cv2.resize(crop_with, IMG_SIZE))
            y_train.append(1.0)

    return tf.convert_to_tensor(x_train), tf.convert_to_tensor(y_train)

Создаём загрузчики данных для тренировочного и проверочного датасетов

In [6]:
# Number of full batches available in each part of the split.
train_steps = VALID_START // BATCH_SIZE
test_steps = (len(data) - VALID_START) // BATCH_SIZE

# Rows before VALID_START are used for training, the remaining rows for validation;
# the two generators must not share rows, otherwise validation metrics are meaningless.
my_training_batch_generator = batch_generator(data[:VALID_START], BATCH_SIZE, train_steps)
my_validation_batch_generator = batch_generator(data[VALID_START:], BATCH_SIZE, test_steps)

Создаём модель для классификации изображений на те, что содержат бургер-меню, и те, что не содержат.

In [7]:
# Small binary CNN over single-channel IMG_SIZE crops:
# random flips -> strided 5x5 conv -> three 3x3 conv/pool stages -> 3x3 conv
# -> global average pooling -> one sigmoid unit.
model_layers = [
    tf.keras.layers.RandomFlip(mode="horizontal_and_vertical", input_shape=(*IMG_SIZE, 1)),
    layers.Conv2D(filters=4, kernel_size=(5, 5), strides=(2, 2), activation='relu', padding='same'),
    layers.MaxPool2D(),
]

for n_filters in (8, 16, 8):
    model_layers.append(
        layers.Conv2D(filters=n_filters, kernel_size=(3, 3), activation='relu', padding='same')
    )
    model_layers.append(layers.MaxPool2D())

model_layers += [
    layers.Conv2D(filters=4, kernel_size=(3, 3), activation='relu', padding='same'),
    layers.GlobalAveragePooling2D(),
    layers.Dense(1, activation='sigmoid'),
]

model = keras.Sequential(model_layers)

Обучаем модель с ранней остановкой

In [8]:
# Binary classification setup: Adam with learning rate 1e-2, binary cross-entropy
# loss, binary accuracy as the reported metric; the model is run eagerly.
model.compile(
    optimizer=keras.optimizers.Adam(1e-2),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=[tf.keras.metrics.BinaryAccuracy()],
    run_eagerly=True,
)

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop when the validation loss has not improved for `patience` epochs and
# roll the model back to the best weights seen so far.
early_stopping = EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,  # minimum amount of change to count as an improvement
    patience=10,  # how many epochs to wait before stopping
    restore_best_weights=True,
)

# validation_data must be supplied: validation_steps has no effect without it,
# early stopping has nothing to monitor, and the history would lack the
# val_loss / val_binary_accuracy columns plotted in the next cell.
history = model.fit(
    my_training_batch_generator,
    epochs=20,
    steps_per_epoch=train_steps,
    verbose=1,
    callbacks=[early_stopping],
    validation_data=my_validation_batch_generator,
    validation_steps=test_steps,
)

In [None]:
history_frame = pd.DataFrame(history.history)

# One figure per metric with the training and (when present) validation curve.
# Only columns that exist are selected, so the cell does not raise KeyError
# when training was run without validation data.
for metric in ('loss', 'binary_accuracy'):
    curves = [name for name in (metric, f'val_{metric}') if name in history_frame.columns]
    ax = history_frame.loc[:, curves].plot(title=metric)
    ax.set_xlabel('epoch')
    ax.set_ylabel(metric)

Проверяем модель

In [None]:
%%time

x,y= load_data(data, len(data)-7, 6)
pred= model.predict(x)
pred.shape

Конвертируем в ONNX

In [None]:
import tf2onnx

# Write the converted model to model.onnx. The trailing semicolon stops the
# notebook from echoing the call's return value (presumably the full converted
# model - confirm against the tf2onnx docs) as cell output.
tf2onnx.convert.from_keras(model, output_path='model.onnx');