In [2]:
!pip install --user --upgrade tensorflow-model-optimization

Collecting tensorflow-model-optimization
  Downloading tensorflow_model_optimization-0.7.5-py2.py3-none-any.whl (241 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-model-optimization
Successfully installed tensorflow-model-optimization-0.7.5


In [3]:
pip install tensorflow[and-cuda]==2.14.*

Collecting tensorflow[and-cuda]==2.14.*
  Downloading tensorflow-2.14.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (489.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m489.9/489.9 MB[0m [31m1.2 MB/s[0m eta [36m0:00:00[0m
Collecting tensorboard<2.15,>=2.14 (from tensorflow[and-cuda]==2.14.*)
  Downloading tensorboard-2.14.1-py3-none-any.whl (5.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m52.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow-estimator<2.15,>=2.14.0 (from tensorflow[and-cuda]==2.14.*)
  Downloading tensorflow_estimator-2.14.0-py2.py3-none-any.whl (440 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m440.7/440.7 kB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting keras<2.15,>=2.14.0 (from tensorflow[and-cuda]==2.14.*)
  Downloading keras-2.14.0-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m

In [14]:
import tempfile
import os

import tensorflow as tf
import numpy as np
import re
import tensorflow_model_optimization as tfmot

from tensorflow import keras
from keras.datasets import cifar10
from keras.applications import VGG19
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, BatchNormalization
from keras import utils
print(tf.__version__)

%load_ext tensorboard

2.14.1
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [90]:
# Number of image classes
nb_classes = 10
# Class names of the CIFAR-10 dataset (in Russian)
classes = [
    'самолет', 'автомобиль', 'птица', 'кот', 'олень',
    'собака', 'лягушка', 'лошадь', 'корабль', 'грузовик',
]
# Load the predefined train / test split
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
# One-hot encode the integer labels of both splits
y_train10, y_test10 = (
    utils.to_categorical(labels, nb_classes) for labels in (y_train, y_test)
)
# Report the size of each split
print('x_train shape:', x_train.shape)
print(len(x_train), 'тренировочные примеры')
print(len(x_test), 'тестовые примеры')

x_train shape: (50000, 32, 32, 3)
50000 тренировочные примеры
10000 тестовые примеры


In [3]:
# Load VGG19 with ImageNet weights, without its fully-connected top,
# configured for 32x32 RGB inputs and with no global pooling at the end.
vgg19 = VGG19(
    weights='imagenet',
    include_top=False,
    input_shape=(32, 32, 3),
    pooling=None,
)

In [4]:
# Freeze every layer of the pre-trained base so that only layers added later are trained.
for layer in vgg19.layers:
    layer.trainable = False

In [5]:
# Start a Sequential model whose first (and so far only) layer is the VGG19 base.
base_model = Sequential([vgg19])

In [6]:
# Configure training: SGD optimizer, categorical cross-entropy loss, accuracy metric.
# NOTE(review): compile() is called here while the model contains only the VGG19 base;
# the classifier layers are appended in the following cell.
base_model.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [7]:
# Classifier head stacked on top of the VGG19 base.
for head_layer in (
    BatchNormalization(),
    Flatten(),  # flatten the feature map into a vector
    Dense(256, activation='relu'),
    Dense(10, activation='softmax'),
):
    base_model.add(head_layer)
base_model.summary()

# Input / output shapes of the last layer.
output_layer = base_model.layers[-1]
print(output_layer.input_shape)
print(output_layer.output_shape)

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 vgg19 (Functional)          (None, 1, 1, 512)         20024384  
                                                                 
 batch_normalization (Batch  (None, 1, 1, 512)         2048      
 Normalization)                                                  
                                                                 
 flatten (Flatten)           (None, 512)               0         
                                                                 
 dense (Dense)               (None, 256)               131328    
                                                                 
 dense_1 (Dense)             (None, 10)                2570      
                                                                 
Total params: 20160330 (76.91 MB)
Trainable params: 134922 (527.04 KB)
Non-trainable params: 20025408 (76.39 MB)
_________

In [8]:
# Train the baseline for 5 epochs (batch size left at the Keras default),
# evaluating on the test split after each epoch.
history = base_model.fit(
    x_train,
    y_train10,
    validation_data=(x_test, y_test10),
    epochs=5,
    shuffle=True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [9]:
# Training accuracy ('accuracy', not 'val_accuracy') after the last epoch.
# Index -1 stays correct if the number of epochs changes.
baseline_model_accuracy = history.history['accuracy'][-1]

In [10]:
# Display the stored baseline accuracy.
baseline_model_accuracy

0.6114799976348877

## Прунинг

Прунинг нейронной сети — это метод сжатия модели путём удаления части её параметров.

In [15]:
EPOCHS = 5
end_step = np.ceil(1.0 * x_train.shape[0] / nb_classes).astype(np.int32) * EPOCHS

pruning_schedule = tfmot.sparsity.keras.PolynomialDecay(
                        initial_sparsity=0.0, final_sparsity=0.5,
                        begin_step=0, end_step=end_step, frequency=100)

#layer.input_shape[-1]
pruned_model = tf.keras.Sequential()
for layer in base_model.layers:
    if(re.match(r"conv_pw_\d+$", layer.name)):
         pruned_model.add(tfmot.sparsity.prune_low_magnitude(
            layer,
            pruning_schedule,
            block_size=(1,1)
         ))
    else:
        pruned_model.add(layer)

pruned_model.compile(loss='categorical_crossentropy',
              optimizer='SGD',
              metrics=['accuracy'])

In [16]:
history_prun = pruned_model.fit(x_train, y_train10,
              #batch_size=batch_size,
              epochs=5,
              validation_data=(x_test, y_test10),
              shuffle=True)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [17]:
# Training accuracy ('accuracy', not 'val_accuracy') after the last epoch.
# Index -1 stays correct if the number of epochs changes.
pruned_model_accuracy = history_prun.history['accuracy'][-1]

In [18]:
# Display the stored accuracy of the pruned model.
pruned_model_accuracy

0.646120011806488

## Квантизация

Все операции проводятся в целочисленных значениях. Чаще всего квантизация применяется к отдельному слою или к какой-то части сети. Ряд слоёв работает в int8, благодаря чему они требуют очень мало вычислений и памяти, а последний слой остаётся в float32.

In [20]:
# Build a second Sequential model on top of the same `vgg19` base object.
# NOTE(review): no tfmot.quantization API is applied to this model in this section;
# as written it is an ordinary float Keras model. Confirm whether quantization-aware
# training or post-training quantization was intended here.
quant_model = Sequential([vgg19])

In [21]:
# Configure training: SGD optimizer, categorical cross-entropy loss, accuracy metric.
quant_model.compile(
    optimizer='SGD',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [22]:
# Classifier head stacked on top of the VGG19 base.
for head_layer in (
    BatchNormalization(),
    Flatten(),  # flatten the feature map into a vector
    Dense(256, activation='relu'),
    Dense(10, activation='softmax'),
):
    quant_model.add(head_layer)

In [23]:
# Train for 5 epochs (batch size left at the Keras default),
# evaluating on the test split after each epoch.
history_quant = quant_model.fit(
    x_train,
    y_train10,
    validation_data=(x_test, y_test10),
    epochs=5,
    shuffle=True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# Training accuracy ('accuracy', not 'val_accuracy') after the last epoch.
# Index -1 stays correct if the number of epochs changes.
quantized_model_accuracy = history_quant.history['accuracy'][-1]

In [25]:
# Display the stored accuracy of quant_model.
quantized_model_accuracy

0.6090599894523621

## Кластеринг

In [69]:
# Short alias for the tfmot weight-clustering function.
cluster_weights = tfmot.clustering.keras.cluster_weights

In [73]:
# Display the function object to inspect its signature.
cluster_weights

<function tensorflow_model_optimization.python.core.clustering.keras.cluster.cluster_weights(to_cluster, number_of_clusters, cluster_centroids_init=<CentroidInitialization.KMEANS_PLUS_PLUS: 'CentroidInitialization.KMEANS_PLUS_PLUS'>, **kwargs)>

In [76]:
# Keyword arguments for cluster_weights: 32 clusters, linearly initialised centroids.
clustering_params = dict(
    number_of_clusters=32,
    cluster_centroids_init=tfmot.clustering.keras.CentroidInitialization.LINEAR,
)

In [85]:
# First slice (index 0 along the leading axis) of the first variable in base_model.weights.
# This is a tensor slice, not a Keras layer or model.
w = base_model.weights[0][0]

In [86]:
# cluster_weights wraps a Keras model, layer or list of layers (its `to_cluster` argument);
# a raw weight tensor is not a valid input, so the trained model itself is passed.
clustered_model = cluster_weights(base_model, **clustering_params)

## Конвертируем модель в TensorRT для инференса

In [35]:
# Save model in the saved_model format
# (this directory is later given to the TF-TRT converter as input_saved_model_dir)
SAVED_MODEL_DIR="./content/my_model/native_saved_model"
base_model.save(SAVED_MODEL_DIR)

In [36]:
from tensorflow.python.compiler.tensorrt import trt_convert as trt

# Create the TF-TRT converter for the exported SavedModel, keeping FP32 precision.
converter = trt.TrtGraphConverterV2(
    precision_mode=trt.TrtPrecisionMode.FP32,
    input_saved_model_dir=SAVED_MODEL_DIR,
)

# Run the conversion into TensorRT-compatible segments and keep the returned function.
trt_func = converter.convert()
# Print the conversion report (engines, node counts, converted op types).
converter.summary()

TRTEngineOP Name                 Device        # Nodes # Inputs      # Outputs     Input DTypes       Output Dtypes      Input Shapes       Output Shapes     
TRTEngineOp_000_000              device:GPU:0  102     1             1             ['float32']        ['float32']        [[-1, 32, 32, 3]]  [[-1, 10]]        

	- BiasAdd: 18x
	- Const: 41x
	- Conv2D: 16x
	- FusedBatchNormV3: 1x
	- MatMul: 2x
	- MaxPool: 5x
	- Relu: 17x
	- Reshape: 1x
	- Softmax: 1x

[*] Total number of TensorRT engines: 1
[*] % of OPs Converted: 98.08% [102/104]



In [46]:
# Save the converted model for future use
# NOTE(review): this path starts with "../" while SAVED_MODEL_DIR starts with "./" —
# confirm the converted model is really meant to be written under the parent directory.
OUTPUT_SAVED_MODEL_DIR="../content/my_model/tftrt_saved_model"
converter.save(output_saved_model_dir=OUTPUT_SAVED_MODEL_DIR)

Cгенерировать датасет из векторов (размер не менее 1000 векторов)

In [96]:
# Normalisation: map pixel values to [-1, 1] (assumes 8-bit pixels in [0, 255]).
train_images = (x_train / 255) * 2 - 1
test_images = (x_test / 255) * 2 - 1

# Vectorisation: flatten every image into exactly one row vector.
# Each image holds 32 * 32 * 3 = 3072 values, so a fixed row width of 1024 would split
# one image across three rows and break the row-to-label correspondence. Keeping the
# sample axis and inferring the vector length avoids hard-coding the image size.
train_images = train_images.reshape((train_images.shape[0], -1))
test_images = test_images.reshape((test_images.shape[0], -1))