Аналогичный signet-keras процесс обучения, но вместо gpu используются тензорные процессоры, отправляя данные на сервер google, которые позволяют существенно сократить время обучения.
К сожалению с ray, из-за проблем внутренней архитектуры, совместить не получилось.

In [55]:
try:
    import gdown
    import natsort
except:
    !pip install natsort gdown

In [56]:
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import keras
from PIL import Image
import cv2
import os
from keras import models
from keras.layers import Conv2D, MaxPooling2D, Dense, Activation, BatchNormalization, Input, Dropout, Flatten
from keras.models import Model
from keras import optimizers
from keras.layers import Lambda
import natsort as ns
import time
from numba import cuda

In [57]:
PATH_ORG = "./signatures/full_org"
PATH_FORG = "./signatures/full_forg"
checkpoints_path = "./checkpoints"


In [58]:


if os.path.exists('signatures.zip') is False:
    !gdown https://drive.google.com/uc?id=1PpPVry5TkfGVpbFDkwOMNx7Xew4vscW5
#

In [59]:
if os.path.exists('signatures') is False:
    !unzip -q -n signatures.zip

In [60]:



class DataGenerator(keras.utils.Sequence):

    def __init__(self, df, batch_size=32, dim=(155, 220), n_channels=3, shuffle=True, lazy=True):
        self.dim = dim
        self.batch_size = batch_size
        self.df = df
        self.labels = df["label"].to_numpy().astype(np.int32)
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.on_epoch_end()
        self.lazy = lazy
        if self.lazy is False:
            self.data = [np.empty((df.shape[0], *dim, n_channels), dtype=np.float32),
                         np.empty((df.shape[0], *dim, n_channels), dtype=np.float32)]
            for i in range(df.shape[0]):
                image_1 = cv2.imread(df.iloc[i, 0])
                image_1 = cv2.resize(image_1, (220, 155))
                image_1 = 1-image_1/255.0

                image_2 = cv2.imread(df.iloc[i, 1])
                image_2 = cv2.resize(image_2, (220, 155))
                image_2 = 1-image_2/255.0
                self.data[0][i, :, :, :] = image_1
                self.data[1][i, :, :, :] = image_2
                # x_1[i,] = 1 - image_1 / 255.0
                # x_2[i,] = 1 - image_2 / 255.0

    def __len__(self):
        return int(np.floor(self.df.shape[0] / self.batch_size))

    def __getitem__(self, index):
        indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size]

        # return X, y
        if self.lazy is False:
            x = []
            x.append(self.data[0][indexes, :, :, :])
            x.append(self.data[1][indexes, :, :, :])

            y = self.labels[indexes]

        else:
            rows = [self.df.iloc[k] for k in indexes]
            x, y = self.__data_generation(rows)
        return x, y

    def on_epoch_end(self):
        self.indexes = np.arange(self.df.shape[0])
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, rows):
        x_1 = np.empty((self.batch_size, *self.dim, self.n_channels))
        x_2 = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size), dtype=int)

        for i in range(len(rows)):
            image_1 = cv2.imread(rows[i]["image_1"])
            image_1 = cv2.resize(image_1, (220, 155))
            image_1 = np.array(image_1)
            image_2 = cv2.imread(rows[i]["image_2"])
            image_2 = cv2.resize(image_2, (220, 155))
            image_2 = np.array(image_2)
            x_1[i,] = 1 - image_1 / 255.0
            x_2[i,] = 1 - image_2 / 255.0
            y[i] = rows[i]["label"]

        return [x_1, x_2], y





In [61]:
from keras import backend as K


def euclidean_distance2(y):
    return K.sqrt(K.sum(K.square(y[0] - y[1]), axis=-1))


def euclidean_distance(vects):
    x, y = vects
    sum_square = K.sum(K.square(x - y), axis=1, keepdims=True)
    return K.sqrt(K.maximum(sum_square, K.epsilon()))


def eucl_dist_output_shape(shapes):
    shape1, shape2 = shapes
    return (shape1[0], 1)


In [62]:
def contrastive_loss(y_true, y_pred):
    margin = 1
    sqaure_pred = K.square(y_pred)
    margin_square = K.square(K.maximum(margin - y_pred, 0))
    y_true = K.cast(y_true, y_pred.dtype)
    return K.mean(y_true * sqaure_pred + (1 - y_true) * margin_square)


def accuracy(y_true, y_pred):
    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))

In [63]:





def make_net():
    input = Input(shape=(155, 220, 3))

    conv_1 = Conv2D(filters=96, kernel_size=(11, 11))(input)
    batch_norm_1 = BatchNormalization()(conv_1)
    activation_1 = Activation('relu')(batch_norm_1)
    max_pool_1 = MaxPooling2D(pool_size=(3, 3))(activation_1)

    conv_2 = Conv2D(filters=256, kernel_size=(5, 5))(max_pool_1)
    batch_norm_2 = BatchNormalization()(conv_2)
    activation_2 = Activation('relu')(batch_norm_2)
    max_pool_2 = MaxPooling2D(pool_size=(3, 3))(activation_1)

    dropout_1 = Dropout(rate=0.3)(max_pool_2)

    conv_3_a = Conv2D(filters=384, kernel_size=(3, 3))(dropout_1)
    activation_3_a = Activation('relu')(conv_3_a)
    conv_3_b = Conv2D(filters=256, kernel_size=(3, 3))(activation_3_a)
    activation_3_b = Activation('relu')(conv_3_b)
    max_pool_3 = MaxPooling2D(pool_size=(3, 3))(activation_3_b)

    # dropout_22 = Dropout(rate=0.3)(max_pool_3)
    # conv_4_a = Conv2D(filters=384, kernel_size=(3, 3))(dropout_22)
    # activation_4_a = Activation('relu')(conv_4_a)
    # conv_4_b = Conv2D(filters=512, kernel_size=(3, 3))(activation_4_a)
    # activation_4_b = Activation('relu')(conv_4_b)
    # max_pool_4 = MaxPooling2D(pool_size=(2, 2))(activation_4_b)

    dropout_2 = Dropout(rate=0.3)(max_pool_3)
    # dropout_2 = Dropout(rate=0.3)(max_pool_3)

    flat_1 = Flatten()(dropout_2)
    fc_1 = Dense(units=1024, activation='relu')(flat_1)
    dropout_3 = Dropout(rate=0.5)(fc_1)
    fc_2 = Dense(units=128, activation='relu')(dropout_3)



    input_a = Input(shape=(155, 220, 3))
    input_b = Input(shape=(155, 220, 3))

    base_net = Model(input, fc_2)
    processed_a = base_net(input_a)
    processed_b = base_net(input_b)

    distance = Lambda(euclidean_distance2)([processed_a, processed_b])
    # distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
    model = Model([input_a, input_b], distance)
    return base_net,model


In [64]:
params = {
    'dim': (155, 220),
    'batch_size': 16,
    # 'batch_size': 64,
    'n_channels': 3,
    'shuffle': False
}


def get_data(path_org, path_forg, test_size=0.3, random_state=0, lazy=True, ext_data=0):
    org = ns.natsorted(os.listdir(path_org), alg=ns.IGNORECASE)
    forg = ns.natsorted(os.listdir(path_forg), alg=ns.IGNORECASE)
    org = [os.path.join(PATH_ORG, i) for i in org if i.endswith('.png')]
    forg = [os.path.join(PATH_FORG, i) for i in forg if i.endswith('.png')]

    org = [os.path.abspath(i) for i in org]
    forg = [os.path.abspath(i) for i in forg]

    samples = 24
    ppl = len(org) // samples

    data = []
    for i in range(ppl):
        tr = np.array([[org[j], org[j], 1] for j in range(i * samples, (i + 1) * samples)])
        tr[:, 1] = np.concatenate([tr[:-12, 1], tr[-12:, 1]])
        tr[:, 1] = np.random.permutation(tr[:, 1])
        fl = np.array([[org[j], forg[j], 0] for j in range(i * samples, (i + 1) * samples)])

        for j in range(ext_data):
            rand2 = np.random.choice(
                np.concatenate([np.arange(0, i * samples), np.arange((i + 1) * samples, ppl * samples)]),
                samples, replace=False)
            tr2 = np.array([[org[j], org[rand2[j % samples]], 0] for j in range(i * samples, (i + 1) * samples)])
            data.append(tr2)

        data.append(tr)
        data.append(fl)

    df = pd.DataFrame(np.array(data).reshape(-1, 3), columns=["image_1", "image_2", "label"])
    df = df.reindex(np.random.permutation(df.index))

    ds_train, ds_val = train_test_split(df, test_size=test_size, random_state=random_state)

    train_datagen = DataGenerator(ds_train, **params, lazy=lazy)
    validation_datagen = DataGenerator(ds_val, **params, lazy=lazy)
    return train_datagen, validation_datagen

In [65]:

# #!!!!!!!!!!!!!!!!!
resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu='')
tf.config.experimental_connect_to_cluster(resolver)
# This is the TPU initialization code that has to be at the beginning.
tf.tpu.experimental.initialize_tpu_system(resolver)
print("All devices: ", tf.config.list_logical_devices('TPU'))
strategy = tf.distribute.TPUStrategy(resolver)





INFO:tensorflow:Initializing the TPU system: grpc://10.89.196.146:8470


INFO:tensorflow:Initializing the TPU system: grpc://10.89.196.146:8470


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Clearing out eager caches


INFO:tensorflow:Finished initializing TPU system.


INFO:tensorflow:Finished initializing TPU system.


All devices:  [LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:7', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:6', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:5', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:4', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:3', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:0', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:1', device_type='TPU'), LogicalDevice(name='/job:worker/replica:0/task:0/device:TPU:2', device_type='TPU')]
INFO:tensorflow:Found TPU system:


INFO:tensorflow:Found TPU system:


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Cores: 8


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Workers: 1


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Num TPU Cores Per Worker: 8


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:localhost/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:CPU:0, CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:0, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:1, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:2, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:3, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:4, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:5, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:6, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU:7, TPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:TPU_SYSTEM:0, TPU_SYSTEM, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


INFO:tensorflow:*** Available Device: _DeviceAttributes(/job:worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 0, 0)


In [66]:
train_datagen, validation_datagen = get_data(PATH_ORG, PATH_FORG,
                                             test_size=0.3, random_state=0, lazy=False, ext_data=0)

In [67]:
with strategy.scope():
    base_net,model = make_net()
    optimizer = optimizers.Adam(learning_rate=0.0005)
    model.compile(loss=contrastive_loss,optimizer=optimizer, metrics=[accuracy])

In [68]:
x1 = (train_datagen.data[0], train_datagen.data[1])
y1 = train_datagen.labels
#
x2 = (validation_datagen.data[0], validation_datagen.data[1])
y2 = validation_datagen.labels

model.fit(x1,y1,batch_size=128, validation_data=(x2,y2), epochs=20, callbacks=None)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f1348065b50>

In [69]:
model.fit(x1,y1,batch_size=128, validation_data=(x2,y2), epochs=20, callbacks=None)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<tensorflow.python.keras.callbacks.History at 0x7f134dc077d0>

Средневзвешенная точность тестовой и оубчающейся выборок 

In [70]:
a1=accuracy(y2,model.predict(x2))

In [71]:
a2=accuracy(y1,model.predict(x1))

In [72]:
(a1*len(y1)+a2*len(y2))/(len(y1)+len(y2))

<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

In [73]:
model_dir = "./checkpoints/best"

localhost_save_option = tf.saved_model.SaveOptions(experimental_io_device="/job:localhost")
model.save_weights(model_dir, options=localhost_save_option)

In [74]:
!zip -r weights.zip ./checkpoints

updating: checkpoints/ (stored 0%)
updating: checkpoints/best.index (deflated 68%)
updating: checkpoints/best.data-00000-of-00001 (deflated 7%)
updating: checkpoints/checkpoint (deflated 35%)
