In [1]:
import tensorflow as tf

In [2]:
# Простой класс Dense
class NaiveDense:
    def __init__(self, input_size, output_size, activation):
        self.activation = activation

        # создадим матрицу W с формой (input_size, output_size), инициализированную случайными значениями

        w_shape = (input_size, output_size)
        w_initial_value = tf.random.uniform(w_shape, minval=0,
                                            maxval=1e-1)
        self.W = tf.Variable(w_initial_value)

        b_shape = (output_size,)
        b_initial_value = tf.zeros(b_shape)
        self.b = tf.Variable(b_initial_value)

    def __call__(self, inputs):
        return self.activation(tf.matmul(inputs, self.W) + self.b)

    @property
    def weights(self):
        return [self.W, self.b]

In [36]:
# Простой класс Sequential
class NaiveSequential:
    def __init__(self, layers):
        self.layers = layers

    def __call__(self, inputs):
        x = inputs
        for layer in self.layers:
            x = layer(x)
        return x

    @property
    def weights(self):
        weights = []
        for layer in self.layers:
            weights += layer.weights
        return weights

Используя классы NaiveDense и NaiveSequential, можно создать имитацию модели Keras:

In [37]:
model = NaiveSequential([
    NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu),
    NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax)
])

assert len(model.weights) == 4

The number 4 in the assertion statement assert len(model.weights) == 4 refers to the number of weight tensors in the model.

The model consists of two dense layers, NaiveDense(input_size=28 * 28, output_size=512, activation=tf.nn.relu) and NaiveDense(input_size=512, output_size=10, activation=tf.nn.softmax). Each dense layer has two weight tensors: one for the weights connecting the input to the layer, and one for the biases of the layer.

Therefore, the total number of weight tensors in the model is 2 * 2 = 4, which is what the assertion statement checks for. If the code is working correctly, the weights property of the model should contain 4 weight tensors, and the assertion statement will pass without raising an error.

In [38]:
### Генератор пакетов
import math

class BatchGenerator:
    def __init__(self, images, labels, batch_size=128):
        assert len(images) == len(labels)
        self.index = 0
        self.images = images
        self.labels = labels
        self.batch_size = batch_size
        self.num_batches = math.ceil(len(images) / batch_size)

    def next(self):
        images = self.images[self.index : self.index + self.batch_size]
        labels = self.labels[self.index : self.index + self.batch_size]
        self.index += self.batch_size
        return images, labels

#### The math.ceil function is a mathematical function from the math module in Python that rounds a number up to the nearest integer. The math.ceil function takes a floating-point number as input and returns the smallest integer that is greater than or equal to the input.

In the code you posted, the math.ceil function is used to calculate the number of batches required to divide the data into batch-sized chunks. The number of batches is calculated as the ceiling of the division of the number of images by the batch size.

For example, if there are 1000 images and a batch size of 128, then the number of batches is calculated as math.ceil(1000 / 128) = 8. This means that the data will be divided into 8 batches, each containing 128 images and labels.

The math.ceil function is used to ensure that the number of batches is rounded up, so that the last batch contains the remaining images if the number of images is not divisible by the batch size.

#### The next function in the BatchGenerator class is used to generate a batch of images and labels. The function is called repeatedly to generate each batch of data, one at a time.

The function works as follows:

* It takes a slice of the images and labels arrays using the current index and the batch_size. The slice of images and labels contains the data for the current batch.

* It increments the index by batch_size, so that the next call to next will return the next batch of data.

* It returns the batch of images and labels as a tuple.

By using this batch generator, you can easily iterate over the data in batches, without having to manually keep track of the current batch and the index into the data arrays. The batch generator takes care of generating each batch of data, one at a time, until all the data has been processed.

### Выполнение одного этапа обучения

Этап обучения — самая сложная часть процесса. Нам требуется скорректировать веса модели после обучения на одном пакете данных. Для этого нужно сделать следующее.
1. Вычислить прогнозы для изображений в пакете.
2. Найти значения потерь для этих прогнозов с учетом фактических меток.
3. Вычислить градиент потерь с учетом весов модели.
4. Скорректировать веса на небольшую величину в направлении, противоположном градиенту.

In [39]:
LEARNING_RATE = 0.0001

In [40]:
def update_weights(gradients, weights):
    for g, w in zip(gradients, weights):
        w.assign_sub(g * LEARNING_RATE)

In [41]:
def one_training_step(model, images_batch, labels_batch):
    with tf.GradientTape() as tape:
        predictions = model(images_batch)
        per_sample_losses = tf.keras.losses.sparse_categorical_crossentropy( labels_batch, predictions)
        average_loss = tf.reduce_mean(per_sample_losses)
    gradients = tape.gradient(average_loss, model.weights)
    update_weights(gradients, model.weights)
    return average_loss

In [42]:
from tensorflow.keras import optimizers

In [43]:
optimizer = optimizers.SGD(learning_rate=LEARNING_RATE)

In [44]:
def update_weights(gradients, weights):
    optimizer.apply_gradients(zip(gradients, weights))

### Полный цикл обучения

In [45]:
import tensorflow as tf
from tensorflow.keras import layers

In [46]:
def fit(model, images, labels, epochs, batch_size=128):
    for epoch_counter in range(epochs):
        print(f'Epoch {epoch_counter}')
    batch_generator = BatchGenerator(images, labels)
    for batch_counter in range(batch_generator.num_batches):
        images_batch, labels_batch = batch_generator.next()
        loss = one_training_step(model, images_batch, labels_batch)
        if batch_counter % 100 == 0:
            print(f'loss at batch {batch_counter}: {loss: .2f}')

In [47]:
from tensorflow.keras.datasets import mnist

In [48]:
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images

array([[[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       ...,

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]],

       [[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 

In [49]:
train_images = train_images.reshape((60000, 28 * 28))
train_images = train_images.astype("float32") / 255

test_images = test_images.reshape((10000, 28 * 28))
test_images = test_images.astype("float32") / 255

In [50]:
fit(model, train_images, train_labels, epochs=10, batch_size=128)

Epoch 0
Epoch 1
Epoch 2
Epoch 3
Epoch 4
Epoch 5
Epoch 6
Epoch 7
Epoch 8
Epoch 9
loss at batch 0:  4.31
loss at batch 100:  2.30
loss at batch 200:  2.28
loss at batch 300:  2.30
loss at batch 400:  2.25


### Оценка модели

In [53]:
import numpy as np

In [54]:
predictions = model(test_images)
predictions = predictions.numpy()
predicted_labels = np.argmax(predictions, axis=1)
matches = predicted_labels == test_labels
print(f"accuracy: {matches.mean():.2f}")

accuracy: 0.12
