<a href="https://colab.research.google.com/github/vovansim/neural-networks-VSU/blob/main/nn_lessons_practice_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [18]:
import numpy as np
import plotly.graph_objs as go

In [19]:
# Output range: (0, 1)
def sigmoid(x):
    """Logistic sigmoid ``1 / (1 + e^(-x))``, applied element-wise via NumPy."""
    denominator = np.exp(-x) + 1
    return 1 / denominator
def sigmoid_back(x):
    """Sigmoid derivative written in terms of the sigmoid *output* ``x``.

    Returns ``x * (1 - x)``; the caller is expected to pass the already
    activated value, not the pre-activation.
    """
    complement = 1 - x
    return complement * x
# Output range: (-1, 1)
def tanh(x):
    """Hyperbolic tangent activation (thin wrapper around ``np.tanh``)."""
    activated = np.tanh(x)
    return activated
def tanh_back(x):
    """Tanh derivative written in terms of the tanh *output* ``x``: ``1 - x^2``."""
    return 1 - x * x

In [20]:
# Forward pass of the neural network
def predict_values(W1, W2, bias_1, bias_2, X):
    """Run the two-layer tanh network on ``X`` and return the output activations.

    ``X`` is transposed before the first matrix product, so its rows are
    treated as samples; the result therefore has one column per sample.

    W1, bias_1 : weights and bias of the first (hidden) layer
    W2, bias_2 : weights and bias of the second (output) layer
    X          : input samples, one per row
    """
    hidden_activation = tanh(W1 @ X.T + bias_1)
    output_pre_activation = W2 @ hidden_activation + bias_2
    return tanh(output_pre_activation)
# Cost function: sum of squared errors divided by twice the number of rows
def cost(y_prediction, y_true):
    """Return ``sum((y_prediction - y_true)^2) / (2 * n)``.

    ``n`` is the length of the first axis of ``y_prediction``. The two arrays
    are subtracted with ordinary NumPy broadcasting, so they should have the
    same shape.
    """
    n_rows = y_prediction.shape[0]
    residual = y_prediction - y_true
    return np.sum(residual ** 2) / (2 * n_rows)

In [21]:
def settings_animation(Z, xs, ys):
    """Build the initial plotly figure dict and the slider template.

    Z      : values shown by the initial contour trace
    xs, ys : axis coordinates passed to the contour trace

    Returns ``(fig_dict, sliders_dict)``. ``fig_dict`` holds the initial
    contour, a Play/Pause button menu and an empty ``"frames"`` list;
    ``sliders_dict`` has an empty ``"steps"`` list. It is NOT attached to the
    layout here; the caller does that once the steps are filled in.
    """
    play_button = {
        "args": [None, {"frame": {"duration": 1000, "redraw": True},
                        "fromcurrent": True,
                        "transition": {"duration": 300,
                                       "easing": "quadratic-in-out"}}],
        "label": "Play",
        "method": "animate",
    }
    pause_button = {
        "args": [[None], {"frame": {"duration": 0, "redraw": True},
                          "mode": "immediate",
                          "transition": {"duration": 0}}],
        "label": "Pause",
        "method": "animate",
    }
    button_menu = {
        "buttons": [play_button, pause_button],
        "direction": "left",
        "pad": {"r": 10, "t": 90},
        "showactive": False,
        "type": "buttons",
        "x": 0.1,
        "xanchor": "right",
        "y": 0,
        "yanchor": "top",
    }

    sliders_dict = {
        "active": 0,
        "yanchor": "top",
        "xanchor": "left",
        "currentvalue": {
            "font": {"size": 20},
            # "prefix": "Epoch:",  (left disabled, as in the original)
            "visible": True,
            "xanchor": "right",
        },
        "transition": {"duration": 300, "easing": "cubic-in-out"},
        "pad": {"b": 10, "t": 50},
        "len": 0.9,
        "x": 0.1,
        "y": 0,
        "steps": [],
    }

    # Contour shown before any animation frame is played.
    initial_contour = go.Contour(z=Z, x=xs, y=ys, contours_coloring='heatmap')

    fig_dict = {
        "data": [initial_contour],
        "layout": {
            "hovermode": "closest",
            "updatemenus": [button_menu],
        },
        "frames": [],
    }
    return fig_dict, sliders_dict

$$Z1=W1*X+B1$$

$$Y1=tanh(Z1)$$

$$Z2=W2*Y1+B2$$

$$Y2=tanh(Z2)$$

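$$1) \quad \frac{\partial Cost}{\partial Y2} = \frac{1}{batch} * (Y_{true} - Y2)$$

Замечание: здесь и ниже под $\frac{\partial Cost}{\partial Y2}$ понимается антиградиент (производная с обратным знаком): для $Cost = \frac{1}{2 \cdot batch}\sum (Y2 - Y_{true})^2$ сама производная равна $\frac{1}{batch} * (Y2 - Y_{true})$. Поэтому в коде веса обновляются со знаком «+»: $W = W + E * \Delta W$.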

$$2) \quad \frac{\partial Cost}{\partial Z2} = \frac{\partial Cost}{\partial Y2} * \frac{\partial Y2}{\partial Z2} = \frac{\partial Cost}{\partial Y2} * (1 - Y2^2) = \frac{1}{batch} * (Y_{true} - Y2) * (1 - Y2^2)$$

$$3. a) \quad \frac{\partial Cost}{\partial W2} = \frac{\partial Cost}{\partial Z2} * \frac{\partial Z2}{\partial W2} = \frac{1}{batch} * (Y_{true} - Y2) * (1 - Y2^2) * Y1$$

$$3. b) \quad \frac{\partial Cost}{\partial B2} = \frac{\partial Cost}{\partial Z2} * \frac{\partial Z2}{\partial B2} = \sum \frac{\partial Cost}{\partial Z2} * 1 \ \text{1 нейрон}$$

$$4) \quad \frac{\partial Cost}{\partial Y1} = \frac{\partial Cost}{\partial Z2} * \frac{\partial Z2}{\partial Y1}$$

$$5) \quad \frac{\partial Cost}{\partial Z1} = \frac{\partial Cost}{\partial Y1} * \frac{\partial Y1}{\partial Z1}$$

$$6. a) \quad \frac{\partial Cost}{\partial W1} = \frac{\partial Cost}{\partial Z1} * \frac{\partial Z1}{\partial W1}$$

$$6. b) \quad \frac{\partial Cost}{\partial B1} = \frac{\partial Cost}{\partial Z1} * \frac{\partial Z1}{\partial B1} = \sum \frac{\partial Cost}{\partial Z1} * [1, 1]^T \ \text{2 нейрона}$$

$$
$$

$$ Y2 = tanh(bias_1^{(2)} + w_1^{(2)} * tanh(bias_1^{(1)} + w_{11}^{(1)} * x_1 + w_{21}^{(1)} * x_2) + w_2^{(2)} * tanh(bias_2^{(1)} + w_{12}^{(1)} * x_1 + w_{22}^{(1)} * x_2))$$

$$\begin{aligned}\frac{\partial Cost}{\partial W2} = \Delta W2 &= \frac{\partial Cost}{\partial Y2} * \frac{\partial Y2}{\partial Z2} * \frac{\partial Z2}{\partial W2} = \frac{1}{batch} * (Y_{true} - Y2) * (1 - Y2^2) * Y1 = \\ &= \frac{1}{batch} * (Y_{true} - Y2) * (1 - Y2^2) * tanh(bias_1^{(1)}+w_{11}^{(1)}*x_1+w_{21}^{(1)}*x_2)\end{aligned}$$

$$\frac{\partial Cost}{\partial W1} = \Delta W1 = \frac{\partial Cost}{\partial Y2} * \frac{\partial Y2}{\partial Z2} * \frac{\partial Z2}{\partial Y1} * \frac{\partial Y1}{\partial Z1} * \frac{\partial Z1}{\partial W1} = \frac{1}{batch} * (Y_{true} - Y2) * (1 - Y2^2) * W2 * (1 - Y1^2) * X $$

In [22]:
def _record_decision_frame(fig_dict, sliders_dict, epoch, Z, xs, ys):
    """Append one animation frame and its matching slider step for ``epoch``."""
    frame_name = f'Epoch {epoch}'
    fig_dict["frames"].append({
        "data": [go.Contour(z=Z, x=xs, y=ys, contours_coloring='heatmap')],
        "name": frame_name,
    })
    sliders_dict["steps"].append({
        "args": [
            [frame_name],
            {"frame": {"duration": 300, "redraw": True},
             "mode": "immediate",
             "transition": {"duration": 300}},
        ],
        "label": frame_name,
        "method": "animate",
    })


def gradient_descent(
        W1,
        W2,
        X,
        E,
        batch,
        bias_1,
        bias_2,
        Y_true,
        max_num_epoch,
        fig_dict,
        sliders_dict,
        prediction_data,
        xs,
        ys
    ):
    """
        Mini-batch gradient descent for the two-layer tanh network.

        W1, W2 : np.array - weights of the hidden and the output layer
        X : np.array - training samples, one per row
        E : float - learning rate
        batch : int - mini-batch size (number of rows taken per update step)
        bias_1, bias_2 : np.array - biases of the hidden and the output layer
        Y_true : np.array - training targets, one row per sample
        max_num_epoch : int - number of training epochs
        fig_dict, sliders_dict : dict - animation figure / slider settings;
            both are modified in place (frames and slider steps are appended)
        prediction_data : np.array - grid points used to draw the decision boundary
        xs, ys : np.array - grid coordinates along the x and y axes

        Returns a tuple:
        fig_dict : the same dict, filled with frames and with the slider attached
        Z : predictions on ``prediction_data`` made with the final weights,
            reshaped to (len(ys), len(xs))
        costs_history : dict mapping epoch number -> cost on the full training set

        The caller's weight and bias arrays are never modified in place.
    """
    n_samples = X.shape[0]
    # The grid is built with np.meshgrid(xs, ys), whose outputs have shape
    # (len(ys), len(xs)); plotly also reads z rows along y. The two sizes are
    # equal in this notebook, but the order matters for non-square grids.
    grid_shape = (ys.shape[0], xs.shape[0])
    costs_history = {}

    for epoch in range(max_num_epoch):
        for index_start in range(0, n_samples, batch):
            X_batch = X[index_start:index_start + batch]
            Y_batch = Y_true[index_start:index_start + batch]
            # The last mini-batch may be shorter than `batch`; average over
            # the rows actually present.
            batch_size = X_batch.shape[0]

            # forward pass (same computation as predict_values, but the
            # hidden activations Y1 are kept for the backward pass)
            Y1 = tanh(W1 @ X_batch.T + bias_1)
            Y2 = tanh(W2 @ Y1 + bias_2)

            # backward pass
            # NOTE: (Y_true - Y2) is the NEGATIVE of dCost/dY2, so every
            # dCost_* below is a descent direction and the update uses "+".
            dCost_dY2 = (Y_batch.T - Y2) / batch_size
            dCost_dZ2 = dCost_dY2 * tanh_back(Y2)
            # dZ2/dW2 = Y1
            dCost_dW2 = dCost_dZ2 @ Y1.T
            dCost_dB2 = np.sum(dCost_dZ2, axis=1, keepdims=True)
            # dZ2/dY1 = W2; the transpose is the general form and is identical
            # to the former reshape(-1, 1) for a single output neuron
            # (atleast_2d keeps a 1-D W2 working as before).
            dCost_dY1 = np.atleast_2d(W2).T @ dCost_dZ2
            # dY1/dZ1 = tanh_back(Y1)
            dCost_dZ1 = dCost_dY1 * tanh_back(Y1)
            # dZ1/dW1 = X
            dCost_dW1 = dCost_dZ1 @ X_batch
            dCost_dB1 = np.sum(dCost_dZ1, axis=1, keepdims=True)

            # update (out-of-place on purpose: the caller's arrays stay intact)
            W2 = W2 + E * dCost_dW2
            W1 = W1 + E * dCost_dW1
            bias_2 = bias_2 + E * dCost_dB2
            bias_1 = bias_1 + E * dCost_dB1

        # every 20 epochs record a frame with the current decision boundary
        if epoch % 20 == 0:
            Z_frame = predict_values(W1, W2, bias_1, bias_2, prediction_data)
            _record_decision_frame(
                fig_dict, sliders_dict, epoch, Z_frame.reshape(grid_shape), xs, ys
            )

        # store the cost after every epoch
        costs_history[epoch] = cost(
            predict_values(W1, W2, bias_1, bias_2, X).reshape(-1, 1), Y_true
        )

    # Final report, computed from the final weights so that it also works when
    # no epoch was run (costs_history is empty in that case).
    final_predictions = predict_values(W1, W2, bias_1, bias_2, X)
    final_cost = cost(final_predictions.reshape(-1, 1), Y_true)
    print('Предсказания на последней эпохе:', *final_predictions)
    print('Функция стоимости на последней эпохе:', final_cost)

    # Always return the surface for the FINAL weights, not the last recorded
    # frame (which lags behind when the last epoch is not a multiple of 20).
    Z = predict_values(W1, W2, bias_1, bias_2, prediction_data).reshape(grid_shape)

    fig_dict["layout"]["sliders"] = [sliders_dict]
    return fig_dict, Z, costs_history

In [23]:
# Training inputs: the four corners of the {-1, 1} x {-1, 1} square,
# one sample per row (columns: x1, x2).
X = np.array([[-1, -1], [-1, 1], [1, -1], [1, 1]])

# Same four samples with one extra (third) feature column.
X_x1_and_x2 = np.array([[-1, -1, -1], [-1, 1, -1], [1, -1, -1], [1, 1, 1]])

# Targets as (4, 1) column vectors, in the same row order as X.
Y_and = np.array([-1, -1, -1, 1]).reshape(-1, 1)
Y_or = np.array([-1, 1, 1, 1]).reshape(-1, 1)
Y_xor = np.array([-1, 1, 1, -1]).reshape(-1, 1)

In [24]:
# --- Initial network parameters -------------------------------------------
W1 = np.array([[0.2, 0.4], [0.3, 0.7]])
# Alternative first-layer weights for the 3-feature input (X_x1_and_x2).
W1_feature_enginering = np.array([[0.2, 0.4, 0.4], [0.3, 0.7, 0.4]])
W2 = np.array([[0.4, 0.9]])
bias_1 = np.zeros((2, 1))
bias_2 = np.zeros((1, 1))

# --- Training hyper-parameters --------------------------------------------
E = 0.2               # learning rate
max_num_epoch = 201   # number of epochs
batch = 2             # mini-batch size

# --- Prediction grid: 100 x 100 points over [-1, 1] x [-1, 1] --------------
xs = np.linspace(-1., 1., 100)
ys = np.linspace(-1., 1., 100)
xx, yy = np.meshgrid(xs, ys)
prediction_data = np.c_[xx.ravel(), yy.ravel()]
# prediction_data_feature_enginering = np.c_[prediction_data, prediction_data[:, 0]*prediction_data[:, 1]]

# Network output on the grid before any training (initial animation picture).
Z = predict_values(W1, W2, bias_1, bias_2, prediction_data).reshape(xx.shape)
# Z = predict_values(W1_feature_enginering, W2, bias_1, bias_2, prediction_data_feature_enginering).reshape(xx.shape)

In [25]:
# Train on the XOR targets and show the animated decision boundary.
fig_dict, sliders_dict = settings_animation(Z, xs, ys)
animation, Z_last, costs_history = gradient_descent(
    W1=W1,                            # alternative: W1_feature_enginering
    W2=W2,
    X=X,                              # alternative: X_x1_and_x2
    E=E,
    batch=batch,
    bias_1=bias_1,
    bias_2=bias_2,
    Y_true=Y_xor,
    max_num_epoch=max_num_epoch,
    fig_dict=fig_dict,
    sliders_dict=sliders_dict,
    prediction_data=prediction_data,  # alternative: prediction_data_feature_enginering
    xs=xs,
    ys=ys,
)
fig = go.Figure(animation)
fig.show()

Предсказания на последней эпохе: [-0.90014751  0.92407432  0.92440418 -0.89630059]
Функция стоимости на последней эпохе: 0.004025440700573951


In [26]:
# 3D view of the surface returned by training (Z_last)
fig3 = go.Figure(data=[go.Surface(x=xs, y=ys, z=Z_last)])
# Draw contour lines along z, coloured by the colormap and projected onto the z plane.
fig3.update_traces(
    contours_z={"show": True, "usecolormap": True, "project_z": True}
)
fig3.update_layout(width=800, height=600)
fig3.show()

In [27]:
# Cost per epoch
fig2 = go.Figure(
    data=[
        go.Scatter(
            x=list(costs_history),
            y=list(costs_history.values()),
            line={"color": "green"},
        )
    ]
)
fig2.update_layout(
    xaxis={"title_text": "Epoch"},
    yaxis={"title_text": "Cost"},
)
fig2.show()