<a href="https://colab.research.google.com/github/zw2788/LocalMinimaConstruction/blob/main/LocalMinimuaEx1Creating.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
from typing import Tuple

import numpy as np
import pandas as pd

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from IPython.display import Image

In [3]:
# parameters for the first layer
W_0 = np.array([[1.05954587,-0.03749863],[-0.05625762,1.09518945]])
b = np.array([[-0.050686],[-0.06894291]])

# parameters for the second layer

V_0 = np.array([[3.76921058,-3.72139955]])
c = np.array([[-0.0148436]])

In [4]:
def sigmoid(x):
    """Calculates sigmoid function."""
    return 1. / (1 + np.exp(-x))

def forward_prop(
    X_raw: np.array,
    W_0: np.array,
    b: np.array,
    V_0: np.array,
    c: np.array,
) -> Tuple:
    """Performs the forward propagation of the given NN."""
    # Note the NN structure is passed in from outside.
    Z_1 = X_raw @ W_0.T
    A_1 = sigmoid(Z_1 + b.T)

    Z_2 = A_1 @ V_0.T
    A_2 = Y = sigmoid(Z_2 + c.T)

    return A_2, Z_2, A_1, Z_1

#Y_hat, _, _, _ = forward_prop(X_raw=X_raw, W_0=W_0, b=b, V_0=V_0, c=c)

def derivatives_with_respect_to_input(
    X_raw: np.array,
    Y: np.array,
    W_0: np.array,
    b: np.array,
    V_0: np.array,
    c: np.array,
) -> Tuple:
    """Calculates the derivatives of the loss with respect to the input data, X_raw.

    Here we assume it is a binary classification problem, with sigmoid activation functions.
    """
    # forward propagation
    dX_raw = 0
    Y_hat, Z_2, A_1, Z_1 = forward_prop(X_raw=X_raw, W_0=W_0, b=b, V_0=V_0, c=c)
    n = len(Y_hat)
    # Backward propagation to compute gradient with respect to X_raw
    dZ_2 = Y_hat - Y
    dA_1 = dZ_2 @ V_0 / n
    dZ_1 = np.multiply(dZ_2 @ V_0, sigmoid_derivative(Z_1))  # Assuming sigmoid activation, need its derivative
    dX_raw = dZ_1 @ W_0 / n

    # Compute the loss
    loss = -np.mean(np.multiply(Y, np.log(Y_hat)) + np.multiply(1 - Y, np.log(1 - Y_hat)))

    return dX_raw, loss

# Function to compute the derivative of the sigmoid function
def sigmoid_derivative(Z):
    S = 1 / (1 + np.exp(-Z))
    return S * (1 - S)

# Example usage:
# Initialize your variables (X_raw, Y, W_0, b, V_0, c) before calling this function
#dX_raw, loss = derivatives_with_respect_to_input(X_raw=X_raw, Y=Y, W_0=W_0, b=b, V_0=V_0, c=c)


In [8]:
def gradient_ascent(
    X_raw_init: np.array,
    Y: np.array,
    W_0_init: np.array,
    b_init: np.array,
    V_0_init: np.array,
    c_init: np.array,
    learning_rate: float = 0.5,
    epsilon: float = 1e-2,
    verbose: bool = False,
) -> Tuple:
    """Runs gradient descent to fit the NN via backprop."""

    W_0 = W_0_init
    b = b_init
    V_0 = V_0_init
    c = c_init
    X_raw = X_raw_init
    losses = [float("inf"), ]
    roc_auc_scores = [0.5, ]

    diff_in_loss = float("inf")
    iteration = 0
    while abs(diff_in_loss) > epsilon:
        iteration += 1
        dX_raw, loss = derivatives_with_respect_to_input(
            X_raw=X_raw, Y=Y, W_0=W_0, b=b, V_0=V_0, c=c
        )

        X_raw -= learning_rate * dX_raw
        print(X_raw)
        losses.append(loss)
        diff_in_loss = losses[-1] - losses[-2]

        Y_hat, _, _, _ = forward_prop(X_raw=X_raw, W_0=W_0, b=b, V_0=V_0, c=c)
        roc_auc = roc_auc_score(y_true=Y, y_score=Y_hat)
        roc_auc_scores.append(roc_auc)

        if verbose and iteration % 10 == 0:
            print(loss, roc_auc)
    return X_raw, losses

In [10]:
# parameters for the first layer
W_0_init = np.array([[1.06,-0.037],[-0.056,1.095]])
b_init = np.array([[-0.051],[-0.0689]])

# parameters for the second layer

V_0_init = np.array([[3.769,-3.72]])
c_init = np.array([[-0.0148]])

X_raw = np.array([[ 2.8*(1+0.2*np.random.choice([-1,1])) , 0.4*(1+0.2*np.random.choice([-1,1]))],
 [ 3.1*(1+0.2*np.random.choice([-1,1])) , 4.3*(1+0.01*np.random.choice([-1,1]))],
 [ 0.1*(1+0.001*np.random.choice([-1,1])) , -3.4*(1+0.001*np.random.choice([-1,1]))],
 [-4.2 , -3.3],
 [-0.5 , 0.2],
 [-2.7 , -0.4],
 [-3. , -4.3],
 [-0.1 , 3.4],
 [ 4.2 , 3.2],
 [ 0.4, -0.1]])

Y = np.array([[1],
 [1],
 [1],
 [1],
 [1],
 [0],
 [0],
 [0],
 [0],
 [0]])


X_raw_end, losses = gradient_ascent(
    X_raw_init=X_raw,
    Y=Y,
    W_0_init=W_0_init,
    b_init =b_init,
    V_0_init=V_0_init,
    c_init =c_init,
    learning_rate=0.01,
    epsilon=1e-4,
    verbose=True,
)
print(X_raw_end)
print(losses)

[1;30;43m流式输出内容被截断，只能显示最后 5000 行内容。[0m
 [ 3.72881039  4.25208313]
 [ 0.12385211 -3.40647995]
 [-4.19402064 -3.3134796 ]
 [-0.37423705  0.06840779]
 [-2.71029794 -0.35962537]
 [-3.01859135 -4.29489366]
 [-0.1226696   3.4029226 ]
 [ 4.19383605  3.21521524]
 [ 0.27843871  0.02540345]]
[[ 3.36554525  0.4453442 ]
 [ 3.72885468  4.25205824]
 [ 0.12397161 -3.40649531]
 [-4.19399039 -3.31354718]
 [-0.37362329  0.06777127]
 [-2.71034887 -0.35942436]
 [-3.01868434 -4.29486784]
 [-0.12278278  3.40293719]
 [ 4.19380487  3.21529144]
 [ 0.2778497   0.02600555]]
[[ 3.36557267  0.44517211]
 [ 3.72889897  4.25203335]
 [ 0.12409109 -3.40651067]
 [-4.19396014 -3.31361475]
 [-0.37300977  0.06713504]
 [-2.71039979 -0.35922339]
 [-3.01877732 -4.29484203]
 [-0.12289595  3.40295177]
 [ 4.1937737   3.21536763]
 [ 0.27726095  0.02660732]]
0.5381168370323263 0.64
[[ 3.36560008  0.44500004]
 [ 3.72894325  4.25200846]
 [ 0.12421056 -3.40652602]
 [-4.19392988 -3.31368231]
 [-0.37239647  0.06649911]
 [-2.71045069 