In [7]:
import numpy as np
from keras.datasets import mnist
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

class BatchNorm:
    def __init__(self, num_features, momentum=0.9, epsilon=1e-5):
        self.num_features = num_features
        self.epsilon = epsilon
        self.momentum = momentum
        
        # 初始化缩放和平移参数
        self.gamma = np.ones(num_features)
        self.beta = np.zeros(num_features)
        
        # 运行均值和方差
        self.running_mean = np.zeros(num_features)
        self.running_var = np.ones(num_features)
        
    def forward(self, X, training=True):
        if training:
            self.batch_mean = np.mean(X, axis=0) # 计算均值 三维
            self.batch_var = np.var(X, axis=0) # 计算方差 三维
            
            self.X_normalized = (X - self.batch_mean) / np.sqrt(self.batch_var + self.epsilon)
            self.out = self.gamma * self.X_normalized + self.beta
            
            # 更新运行均值和方差
            self.running_mean = self.momentum * self.running_mean + (1 - self.momentum) * self.batch_mean
            self.running_var = self.momentum * self.running_var + (1 - self.momentum) * self.batch_var
        else:
            self.X_normalized = (X - self.running_mean) / np.sqrt(self.running_var + self.epsilon)
            self.out = self.gamma * self.X_normalized + self.beta
        
        return self.out
    
    def backward(self, d_out ,learning_rate = 0.001):
        N, D = d_out.shape
        
        X_mu = self.X_normalized * np.sqrt(self.batch_var + self.epsilon)
        
        dbeta = np.sum(d_out, axis=0)
        dgamma = np.sum(d_out * self.X_normalized, axis=0)
        
        dX_normalized = d_out * self.gamma
        dvar = np.sum(dX_normalized * X_mu * -0.5 * (self.batch_var + self.epsilon) ** -1.5, axis=0)
        dmean = np.sum(dX_normalized * -1 / np.sqrt(self.batch_var + self.epsilon), axis=0) + dvar * np.mean(-2 * X_mu, axis=0)
        
        dX = (dX_normalized / np.sqrt(self.batch_var + self.epsilon)) + (dvar * 2 * X_mu / N) + (dmean / N)
        
        # 更新参数
        self.gamma -= learning_rate * dgamma
        self.beta -= learning_rate * dbeta
        
        return dX

In [8]:
class TwoLayerMLP:
    def __init__(self, input_size, layer_size, output_size):
        self.input_size = input_size
        self.layer_size = layer_size
        self.output_size = output_size
        hidden_size_prev = 0
        self.weights = {}
        self.biases = {}
        self.data = {}
        self.bn = BatchNorm(output_size)
        for i in range(1, layer_size+1):
            print('请输入第',i,'层的神经元个数：')
            hidden_size = int(input())
            print(hidden_size)
            if i == 1:
                self.weights['W'+str(i)] = np.random.randn(input_size, hidden_size) * np.sqrt(2.0 / input_size)
            else:
                self.weights['W'+str(i)] = np.random.randn(hidden_size_prev, hidden_size) * np.sqrt(2.0 / hidden_size_prev)
            self.biases['b'+str(i)] = np.zeros((1, hidden_size))
            self.data['a'+str(i)] = np.zeros((1, hidden_size))
            for j in range(0,hidden_size):
                square = self.weights['W'+str(i)][:, j]**2
                self.data['a'+str(i)][0][j] = np.sum(square)/self.weights['W'+str(i)][:, j].shape[0]/2 #L2正则化
            hidden_size_prev = hidden_size

        self.weights['W'+str(layer_size+1)] = np.random.randn(hidden_size_prev, output_size) * np.sqrt(2.0 / hidden_size_prev)
        self.biases['b'+str(layer_size+1)] = np.zeros((1, output_size))
        self.data['a'+str(layer_size+1)] = np.zeros((1, output_size))
        for j in range(0,output_size):
            square = self.weights['W'+str(layer_size+1)][:, j]**2
            self.data['a'+str(layer_size+1)][0][j] = np.sum(square)/self.weights['W'+str(i)][:, j].shape[0]/2 #L1正则化
        # Adam optimization parameters
        self.beta1 = 0.9
        self.beta2 = 0.999
        self.epsilon = 1e-8
        self.m = {key: np.zeros_like(val) for key, val in self.weights.items()}
        self.v = {key: np.zeros_like(val) for key, val in self.weights.items()}
        self.t = 0

        print('请输入选择的激活函数：\n1.relu\n2.sigmoid\n3.tanh')
        self.activation_input = input()
        print('选择的激活函数为',self.activation_input)

    def activation(self,X):
        if self.activation_input == '1':
            return self.relu(X)
        elif self.activation_input == '2':
            return self.sigmoid(X)
        elif self.activation_input == '3':
            return self.tanh(X)
        
    def relu(self, x):
        return np.maximum(0, x)
    
    def softmax(self, x):
        exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)
    
    def sigmoid(self,x):
        y = 1/(1+np.exp(-x))
        return y
    
    
    def tanh(self,x):
        y = (np.exp(x) - np.exp(-x))/(np.exp(x) + np.exp(-x))
        return y
    
    def forward(self, X):
        # Forward pass
        z = 0
        a = list()
        
        for i in range(1, self.layer_size+1):
            if i == 1:
                z = np.dot(X, self.weights['W'+str(i)]) + self.biases['b'+str(i)] 
            else:
                z = np.dot(a[len(a)-1], self.weights['W'+str(i)]) + self.biases['b'+str(i)] 
            a.append(self.activation(z)) 
        logits = np.dot(a[len(a)-1], self.weights['W'+str(self.layer_size+1)]) + self.biases['b'+str(self.layer_size+1)] 
        #logits = self.bn.forward(logits) #bn层
        if self.output_size == 1: #如果是回归
            output = logits
        else: #分类
            output = self.softmax(logits)
        return a, output, logits
    
    def backward(self, X, y, a, output, logits, learning_rate):
        # Backpropagation
        
        m = X.shape[0]  # Number of examples
        
        # Compute gradients
        if self.output_size == 1: #如果是回归
            delta = logits
        else:
            delta = output - y
        #delta = self.bn.backward(delta) #BN层
        dW = list()
        db = list()
        for i in range(0,self.layer_size+1):
            if i == 0:
                delta = output - y
            else:
                delta = np.dot(delta, self.weights['W'+str(self.layer_size-i+2)].T) * (a[len(a)-i] > 0)
            if i == self.layer_size:
                dW.insert(0, np.dot(X.T, delta) / m)
            else:
                dW.insert(0, np.dot(a[len(a)-1-i].T, delta) / m)
            db.insert(0, np.sum(delta, axis=0, keepdims=True) / m)

        # Update weights and biases using Adam optimizer
        self.t += 1
        for key in self.weights.keys():
            num = int(key[1:]) - 1
            # Update biased first moment estimate
            self.m[key] = self.beta1 * self.m[key] + (1 - self.beta1) * dW[num]
            # Update biased second raw moment estimate
            self.v[key] = self.beta2 * self.v[key] + (1 - self.beta2) * (dW[num] ** 2)
            # Compute bias-corrected first moment estimate
            m_hat = self.m[key] / (1 - self.beta1 ** self.t)
            # Compute bias-corrected second raw moment estimate
            v_hat = self.v[key] / (1 - self.beta2 ** self.t)
            # Update parameters
            self.weights[key] -= (learning_rate * m_hat / (np.sqrt(v_hat) + self.epsilon) + self.weights[key] * 0.001) #l2正则化
            self.biases['b' + key[1:]] -= learning_rate * db[num]

    def train(self, X, y, epochs=10, batch_size = 64, learning_rate=0.001):
        num_examples = X.shape[0]
        for epoch in range(epochs):
            # Shuffle training data
            indices = np.arange(num_examples)
            np.random.shuffle(indices)
            X_shuffled = X[indices]
            y_shuffled = y[indices]
            
            # Mini-batch training
            for i in range(0, num_examples, batch_size):
                X_batch = X_shuffled[i:i+batch_size]
                y_batch = y_shuffled[i:i+batch_size]
                
                # Forward pass
                a, output, logits = self.forward(X_batch)
                
                if self.output_size == 1: #如果是回归
                    logits = 2 * (y_batch - output) / y_batch.size
                
                # Backward pass
                self.backward(X_batch, y_batch, a, output, logits, learning_rate)
                
            # Compute loss and accuracy for each epoch
            _, output, _ = self.forward(X)
            if self.output_size == 1: #如果是回归
                loss = np.mean((output - y) ** 2)
                r2 = 1 - (np.sum((y - output) ** 2) / np.sum((y - np.mean(y)) ** 2))
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}, R² Score: {r2:.4f}")
                
            else: #分类
                loss = -np.mean(y * np.log(output + 1e-9))
                accuracy = np.mean(np.argmax(output, axis=1) == np.argmax(y, axis=1))
                print(f"Epoch {epoch+1}/{epochs}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

    def test(self, X, y):
        _, output, _ = self.forward(X)
        if self.output_size == 1: #如果是回归
            loss = np.mean((output - y) ** 2)
            r2 = 1 - (np.sum((y - output) ** 2) / np.sum((y - np.mean(y)) ** 2))
            print(f"Test Loss: {loss:.4f}, R² Score: {r2:.4f}")
            
        else: #分类
            loss = -np.mean(y * np.log(output + 1e-9))
            accuracy = np.mean(np.argmax(output, axis=1) == np.argmax(y, axis=1))
            print(f"Test Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

In [9]:

print('1.分类 \t 2.回归')
#user_input = input()
print('请输入隐藏层深度：')
hidden_layer_size = int(input())
print('隐藏层深度为', hidden_layer_size)
#if user_input == '1': #分类
user_input = 1
# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data() 

# Flatten images and normalize
X_train = X_train.reshape(-1, 784) / 255.0
X_test = X_test.reshape(-1, 784) / 255.0

# Convert labels to one-hot encoding
y_train_one_hot = np.eye(10)[y_train]
y_test_one_hot = np.eye(10)[y_test]

# Initialize and train the model
model = TwoLayerMLP(input_size=784, layer_size = hidden_layer_size, output_size=10)
model.train(X_train, y_train_one_hot, epochs=20, batch_size=128, learning_rate=0.001)
model.test(X_test, y_test_one_hot)



1.分类 	 2.回归
请输入隐藏层深度：
隐藏层深度为 2
请输入第 1 层的神经元个数：
200
请输入第 2 层的神经元个数：
100
请输入选择的激活函数：
1.relu
2.sigmoid
3.tanh
选择的激活函数为 1
Epoch 1/20, Loss: 0.0158, Accuracy: 0.9576
Epoch 2/20, Loss: 0.0128, Accuracy: 0.9646
Epoch 3/20, Loss: 0.0100, Accuracy: 0.9742
Epoch 4/20, Loss: 0.0107, Accuracy: 0.9716
Epoch 5/20, Loss: 0.0089, Accuracy: 0.9777
Epoch 6/20, Loss: 0.0087, Accuracy: 0.9775
Epoch 7/20, Loss: 0.0086, Accuracy: 0.9790
Epoch 8/20, Loss: 0.0078, Accuracy: 0.9792
Epoch 9/20, Loss: 0.0086, Accuracy: 0.9772
Epoch 10/20, Loss: 0.0077, Accuracy: 0.9810
Epoch 11/20, Loss: 0.0085, Accuracy: 0.9777
Epoch 12/20, Loss: 0.0078, Accuracy: 0.9806
Epoch 13/20, Loss: 0.0074, Accuracy: 0.9814
Epoch 14/20, Loss: 0.0072, Accuracy: 0.9826
Epoch 15/20, Loss: 0.0067, Accuracy: 0.9838
Epoch 16/20, Loss: 0.0075, Accuracy: 0.9816
Epoch 17/20, Loss: 0.0075, Accuracy: 0.9807
Epoch 18/20, Loss: 0.0073, Accuracy: 0.9814
Epoch 19/20, Loss: 0.0068, Accuracy: 0.9841
Epoch 20/20, Loss: 0.0070, Accuracy: 0.9835
Test Loss: 

In [10]:
#elif user_input == '2': #回归
# Load Boston dataset
user_input = 2
boston = load_boston()
X = boston.data
y = boston.target 
# 划分训练集测试集
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Flatten images and normalize
X_train = X_train.reshape(-1, 13) / 255.0
X_test = X_test.reshape(-1, 13) / 255.0
y_train = y_train.reshape(-1, 1) /255.0
y_test = y_test.reshape(-1, 1) 
X_train_mean = np.mean(X_train, axis=0)
X_train_std = np.std(X_train, axis=0)
X_test_mean = np.mean(X_test, axis=0)
X_test_std = np.std(X_test, axis=0)
X_train_normalized = (X_train - X_train_mean) / X_train_std
X_test_normalized = (X_test - X_test_mean) / X_test_std  # 使用训练集的均值和标准差进行标准化
y_train_mean = np.mean(y_train, axis=0)
y_train_std = np.std(y_train, axis=0)
y_test_mean = np.mean(y_test, axis=0)
y_test_std = np.std(y_test, axis=0)
y_train_normalized = (y_train - y_train_mean) / y_train_std
y_test_normalized = (y_test - y_test_mean) / y_test_std  # 使用训练集的均值和标准差进行标准化
model = TwoLayerMLP(input_size=13, layer_size = hidden_layer_size, output_size=1)
model.train(X_train_normalized, y_train_normalized, epochs=1000, batch_size=128, learning_rate=0.001)
model.test(X_test_normalized, y_test_normalized)


    The Boston housing prices dataset has an ethical problem. You can refer to
    the documentation of this function for further details.

    The scikit-learn maintainers therefore strongly discourage the use of this
    dataset unless the purpose of the code is to study and educate about
    ethical issues in data science and machine learning.

    In this special case, you can fetch the dataset from the original
    source::

        import pandas as pd
        import numpy as np


        data_url = "http://lib.stat.cmu.edu/datasets/boston"
        raw_df = pd.read_csv(data_url, sep="\s+", skiprows=22, header=None)
        data = np.hstack([raw_df.values[::2, :], raw_df.values[1::2, :2]])
        target = raw_df.values[1::2, 2]

    Alternative datasets include the California housing dataset (i.e.
    :func:`~sklearn.datasets.fetch_california_housing`) and the Ames housing
    dataset. You can load the datasets as follows::

        from sklearn.datasets import fetch_california_h

请输入第 1 层的神经元个数：
200
请输入第 2 层的神经元个数：
100
请输入选择的激活函数：
1.relu
2.sigmoid
3.tanh
选择的激活函数为 1
Epoch 1/1000, Loss: 0.6866, R² Score: 0.3134
Epoch 2/1000, Loss: 0.6025, R² Score: 0.3975
Epoch 3/1000, Loss: 0.3125, R² Score: 0.6875
Epoch 4/1000, Loss: 0.3881, R² Score: 0.6119
Epoch 5/1000, Loss: 0.2764, R² Score: 0.7236
Epoch 6/1000, Loss: 0.1904, R² Score: 0.8096
Epoch 7/1000, Loss: 0.1865, R² Score: 0.8135
Epoch 8/1000, Loss: 0.1529, R² Score: 0.8471
Epoch 9/1000, Loss: 0.1344, R² Score: 0.8656
Epoch 10/1000, Loss: 0.1342, R² Score: 0.8658
Epoch 11/1000, Loss: 0.1261, R² Score: 0.8739
Epoch 12/1000, Loss: 0.1124, R² Score: 0.8876
Epoch 13/1000, Loss: 0.1064, R² Score: 0.8936
Epoch 14/1000, Loss: 0.1009, R² Score: 0.8991
Epoch 15/1000, Loss: 0.0948, R² Score: 0.9052
Epoch 16/1000, Loss: 0.0931, R² Score: 0.9069
Epoch 17/1000, Loss: 0.0914, R² Score: 0.9086
Epoch 18/1000, Loss: 0.0853, R² Score: 0.9147
Epoch 19/1000, Loss: 0.0836, R² Score: 0.9164
Epoch 20/1000, Loss: 0.0816, R² Score: 0.9184
Ep