## MNIST数据集读取

In [1]:
import gzip
import os
import struct
import numpy as np

def load_mnist(path, kind='train'):
    """Load one split of a gzip-compressed, IDX-formatted MNIST-style dataset.

    Reads ``<kind>-labels-idx1-ubyte.gz`` and ``<kind>-images-idx3-ubyte.gz``
    from ``path``. Image dimensions are taken from the image-file header
    instead of being assumed to be 28x28.

    Args:
        path: Directory that contains the two ``.gz`` files.
        kind: File-name prefix of the split to load (e.g. ``'train'``).

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a ``uint8`` array of
        shape ``(N, rows * cols)`` (one flattened image per row) and
        ``labels`` is a ``uint8`` array of shape ``(N,)``.

    Raises:
        ValueError: If a file does not start with the expected IDX magic
            number, or if the header counts disagree with the payload size
            or with each other.
    """
    labels_path = os.path.join(path, f'{kind}-labels-idx1-ubyte.gz')
    images_path = os.path.join(path, f'{kind}-images-idx3-ubyte.gz')

    # Label file: 8-byte big-endian header (magic, item count), then one byte per label.
    with gzip.open(labels_path, 'rb') as lbpath:
        magic, num_labels = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:
            raise ValueError(
                f'{labels_path}: unexpected magic number {magic} (expected 2049)')
        labels = np.frombuffer(lbpath.read(), dtype=np.uint8)
    if labels.shape[0] != num_labels:
        raise ValueError(
            f'{labels_path}: header announces {num_labels} labels, found {labels.shape[0]}')

    # Image file: 16-byte big-endian header (magic, image count, rows, cols),
    # then rows * cols bytes per image.
    with gzip.open(images_path, 'rb') as imgpath:
        magic, num_images, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:
            raise ValueError(
                f'{images_path}: unexpected magic number {magic} (expected 2051)')
        pixels = np.frombuffer(imgpath.read(), dtype=np.uint8)
    if num_images != num_labels:
        raise ValueError(
            f'image count {num_images} does not match label count {num_labels}')
    if pixels.size != num_images * rows * cols:
        raise ValueError(
            f'{images_path}: header announces {num_images}x{rows}x{cols} pixels, '
            f'found {pixels.size} bytes')

    # Flatten every image into a single row vector.
    images = pixels.reshape(num_images, rows * cols)

    return images, labels



# Split a dataset into a training part and a validation part
def data_split(images, labels, ratio):
    """Hold out the leading ``ratio`` fraction of the data for validation.

    The split is purely positional: the first ``int(N * ratio)`` rows become
    the validation set and the remaining rows the training set. Nothing is
    shuffled.

    Args:
        images: Array whose first axis indexes the N examples.
        labels: Labels aligned with ``images`` along the first axis.
        ratio: Fraction of the examples assigned to the validation set.

    Returns:
        ``(train_img, train_lb, val_img, val_lb)``.
    """
    # Index of the first training example
    n_val = int(images.shape[0] * ratio)

    val_img, train_img = images[:n_val], images[n_val:]
    val_lb, train_lb = labels[:n_val], labels[n_val:]

    return train_img, train_lb, val_img, val_lb

# Load the training and test splits, then hold out the first 1/6 of the
# training examples as a validation set
images, labels = load_mnist('./MNIST', kind='train')
test_img, test_lb = load_mnist('./MNIST', kind='test')
train_img, train_lb, val_img, val_lb = data_split(images, labels, ratio=1 / 6)


# Print the shape of every split as a sanity check
print(
    '训练集图像格式为:', train_img.shape,
    '训练集标签格式为:', train_lb.shape,
)
print(
    '验证集图像格式为:', val_img.shape,
    '验证集标签格式为:', val_lb.shape,
)
print(
    '测试集图像格式为:', test_img.shape,
    '测试集标签格式为:', test_lb.shape,
)

训练集图像格式为: (50000, 784) 训练集标签格式为: (50000,)
验证集图像格式为: (10000, 784) 验证集标签格式为: (10000,)
测试集图像格式为: (10000, 784) 测试集标签格式为: (10000,)


## 用SGD+Momentum+CrossEntropyLoss来训练 Linear Classifier

### 定义Cross Entropy Loss

In [2]:
def vectorized_loss_softmax(X, y, W, lamda=1.0):
    """Mean softmax cross-entropy loss of a linear classifier plus an L1 penalty.

    With D input features, C classes and N examples:

    Inputs:
    - X: Images, a numpy array of shape (N, D); one example per row.
    - y: Labels, a numpy integer array of shape (N,); y[i] = c means
         that X[i] has label c, where 0 <= c < C.
    - W: Weights of the linear classifier, a numpy array of shape (D, C).
    - lamda: Strength of the L1 regularization term. The default (1.0) is
         the value that used to be hard-coded.

    Returns:
    - The loss as a single float: the data loss averaged over the N examples
      plus lamda * sum(|W|).
    """
    num_train = X.shape[0]

    # scores: class x examples
    scores = W.T.dot(X.T)

    # Subtract each column's maximum so the largest exponent is exp(0) = 1;
    # this leaves the softmax unchanged and prevents overflow.
    shifted = scores - np.max(scores, axis=0, keepdims=True)

    # Log-softmax computed directly as shifted - log(sum(exp(shifted))).
    # Taking log(softmax) instead yields -inf whenever a probability
    # underflows to 0, and the 0 * -inf products of the one-hot mask then
    # turn the whole loss into NaN.
    log_prob = shifted - np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))

    # Average negative log-likelihood of the true class of every example
    data_loss = -np.sum(log_prob[y, np.arange(num_train)]) / num_train

    # L1 regularization loss
    reg_loss = lamda * np.sum(np.abs(W))

    return data_loss + reg_loss

### 定义梯度计算函数

In [3]:
# Numerical (finite-difference) gradient of the softmax loss
def compute_gradient(img, lb, X):
    """Approximate d(loss)/dX with a forward difference on every weight.

    Each weight is bumped by a small step, the loss is re-evaluated with
    ``vectorized_loss_softmax``, and the weight is put back before the next
    one is touched. The loss is therefore evaluated once per weight plus once
    for the unperturbed baseline.

    Inputs:
    - img: Images, a numpy array of shape (N, D).
    - lb: Labels, a numpy array of shape (N,); lb[i] = c means that img[i]
          has label c, where 0 <= c < C.
    - X: Weights of the linear classifier, a numpy array of shape (D, C).
         Modified in place while the function runs, but every entry is
         restored to its original value.

    Returns:
    - Numeric gradient, an array with the same shape as X.
    """
    step = 0.0001
    grad = np.zeros(X.shape)

    # Loss at the unperturbed weights, shared by every difference quotient
    base_loss = vectorized_loss_softmax(img, lb, X)

    # Visit the weights column by column
    for col in range(X.shape[1]):
        for row in range(X.shape[0]):
            saved = X[row, col]

            # Loss with this single weight nudged upwards
            X[row, col] = saved + step
            bumped_loss = vectorized_loss_softmax(img, lb, X)

            # Undo the nudge before moving on
            X[row, col] = saved

            # Forward-difference estimate of the partial derivative
            grad[row, col] = (bumped_loss - base_loss) / step

    return grad


### 定义SGD+Momentum优化器

In [13]:

# Gradient descent with momentum
def Train_with_SGD(img, lb, epoch, learning_rate=5e-4, beta1=0.9):
    """Train a 10-class linear classifier with momentum gradient descent.

    Every iteration computes the numeric gradient on all of ``img``, folds it
    into an exponential moving average, and steps the weights along that
    average. Loss and training accuracy are printed after each iteration.

    Inputs:
    - img: Training images, a numpy array of shape (N, D).
    - lb: Training labels, a numpy array of shape (N,); lb[i] = c means
          that img[i] has label c, where 0 <= c < 10.
    - epoch: Number of update iterations, an integer.
    - learning_rate: Step size of each update (default 5e-4, the value that
          used to be hard-coded).
    - beta1: Decay factor of the gradient moving average (default 0.9, the
          value that used to be hard-coded).

    Returns:
    - The weights X of shape (D, 10) after the final iteration. No
      best-so-far copy is tracked.
    """
    # Moving average of the gradient; starts at 0 and becomes an array on
    # the first update
    first_momentum = 0

    # Initialize the weights with small Gaussian noise
    X = np.random.randn(img.shape[1], 10) * 0.0001

    for i in range(1, epoch + 1):
        # Numeric gradient of the loss at the current weights
        dx = compute_gradient(img, lb, X)

        # Momentum: exponential moving average of the gradient
        first_momentum = beta1 * first_momentum + (1 - beta1) * dx

        # Update the weights in place
        X -= learning_rate * first_momentum

        # Loss and accuracy on the data that was just trained on
        loss = vectorized_loss_softmax(img, lb, X)
        scores = X.T.dot(img.T)
        y_pred = np.argmax(scores, axis=0)
        accuracy = np.mean(y_pred == lb) * 100

        print("Epoch: %d  Loss: %.3f  Acc: %.3f%%" % (i, loss, accuracy))

    return X

### 在训练集上进行训练

In [14]:
# Train the linear classifier with SGD + momentum on the first 1000 training examples
epoch = 100
best_X = Train_with_SGD(train_img[:1000], train_lb[:1000], epoch)

Epoch: 1  Loss: 4.299  Acc: 37.100%
Epoch: 2  Loss: 11.181  Acc: 56.900%
Epoch: 3  Loss: 19.426  Acc: 44.900%
Epoch: 4  Loss: 21.622  Acc: 50.100%
Epoch: 5  Loss: 20.153  Acc: 51.800%
Epoch: 6  Loss: 16.962  Acc: 55.100%
Epoch: 7  Loss: 13.161  Acc: 58.800%
Epoch: 8  Loss: 13.997  Acc: 64.800%
Epoch: 9  Loss: 19.096  Acc: 56.400%
Epoch: 10  Loss: 16.267  Acc: 62.000%
Epoch: 11  Loss: 13.772  Acc: 72.000%
Epoch: 12  Loss: 12.681  Acc: 78.400%
Epoch: 13  Loss: 13.104  Acc: 77.900%
Epoch: 14  Loss: 13.260  Acc: 81.000%
Epoch: 15  Loss: 13.676  Acc: 83.200%
Epoch: 16  Loss: 14.031  Acc: 82.500%
Epoch: 17  Loss: 13.649  Acc: 84.700%
Epoch: 18  Loss: 13.132  Acc: 86.100%
Epoch: 19  Loss: 12.738  Acc: 83.600%
Epoch: 20  Loss: 11.863  Acc: 86.400%
Epoch: 21  Loss: 10.851  Acc: 89.300%
Epoch: 22  Loss: 10.419  Acc: 88.500%
Epoch: 23  Loss: 10.243  Acc: 86.700%
Epoch: 24  Loss: 9.999  Acc: 88.800%
Epoch: 25  Loss: 9.906  Acc: 88.000%
Epoch: 26  Loss: 9.695  Acc: 84.400%
Epoch: 27  Loss: 8.908  A

### 在验证集上测试分类效果

In [15]:
# Measure the classification accuracy on the validation set

# Class scores, laid out as (classes, examples)
scores = np.dot(best_X.T, val_img.T)

# Predicted label of each example = row index of the highest score in its column
y_pred = scores.argmax(axis=0)

# Percentage of validation examples whose prediction matches the label
accuracy = (y_pred == val_lb).mean() * 100

# Report the accuracy
print("SGD_Momentum优化器在验证集上的分类精度为: %.3f%%" % accuracy)

SGD_Momentum优化器在验证集上的分类精度为: 76.690%


## 使用Adam优化器进行训练

### 定义Adam

In [7]:

# Adam training function
def Train_with_Adam(img, lb, epoch, learning_rate=5e-4, beta1=0.9, beta2=0.999, eps=1e-7):
    """Train a 10-class linear classifier with the Adam update rule.

    Every iteration computes the numeric gradient on all of ``img``, updates
    the moving averages of the gradient and of its element-wise square,
    bias-corrects both, and takes a per-weight scaled step. Loss and training
    accuracy are printed after each iteration.

    Inputs:
    - img: Training images, a numpy array of shape (N, D).
    - lb: Training labels, a numpy array of shape (N,); lb[i] = c means
          that img[i] has label c, where 0 <= c < 10.
    - epoch: Number of update iterations, an integer.
    - learning_rate: Step size of each update (default 5e-4).
    - beta1: Decay factor of the first-moment (gradient) average (default 0.9).
    - beta2: Decay factor of the second-moment (squared gradient) average
          (default 0.999).
    - eps: Constant added to the denominator to avoid division by zero
          (default 1e-7).
      All defaults are the values that used to be hard-coded.

    Returns:
    - The weights X of shape (D, 10) after the final iteration. No
      best-so-far copy is tracked.
    """
    # Moving averages; both start at 0 and become arrays on the first update
    first_momentum = 0
    second_momentum = 0

    # Initialize the weights with small Gaussian noise
    X = np.random.randn(img.shape[1], 10) * 0.0001

    # i starts at 1 so the bias-correction denominators are never zero
    for i in range(1, epoch + 1):
        # Numeric gradient of the loss at the current weights
        dx = compute_gradient(img, lb, X)

        # First moment: moving average of the gradient (momentum)
        first_momentum = beta1 * first_momentum + (1 - beta1) * dx
        # Second moment: moving average of the squared gradient
        second_momentum = beta2 * second_momentum + (1 - beta2) * dx * dx

        # Bias correction: both averages start at 0 and are therefore biased
        # towards 0 during the first iterations
        first_unbias = first_momentum / (1 - beta1 ** i)
        second_unbias = second_momentum / (1 - beta2 ** i)

        # Update the weights in place
        X -= learning_rate * first_unbias / (np.sqrt(second_unbias) + eps)

        # Loss and accuracy on the data that was just trained on
        loss = vectorized_loss_softmax(img, lb, X)
        scores = X.T.dot(img.T)
        y_pred = np.argmax(scores, axis=0)
        accuracy = np.mean(y_pred == lb) * 100

        print("Epoch: %d  Loss: %.3f  Acc: %.3f%%" % (i, loss, accuracy))

    return X

### 在训练集上进行训练

In [8]:
# Train the linear classifier with Adam on the first 1000 training examples
epoch = 100
best_X = Train_with_Adam(train_img[:1000], train_lb[:1000], epoch)

Epoch: 1  Loss: 6.147  Acc: 41.900%
Epoch: 2  Loss: 6.465  Acc: 61.600%
Epoch: 3  Loss: 6.280  Acc: 67.100%
Epoch: 4  Loss: 6.216  Acc: 68.700%
Epoch: 5  Loss: 5.898  Acc: 72.500%
Epoch: 6  Loss: 5.505  Acc: 72.800%
Epoch: 7  Loss: 4.321  Acc: 80.600%
Epoch: 8  Loss: 3.914  Acc: 87.100%
Epoch: 9  Loss: 4.061  Acc: 86.000%
Epoch: 10  Loss: 4.376  Acc: 76.800%
Epoch: 11  Loss: 4.265  Acc: 75.600%
Epoch: 12  Loss: 3.708  Acc: 82.800%
Epoch: 13  Loss: 3.380  Acc: 89.100%
Epoch: 14  Loss: 3.216  Acc: 89.200%
Epoch: 15  Loss: 3.067  Acc: 87.100%
Epoch: 16  Loss: 3.123  Acc: 85.600%
Epoch: 17  Loss: 3.117  Acc: 85.800%
Epoch: 18  Loss: 2.933  Acc: 86.000%
Epoch: 19  Loss: 2.645  Acc: 87.500%
Epoch: 20  Loss: 2.462  Acc: 90.500%
Epoch: 21  Loss: 2.370  Acc: 90.500%
Epoch: 22  Loss: 2.269  Acc: 90.500%
Epoch: 23  Loss: 2.255  Acc: 88.400%
Epoch: 24  Loss: 2.179  Acc: 88.000%
Epoch: 25  Loss: 1.990  Acc: 90.700%
Epoch: 26  Loss: 1.834  Acc: 92.300%
Epoch: 27  Loss: 1.822  Acc: 89.500%
Epoch: 28 

### 在验证集上测试分类效果

In [12]:
# Measure the classification accuracy of the Adam-trained weights on the
# validation set

# scores: class x examples
scores = best_X.T.dot(val_img.T)

# Predicted label of each example = row index of the highest score in its column
# y_pred: examples
y_pred = np.argmax(scores, axis=0)

# Percentage of validation examples whose prediction matches the label
accuracy = np.mean(y_pred == val_lb) * 100

# Report the accuracy. The label names Adam because best_X was produced by
# Train_with_Adam in this section; it previously said SGD_Momentum.
print("Adam优化器在验证集上的分类精度为: %.3f%%" % accuracy)

SGD_Momentum优化器在验证集上的分类精度为: 81.870%
