In [5]:
import numpy as np
from sklearn.preprocessing import LabelBinarizer
from sklearn.metrics import classification_report
import load_MNIST
from sklearn.preprocessing import StandardScaler

利用写好的load_MNIST模块中的load_data方法导入MNIST数据集
- Parameter
    - file_name:list 
        四个文件路径
        - list[0]: 训练集的图片数据文件路径
        - list[1]: 测试集的图片数据文件路径
        - list[2]: 训练集的标签文件路径
        - list[3]: 测试集的标签文件路径
- Return
    - train_data: numpy array
    - train_labels: list
    - test_data: numpy array
    - test_labels: list

In [6]:
file = ["C:/Users/wtser/Desktop/learnData/data/Mnist/train-images.idx3-ubyte",
       "C:/Users/wtser/Desktop/learnData/data/Mnist/t10k-images.idx3-ubyte",
       "C:/Users/wtser/Desktop/learnData/data/Mnist/train-labels.idx1-ubyte",
       "C:/Users/wtser/Desktop/learnData/data/Mnist/t10k-labels.idx1-ubyte"]  

train_data,train_labels,test_data,test_labels = load_MNIST.load_data(file)



把图片数据进行标准化，如果不进行标准化，在未进行softmax之前，每个分类的输出相差过大，导致利用softmax时，结果时某一个分l类的输出为1，其他都是0

In [76]:
#对数据进行预处理
scaler = StandardScaler()
scaler.fit(train_data)
train_data = scaler.transform(train_data)
test_data = scaler.transform(test_data)

#把训练集的标签进行改变，比如x被分为第二类，y = 1，变成[0,1,0,0,0,0,0,0,0,0]
train_y = LabelBinarizer().fit_transform(train_labels)

#为测试集和训练集的数据增加一维当作偏置值
train_data = np.c_[train_data,np.ones([train_data.shape[0],1])]
test_data = np.c_[test_data,np.ones([test_data.shape[0],1])]


In [77]:
class logisticRegression_softMax:
    def __init__(self,size):
        """
        初始化模型
        
        Parameters:
        ___________
        size: list of shape
            权重的规模
        """
        
        self.W = np.random.rand(size[0],size[1])
        
    def softMax(self,x):
        """
        在用softmax函数时，为了避免出现分子为non或粪分母为0出现数值越界，把每个output(i) - max(output(i
        """
        max_dict = [ [np.max(i)] for i in x]
        x = np.exp((x-max_dict))
        return np.array([ i/np.sum(i) for i in x])
    
    def softMax_deriv(self,x,y):
        return x - y
    
    def fit(self,X,Y,batch_size,epoch,alpha,count=10,reg=1):
        """
        训练模型
        
        Parameters:
        ___________
        X:numpy array of shape
        Y:list of label
        batch_size: 批量梯度下降法的批量大小
        epoch:训练次数
        alpha:学习率
        reg:正则化惩罚系数
        """
        
        for i in range(epoch):
            batch_index = np.random.choice(X.shape[0],batch_size,True)
            batch_x = X[batch_index,:]
            batch_y = Y[batch_index]
            
            self.train(batch_x,batch_y,alpha,reg)
            if i == 0 or (i+1)%count ==0:
                
                pre = self.predict(test_data[:250])
                pre = np.argmax(pre,axis=1)
                print("i-loss",self.calculate_loss(X,Y),"acc",self.calluclate_acc(pre,test_labels[:250]))
    
    def train(self,x,y,alpha,reg):
        net = x.dot(self.W)
        out = self.softMax(net)
        deriv = self.softMax_deriv(out,y)

        self.W -= alpha * reg +1.0/x.shape[0] * alpha * x.T.dot(deriv)
    
    def predict(self,x):
        net = x.dot(self.W)
        out = self.softMax(net)
        return out
    
    def calculate_loss(self,x,y):
        predictions = self.predict(x)
        loss = -sum(np.sum(y*np.log(predictions),1))
        return loss
    
    def calluclate_acc(self,pre,y):
        result = np.array(pre)==np.array(y)
        return (1.0*sum(result)/len(result))

In [73]:
model = logisticRegression_softMax([train_data.shape[1],10])
model.fit(train_data[:1000],train_y[:1000],batch_size=100,epoch=30000,alpha=0.01,count=1000,reg=1)

i-loss 10767.341903764725 acc 0.064
i-loss 464.20806925064744 acc 0.736
i-loss 219.3904015471753 acc 0.744
i-loss 130.36061205838934 acc 0.78
i-loss 85.15694682924386 acc 0.776
i-loss 58.60515017850914 acc 0.776
i-loss 42.36124471578768 acc 0.788
i-loss 32.62104918034619 acc 0.804
i-loss 26.620028630328576 acc 0.816
i-loss 22.428863774889752 acc 0.816
i-loss 19.4790466498094 acc 0.82
i-loss 17.179435538897334 acc 0.82
i-loss 15.436972585837893 acc 0.82
i-loss 14.003853401520432 acc 0.82
i-loss 12.82894735501835 acc 0.82
i-loss 11.858251667531894 acc 0.82
i-loss 11.018649058489745 acc 0.82
i-loss 10.296883482737268 acc 0.82
i-loss 9.6714117483202 acc 0.82
i-loss 9.120178080944784 acc 0.824
i-loss 8.627534235124948 acc 0.824
i-loss 8.184276254801784 acc 0.824
i-loss 7.792921077363783 acc 0.824
i-loss 7.433603956513402 acc 0.824
i-loss 7.110057552820278 acc 0.824
i-loss 6.8151209341121985 acc 0.824
i-loss 6.541976914729608 acc 0.824
i-loss 6.29160045074276 acc 0.824
i-loss 6.0616788939153

In [74]:
pre = model.predict(test_data[:250])
pre = np.argmax(pre,axis=1)
print(classification_report(pre,test_labels[:250]))


             precision    recall  f1-score   support

          0       0.95      0.90      0.92        20
          1       0.94      0.91      0.93        35
          2       0.75      0.69      0.72        26
          3       0.78      0.75      0.77        24
          4       0.82      0.84      0.83        32
          5       0.84      0.81      0.82        26
          6       0.68      0.83      0.75        18
          7       0.90      0.84      0.87        31
          8       0.86      0.86      0.86        14
          9       0.70      0.79      0.75        24

avg / total       0.83      0.82      0.83       250

