In [None]:
'''
@Author: fzy
@Date: 2019-05-23 14:53:54
@LastEditors: Zhenying
@LastEditTime: 2019-05-23 15:02:31
@Description: 
'''
import numpy as np
import pandas as pd
import time
import logging

# 1.log函数

In [None]:
def get_logger(algorithm_name):
    """Configure the root logger to emit INFO records to the console and a file.

    The function is safe to call more than once (for example when the notebook
    cell is re-run): handlers installed by an earlier call are recognised by a
    tag attribute and are not added again, so each record is written only once
    per destination and no extra file handles are opened.

    Args:
        algorithm_name: Base name of the log file; the file handler is opened
            on ``algorithm_name + '.log'``.

    Returns:
        The root ``logging.Logger`` with its level set to ``logging.INFO``.
    """
    LOG_FORMAT = "%(asctime)s - %(levelname)s - %(message)s"
    DATE_FORMAT = "%Y-%m-%d %H:%M:%S %p"
    TAG_ATTR = "_get_logger_tag"

    logger = logging.getLogger()
    logger.setLevel(logging.INFO)
    formatter = logging.Formatter(LOG_FORMAT, DATE_FORMAT)

    # Tags of the handlers that previous calls of this function installed.
    installed = {getattr(handler, TAG_ATTR, None) for handler in logger.handlers}

    # One shared console handler, regardless of how many names are requested.
    if "console" not in installed:
        chlr = logging.StreamHandler()
        chlr.setFormatter(formatter)
        chlr.setLevel('INFO')
        setattr(chlr, TAG_ATTR, "console")
        logger.addHandler(chlr)

    # One file handler per distinct log-file name.
    file_tag = "file:" + algorithm_name
    if file_tag not in installed:
        fhlr = logging.FileHandler(algorithm_name + '.log')
        fhlr.setFormatter(formatter)
        setattr(fhlr, TAG_ATTR, file_tag)
        logger.addHandler(fhlr)

    return logger

In [None]:
# Set up logging for the notebook; the file handler writes to "perceptron.log".
logger = get_logger("perceptron")

# 2.读取数据

In [None]:
def load_data(filename, logg):
    """Read a header-less CSV and return scaled features with binary labels.

    Column 0 of the file is taken as the class label and every remaining
    column as a feature value.

    Args:
        filename: Path of the CSV file; read with ``pd.read_csv(header=None)``.
        logg: Logger used to report the start and the end of loading.

    Returns:
        A ``(datas, labels)`` tuple. ``datas`` is the array of feature columns
        divided by 255. ``labels`` is 1 where the raw label is greater than 0
        and -1 otherwise (so a raw label of 0 becomes -1).
    """
    logg.info("===== Loading Data =====")
    frame = pd.read_csv(filename, header=None)

    # Two-class targets from the first column: raw label > 0 -> 1, else -> -1.
    raw_labels = frame.iloc[:, 0].values
    binary_labels = np.where(raw_labels > 0, 1, -1)

    # All remaining columns are features, scaled by dividing by 255.
    scaled_features = frame.iloc[:, 1:].values / 255.

    logg.info("===== Loaded Data  =====")
    return scaled_features, binary_labels

In [None]:
# Load the training CSV (path is relative to the notebook's working directory).
datas, labels = load_data("../data/mnist_train.csv", logger)

In [None]:
# Display the shape of the label array as a quick sanity check.
labels.shape

# 3.感知机训练

In [None]:
def perceptron(datas, labels, logg, iters=100, eta=0.0001):
    """Train a perceptron by updating on each misclassified sample in turn.

    Starting from zero weights and zero bias, every pass visits the samples in
    order. A sample ``(x, y)`` with ``y * (w . x + b) <= 0`` is treated as
    misclassified and triggers the update ``w += eta * y * x``,
    ``b += eta * y``.

    The computation uses plain ndarrays instead of ``np.mat``, which was
    removed in NumPy 2.0.

    Args:
        datas: 2-D array-like of shape ``(m, n)`` holding one sample per row.
        labels: Array-like of ``m`` target values; the update rule assumes
            they are +1 / -1.
        logg: Logger used for progress messages.
        iters: Number of full passes over the samples.
        eta: Learning rate applied to every update.

    Returns:
        A ``(w, b)`` tuple: ``w`` is an ndarray of shape ``(1, n)`` and ``b``
        is a Python float.
    """
    logg.info("===== start train =====")
    samples = np.asarray(datas, dtype=float)
    targets = np.asarray(labels).ravel()
    # Unpacking fails for anything that is not 2-D, as in the original code.
    _, n = samples.shape

    # Weights are kept 1-D while training and reshaped to (1, n) on return.
    w = np.zeros(n)
    b = 0.0

    for now_iter in range(iters):
        for xi, yi in zip(samples, targets):
            # A non-positive margin means the sample is misclassified or
            # lies exactly on the decision boundary.
            if yi * (np.dot(w, xi) + b) <= 0:
                w += eta * yi * xi
                b += eta * yi
        logg.info('Iter [%d]:[%d]' % (now_iter, iters))

    logg.info("===== trained =====")
    return w.reshape(1, n), float(b)

In [None]:
# Train on the currently loaded data using the default number of passes (100).
w, b = perceptron(datas, labels, logger)

# 4.测试

In [None]:
def test(datas, labels, w, b, logg):
    logg.info("===== start testing =====")
    m, n = datas.shape
    # 用来统计预测错误的个数
    errorCnt = 0
    # 对所有样本进行预测
    for i in range(m):
        xi = datas[i]
        yi = labels[i]
        xi = np.mat(xi)
        yi = np.mat(yi)
        res = -1 * yi * (w * xi.T + b)
        if res >= 0: errorCnt += 1
    accruRate = 1 - (errorCnt / m)
    logg.info("===== tested =====")
    logg.info("accRate: {0}".format(accruRate))
    return accruRate

In [None]:
datas, labels = load_data("../data/mnist_test.csv", logger)

In [None]:
accRate = test(datas, labels, w, b, logger)