In [None]:
import numpy as np
import Perceptron as perceptron_class

class Perceptron(object):
    """Perceptron binary classifier trained with the perceptron learning rule.

    Parameters
    ----------
    eta : float
        Learning rate.
    n_iter : int
        Number of passes (epochs) over the training data.

    Attributes
    ----------
    w_ : numpy.ndarray, shape (1 + n_features,)
        Weight vector set by ``fit``; ``w_[0]`` is the bias term (the
        threshold of the step function), ``w_[1:]`` are the feature weights.
    errors_ : list of int
        Number of samples that triggered a weight update in each epoch
        (one entry per epoch).
    """

    def __init__(self, eta=0.01, n_iter=10):
        self.eta = eta
        self.n_iter = n_iter

    def fit(self, X, y):
        """Train the weights on the given samples.

        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training samples, e.g. ``[[1, 2, 3], [4, 5, 6]]`` has
            n_samples=2 and n_features=3.
        y : array-like, shape (n_samples,)
            Class label of each sample, expected to be 1 or -1,
            e.g. ``[1, -1]``.

        Returns
        -------
        self : Perceptron
            The fitted instance, so calls can be chained.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        # Start from an all-zero weight vector. The extra leading slot is w0,
        # the bias / threshold of the step function.
        self.w_ = np.zeros(1 + X.shape[1])
        self.errors_ = []

        for _ in range(self.n_iter):
            errors = 0

            # zip pairs every sample row with its label:
            # X=[[1,2,3],[4,5,6]], y=[1,-1] -> ([1,2,3], 1), ([4,5,6], -1)
            for xi, target in zip(X, y):
                # update = eta * (y - y'); zero when the prediction is right.
                update = self.eta * (target - self.predict(xi))

                # xi is a vector, so this applies delta_w[j] = update * xi[j]
                # to every feature weight at once.
                self.w_[1:] += update * xi
                self.w_[0] += update
                errors += int(update != 0.0)

            # Record the misclassification count once per epoch.
            self.errors_.append(errors)

        return self

    def net_input(self, X):
        """Return the net input z = w0*1 + w1*x1 + ... + wn*xn.

        ``X`` may be a single sample (1-D) or a matrix of samples (2-D, one
        row per sample); the result is a scalar or a 1-D array respectively.
        """
        return np.dot(X, self.w_[1:]) + self.w_[0]

    def predict(self, X):
        """Return the class label: 1 where the net input is >= 0, else -1."""
        return np.where(self.net_input(X) >= 0.0, 1, -1)


In [None]:
import pandas as pd

# Path of the iris data set, relative to the notebook's working directory.
file = './data/iris.csv'

# Per the original author's note the data begins on the very first row of the
# file, so header=None keeps pandas from consuming that row as column names.
df = pd.read_csv(file, header=None)

# Preview only the first 10 rows.
print(df.head(n=10))


In [None]:
import matplotlib.pyplot as plt

In [None]:
%matplotlib inline

In [None]:
import numpy as np

# Read the class labels (column 4) of the first 100 rows.
# Positional slicing (iloc) is used on purpose: the label-based
# df.loc[0:100, 4] is end-inclusive and returned 101 labels, one more than
# the 100 feature rows selected below.
y = df.iloc[0:100, 4].values
# Encode the labels: 'Iris-setosa' -> -1, anything else -> 1.
y = np.where(y == 'Iris-setosa', -1, 1)
print(y)

# Keep only columns 0 and 2 of the first 100 rows as the two features.
X = df.iloc[0:100, [0, 2]].values
print(X)

# SimHei is selected so that CJK text in plot labels can be rendered.
plt.rc('font', family='SimHei', size=13)
# First 50 samples: feature 0 on the x axis, feature 1 on the y axis.
# NOTE(review): assumes the file lists 50 setosa rows followed by
# 50 versicolor rows (the usual iris ordering) - confirm against the CSV.
plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='setosa')
plt.scatter(X[50:100, 0], X[50:100, 1], color='blue', marker='x', label='versicolor')


In [None]:
from matplotlib.colors import ListedColormap
def plot_decision_reqions(x,y,classsifier,resolution=0.02):
    """Draw the decision regions of a fitted two-feature classifier.

    A dense grid covering the range of the two features is classified and
    rendered as filled contours; the samples are then drawn on top, one
    marker/colour per class. Drawing happens on the current pyplot axes;
    nothing is returned.

    Parameters
    ----------
    x : array-like, shape (n_samples, 2)
        Samples; column 0 is plotted on the x axis, column 1 on the y axis.
    y : array-like, shape (n_samples,)
        Class label of every sample.
    classsifier : object
        Fitted model exposing ``predict(samples)`` that returns one label per
        row. (The misspelled parameter name is kept for compatibility.)
    resolution : float
        Step of the classification grid along both axes.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    markers = ('s','x','o','v')
    colors = ('red','blue','lightgreen','gray','cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # Range of both features, padded by 1 on each side so that no sample
    # sits on the border of the plot.
    x1_min,x1_max = x[:,0].min()-1,x[:,0].max()+1
    x2_min,x2_max = x[:,1].min()-1,x[:,1].max()+1

    xx1,xx2 = np.meshgrid(np.arange(x1_min,x1_max,resolution),
                          np.arange(x2_min,x2_max,resolution))

    # Classify every grid point: flatten both coordinate grids into an
    # (n_points, 2) sample matrix, then fold the labels back into grid shape
    # because contourf needs z to match xx1/xx2.
    z = classsifier.predict(np.array([xx1.ravel(),xx2.ravel()]).T)
    z = np.asarray(z).reshape(xx1.shape)

    # Shade the region assigned to each class.
    plt.contourf(xx1,xx2,z,alpha=0.4,cmap=cmap)
    plt.xlim(xx1.min(),xx1.max())
    plt.ylim(xx2.min(),xx2.max())

    # Overlay the samples, one scatter series (and legend entry) per class.
    # Modulo indexing keeps this from raising when there are more classes
    # than predefined markers/colours.
    for idx,cl in enumerate(classes):
        plt.scatter(x=x[y==cl,0],y=x[y==cl,1],alpha=0.8,
                    color=colors[idx % len(colors)],
                    marker=markers[idx % len(markers)],label=cl)


In [None]:
# The original note here said the class could not be found through the
# imported perceptron_class module, so the Perceptron class defined in this
# notebook is used instead.
ppn = Perceptron(eta=0.1, n_iter=10)
ppn.fit(X, y)

plot_decision_reqions(X, y, ppn, resolution=0.02)

# X[:, 0] (x axis) is CSV column 0 and X[:, 1] (y axis) is CSV column 2.
# NOTE(review): assuming the standard iris column order (sepal length,
# sepal width, petal length, petal width), the x axis is the sepal length and
# the y axis the petal length; the two labels were previously swapped.
plt.xlabel('花径长度')
plt.ylabel('花瓣长度')
plt.legend(loc='upper left')

plt.show()
