In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
class PLA():
    """Perceptron Learning Algorithm for binary labels in {+1, -1}.

    The weight vector holds one entry per feature followed by a bias term;
    every input matrix is extended with a trailing column of ones to match.

    Parameters
    ----------
    w0 : array-like of shape (n_features + 1,)
        Initial weights, bias last. A float copy is stored in ``self.w``.
    max_iter : int or None, default None
        Maximum number of passes over the training set. ``None`` loops until
        a full pass makes no update, which never terminates when the data is
        not linearly separable.

    Attributes
    ----------
    w : ndarray of shape (n_features + 1,)
        Current weights, bias last.
    converged : bool
        True once ``fit`` completed a pass without any update.
    """

    def __init__(self, w0, max_iter=None):
        self.w = np.array(w0, dtype=float)
        self.max_iter = max_iter
        self.converged = False

    @staticmethod
    def _augment(X):
        """Return X as a float matrix with a trailing column of ones (bias input)."""
        X = np.asarray(X, dtype=float)
        return np.hstack((X, np.ones((X.shape[0], 1))))

    @staticmethod
    def _labels(y, n):
        """Flatten labels to shape (n,) so (n, 1) and (n,) inputs behave alike."""
        y = np.asarray(y).ravel()
        if y.shape[0] != n:
            raise ValueError('expected %d labels, got %d' % (n, y.shape[0]))
        return y

    def fit(self, Xtrain, ytrain):
        """Train with the perceptron update rule ``w <- w + y_i * x_i``.

        Samples are visited in order; a sample counts as misclassified when
        ``sign(w . x_i) != y_i`` (a score of exactly 0 is therefore an error).
        Passes repeat until one makes no update or ``max_iter`` is reached.

        Parameters
        ----------
        Xtrain : array-like of shape (n_samples, n_features)
        ytrain : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If the number of labels differs from the number of samples.
        """
        X_aug = self._augment(Xtrain)
        y = self._labels(ytrain, X_aug.shape[0])
        self.w = np.asarray(self.w, dtype=float)
        self.converged = False
        epoch = 0
        while self.max_iter is None or epoch < self.max_iter:
            epoch += 1
            updates = 0
            for x_i, y_i in zip(X_aug, y):
                if np.sign(np.dot(self.w, x_i)) != y_i:
                    updates += 1
                    # Rebind instead of mutating in place so earlier references stay intact.
                    self.w = self.w + y_i * x_i
            if updates == 0:
                self.converged = True
                break

    def score(self, Xtest, ytest):
        """Print and return the fraction of samples classified correctly.

        Parameters
        ----------
        Xtest : array-like of shape (n_samples, n_features)
        ytest : array-like of shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        float
            Accuracy in [0, 1].

        Raises
        ------
        ValueError
            If the number of labels differs from the number of samples.
        ZeroDivisionError
            If ``Xtest`` contains no samples.
        """
        X_aug = self._augment(Xtest)
        n = X_aug.shape[0]
        y = self._labels(ytest, n)
        predictions = np.sign(np.dot(X_aug, self.w))
        # Plain int keeps the accuracy (and its printed form) a Python float.
        count = int(np.count_nonzero(predictions != y))
        accuracy = (n - count) / n
        print('PLA正确率为' + str(accuracy)[:6])
        return accuracy

    def distance(self, X):
        """Signed distance of each row of X to the hyperplane ``w . x + b = 0``.

        The score is normalised by the norm of the feature weights only; the
        bias shifts the hyperplane but is not part of its normal vector.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, 1)

        Raises
        ------
        ValueError
            If all feature weights are zero (no hyperplane is defined).
        """
        X_aug = self._augment(X)
        w = np.asarray(self.w, dtype=float)
        normal_norm = np.linalg.norm(w[:-1])
        if normal_norm == 0:
            raise ValueError('feature weights are all zero; the hyperplane is undefined')
        return np.dot(X_aug, w.reshape(-1, 1)) / normal_norm

In [None]:
class Pocket():
    """Pocket variant of the perceptron for binary labels in {+1, -1}.

    Runs perceptron update passes and keeps ("pockets") the weight vector
    that misclassifies the fewest training samples, so it still gives a
    usable classifier when the data is not linearly separable.

    The weight vector holds one entry per feature followed by a bias term;
    every input matrix is extended with a trailing column of ones to match.

    Parameters
    ----------
    w0 : array-like of shape (n_features + 1,)
        Initial weights, bias last. A float copy is stored in ``self.w``.
    iteration : int, default 100
        Number of passes. The first pass only scores the initial weights, so
        at most ``iteration - 1`` update passes are run.

    Attributes
    ----------
    w : ndarray
        Weights after the most recent update pass.
    bestw : ndarray
        Pocketed weights: the best vector seen so far.
    bestcount : int
        Number of training samples misclassified by ``bestw``.
    """

    def __init__(self, w0, iteration=100):
        self.w = np.array(w0, dtype=float)
        self.bestw = self.w.copy()
        self.bestcount = 0
        self.iter = iteration

    @staticmethod
    def _augment(X):
        """Return X as a float matrix with a trailing column of ones (bias input)."""
        X = np.asarray(X, dtype=float)
        return np.hstack((X, np.ones((X.shape[0], 1))))

    @staticmethod
    def _labels(y, n):
        """Flatten labels to shape (n,) so (n, 1) and (n,) inputs behave alike."""
        y = np.asarray(y).ravel()
        if y.shape[0] != n:
            raise ValueError('expected %d labels, got %d' % (n, y.shape[0]))
        return y

    @staticmethod
    def _count_errors(w, X_aug, y):
        """Number of rows where ``sign(w . x) != y`` (a score of 0 counts as an error)."""
        return int(np.count_nonzero(np.sign(np.dot(X_aug, w)) != y))

    def fit(self, Xtrain, ytrain):
        """Run perceptron passes and pocket the best weights seen.

        After every pass the current weights are re-scored on the whole
        training set. Counting errors while the weights are still changing
        would not describe the vector that ends up stored in ``bestw``.
        Each call restarts the pocket from the current ``self.w``.

        Parameters
        ----------
        Xtrain : array-like of shape (n_samples, n_features)
        ytrain : array-like of shape (n_samples,) or (n_samples, 1)

        Raises
        ------
        ValueError
            If the number of labels differs from the number of samples.
        """
        X_aug = self._augment(Xtrain)
        y = self._labels(ytrain, X_aug.shape[0])
        self.w = np.asarray(self.w, dtype=float)
        for epoch in range(self.iter):
            if epoch > 0:
                for x_j, y_j in zip(X_aug, y):
                    if np.sign(np.dot(self.w, x_j)) != y_j:
                        self.w = self.w + y_j * x_j
            errors = self._count_errors(self.w, X_aug, y)
            if epoch == 0 or errors < self.bestcount:
                # Copy so later updates to self.w can never alter the pocket.
                self.bestw = self.w.copy()
                self.bestcount = errors
            if errors == 0:
                # Perfect separation: further passes would change nothing.
                break

    def score(self, Xtest, ytest):
        """Print and return the accuracy of the pocketed weights.

        Parameters
        ----------
        Xtest : array-like of shape (n_samples, n_features)
        ytest : array-like of shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        float
            Accuracy in [0, 1].

        Raises
        ------
        ValueError
            If the number of labels differs from the number of samples.
        ZeroDivisionError
            If ``Xtest`` contains no samples.
        """
        X_aug = self._augment(Xtest)
        n = X_aug.shape[0]
        y = self._labels(ytest, n)
        count = self._count_errors(self.bestw, X_aug, y)
        accuracy = (n - count) / n
        print('Pocket正确率为' + str(accuracy)[:6])
        return accuracy

    def distance(self, X):
        """Signed distance of each row of X to the pocketed hyperplane.

        The score is normalised by the norm of the feature weights only; the
        bias shifts the hyperplane but is not part of its normal vector.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, 1)

        Raises
        ------
        ValueError
            If all feature weights are zero (no hyperplane is defined).
        """
        X_aug = self._augment(X)
        w = np.asarray(self.bestw, dtype=float)
        normal_norm = np.linalg.norm(w[:-1])
        if normal_norm == 0:
            raise ValueError('feature weights are all zero; the hyperplane is undefined')
        return np.dot(X_aug, w.reshape(-1, 1)) / normal_norm

In [None]:
# Toy data: two unit-covariance Gaussian blobs, labelled +1 (around mean1)
# and -1 (around mean2). Sampling and the train/test split share one seed so
# that Restart & Run All reproduces the same data, scores and plots.
SEED = 42
N_PER_CLASS = 200
rng = np.random.default_rng(SEED)

mean1 = [-5, 0]
s1 = [[1, 0], [0, 1]]
data1 = rng.multivariate_normal(mean1, s1, N_PER_CLASS)
mean2 = [0, 5]
s2 = [[1, 0], [0, 1]]
data2 = rng.multivariate_normal(mean2, s2, N_PER_CLASS)
X = np.vstack((data1, data2))
# Labels are kept as a column vector of shape (2 * N_PER_CLASS, 1).
ones = np.ones(N_PER_CLASS).reshape(-1,1)
y = np.vstack((ones,-1*ones))
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [None]:
%%time
model1 = PLA([0, 0, 0])
model1.fit(Xtrain, ytrain)
model1.score(Xtest, ytest);

In [None]:
%%time
model2 = Pocket([0, 0, 0])
model2.fit(Xtrain, ytrain)
model2.score(Xtest, ytest);

In [None]:
train_index = (ytrain==1).ravel()
test_index = (ytest==1).ravel()
plt.scatter(Xtrain[train_index, 0], Xtrain[train_index, 1], marker='.', c='b', s=100
            , label='train +1')
plt.scatter(Xtrain[~train_index, 0], Xtrain[~train_index, 1], marker='.', c='k', s=100
           , label='train -1')
plt.scatter(Xtest[test_index, 0], Xtest[test_index, 1], marker='D', c='r', label='test +1')
plt.scatter(Xtest[~test_index, 0], Xtest[~test_index, 1], marker='D', c='y', label='test -1')
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
axisx = np.linspace(xlim[0], xlim[1], 30)
axisy = np.linspace(ylim[0], ylim[1], 30)
axisx, axisy = np.meshgrid(axisx, axisy)
xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
z = model1.distance(xy).reshape(axisx.shape)
plt.contour(axisx, axisy, z, levels=[0], linestyles=['--'])
plt.legend()
plt.title('PLA')
plt.show();

In [None]:
plt.scatter(Xtrain[train_index, 0], Xtrain[train_index, 1], marker='.', c='b', s=100
            , label='train +1')
plt.scatter(Xtrain[~train_index, 0], Xtrain[~train_index, 1], marker='.', c='k', s=100
           , label='train -1')
plt.scatter(Xtest[test_index, 0], Xtest[test_index, 1], marker='D', c='r', label='test +1')
plt.scatter(Xtest[~test_index, 0], Xtest[~test_index, 1], marker='D', c='y', label='test -1')
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
axisx = np.linspace(xlim[0], xlim[1], 30)
axisy = np.linspace(ylim[0], ylim[1], 30)
axisx, axisy = np.meshgrid(axisx, axisy)
xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
z = model2.distance(xy).reshape(axisx.shape)
plt.contour(axisx, axisy, z, levels=[0], linestyles=['--'])
plt.legend()
plt.title('Pocket')
plt.show();