In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
class LR():
    """Linear least-squares model used as a binary classifier for labels in {-1, +1}.

    A column of ones is appended to the features before every computation, so
    the last entry of ``w`` plays the role of the bias term.

    Parameters
    ----------
    algorithm : str
        'inv' solves the least-squares problem in closed form; 'sgd' runs
        mini-batch gradient descent on the mean squared error.
    batch_size : int
        Number of rows per mini-batch (only read by 'sgd').
    eta : float
        Learning rate (only read by 'sgd').
    w0 : array-like
        Initial weights, one per feature plus one for the bias. The default
        has three entries, so it only matches two-feature data.
    iteration : int
        Number of passes over the training data (only read by 'sgd').
    """

    def __init__(self, algorithm='inv', batch_size=16,
                 eta=0.001, w0=np.zeros(3).reshape(-1,1), iteration=100):
        self.algorithm = algorithm
        self.batchsize = batch_size
        self.eta = eta
        # The default array is created once at definition time; copy it so
        # instances never alias (and can never mutate) the same buffer.
        self.w = np.array(w0, dtype=float)
        self.iter = iteration

    def fit(self, Xtrain, ytrain):
        """Estimate the weights from the training data.

        Returns
        -------
        list or None
            For 'sgd', a list with one shape-(1,) array per epoch holding the
            mean of the mini-batch losses. For 'inv' and for an unknown
            algorithm nothing is returned.

        Raises
        ------
        ValueError
            For 'sgd', when the number of weights differs from the number of
            feature columns plus one.
        """
        n = Xtrain.shape[0]
        X_expend = np.hstack((Xtrain, np.ones(n).reshape(-1,1)))
        if self.algorithm == 'inv':
            # The pseudo-inverse equals (X^T X)^-1 X^T for full column rank and
            # still yields a least-squares solution when columns are collinear.
            self.w = np.linalg.pinv(X_expend).dot(ytrain)
        elif self.algorithm == 'sgd':
            # Column vectors keep ``X.dot(w) - y`` from broadcasting to (b, b)
            # when the caller passes 1-D labels or 1-D initial weights.
            y_col = np.asarray(ytrain, dtype=float).reshape(-1, 1)
            self.w = np.asarray(self.w, dtype=float).reshape(-1, 1)
            if self.w.shape[0] != X_expend.shape[1]:
                raise ValueError(
                    'w0 has %d entries but the data needs %d (features + bias)'
                    % (self.w.shape[0], X_expend.shape[1]))
            batchnum = int(np.ceil(n/self.batchsize))
            loss = []
            for epoch in range(self.iter):
                epoch_loss = []
                for start in range(0, n, self.batchsize):
                    # Slicing past the end is safe, so the last batch may
                    # simply be shorter than the others.
                    Xtmp = X_expend[start:start + self.batchsize, :]
                    ytmp = y_col[start:start + self.batchsize]
                    # Normalise by the rows actually present; dividing by the
                    # nominal batch size would shrink a short final batch.
                    rows = Xtmp.shape[0]
                    residual = Xtmp.dot(self.w) - ytmp
                    batch_loss = (residual.T.dot(residual)/rows).ravel()
                    grad = 2*Xtmp.T.dot(residual)/rows
                    self.w = self.w - self.eta*grad
                    epoch_loss.append(batch_loss)
                loss.append(sum(epoch_loss)/batchnum)
            return loss
        else :
            print('未知算法')

    def score(self, Xtest, ytest):
        """Print and return the fraction of rows whose sign(prediction) equals the label."""
        n = Xtest.shape[0]
        X_expend = np.hstack((Xtest, np.ones(n).reshape(-1,1)))
        # A prediction of exactly 0 has sign 0 and therefore never matches +/-1.
        predicted = np.sign(np.dot(X_expend, self.w)).ravel()
        count = int(np.count_nonzero(predicted != np.asarray(ytest).ravel()))
        print('LR正确率为' + str((n-count)/n)[:6])
        return (n-count)/n

    def predict(self, X):
        """Return the raw linear output for each row of X (not the sign)."""
        n = X.shape[0]
        X_expend = np.hstack((X, np.ones(n).reshape(-1,1)))
        return np.dot(X_expend, self.w)

In [None]:
# Fix the seed so Restart & Run All regenerates the same samples and the same split.
SEED = 0
N_PER_CLASS = 200
np.random.seed(SEED)

# Two unit-covariance Gaussian clouds, one per class.
mean1 = [-5, 0]
s1 = [[1, 0], [0, 1]]
data1 = np.random.multivariate_normal(mean1, s1, N_PER_CLASS)
mean2 = [0, 5]
s2 = [[1, 0], [0, 1]]
data2 = np.random.multivariate_normal(mean2, s2, N_PER_CLASS)
X = np.vstack((data1, data2))
# Labels as a column vector: +1 for the first cloud, -1 for the second.
ones = np.ones(N_PER_CLASS).reshape(-1,1)
y = np.vstack((ones,-1*ones))
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [None]:
# Train with mini-batch gradient descent; fit() hands back the per-epoch loss curve.
model1 = LR(algorithm='sgd')
loss = model1.fit(Xtrain, ytrain)
# score() prints the accuracy itself: training split first, then the held-out split.
model1.score(Xtrain, ytrain)
model1.score(Xtest, ytest);

In [None]:
# Epochs are numbered from 1 on the x-axis; y is the loss recorded for each epoch.
epoch_numbers = list(range(1, len(loss) + 1))
plt.plot(epoch_numbers, loss)
plt.title('average loss for each epoch')
plt.xlabel('epoch')
plt.ylabel('loss')

In [None]:
# Boolean masks picking out the +1 rows of each split.
train_index = (ytrain==1).ravel()
test_index = (ytest==1).ravel()
plt.scatter(Xtrain[train_index, 0], Xtrain[train_index, 1], marker='.', c='b', s=100
            , label='train +1')
plt.scatter(Xtrain[~train_index, 0], Xtrain[~train_index, 1], marker='.', c='k', s=100
           , label='train -1')
plt.scatter(Xtest[test_index, 0], Xtest[test_index, 1], marker='D', c='r', label='test +1')
plt.scatter(Xtest[~test_index, 0], Xtest[~test_index, 1], marker='D', c='y', label='test -1')
# Evaluate the model on a 30x30 grid spanning the current axes limits.
ax = plt.gca()
xlim = ax.get_xlim()
ylim = ax.get_ylim()
axisx = np.linspace(xlim[0], xlim[1], 30)
axisy = np.linspace(ylim[0], ylim[1], 30)
axisx, axisy = np.meshgrid(axisx, axisy)
xy = np.vstack([axisx.ravel(), axisy.ravel()]).T
z = model1.predict(xy).reshape(axisx.shape)
# The zero level set of the raw output is where the predicted sign flips.
plt.contour(axisx, axisy, z, levels=[0], linestyles=['--'])
plt.legend()
# Title follows the algorithm the model was actually built with instead of a fixed 'inv'.
plt.title(model1.algorithm)
plt.show();