In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
class LogR():
    """Binary logistic regression trained with mini-batch gradient descent.

    Labels must be encoded as +1 / -1. A bias term is learned by appending a
    constant column of ones to the feature matrix, so the weight vector has
    ``n_features + 1`` entries with the bias weight last.

    Parameters
    ----------
    batch_size : int
        Number of samples per mini-batch; the last batch of an epoch may be
        smaller.
    eta : float
        Learning rate of the gradient step.
    w0 : array-like of shape (n_features + 1,) or (n_features + 1, 1)
        Initial weights. The default (three zeros) fits 2-feature data.
    iteration : int
        Number of passes (epochs) over the training data.

    Attributes
    ----------
    batchsize, eta, iter : the constructor arguments.
    w : ndarray of shape (n_features + 1, 1), the current weights.
    """

    def __init__(self, batch_size=16, eta=0.001, 
                 w0=np.zeros(3).reshape(-1,1), iteration=100):
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        self.batchsize = batch_size
        self.eta = eta
        # Copy: the default w0 is one array shared by every instance, so it
        # must never be aliased by a model's weights.
        self.w = np.array(w0, dtype=float).reshape(-1, 1)
        self.iter = iteration

    @staticmethod
    def _add_bias(X):
        """Return X with a trailing column of ones (the bias feature)."""
        X = np.asarray(X, dtype=float)
        return np.hstack((X, np.ones((X.shape[0], 1))))

    def sigmoid(self, x):
        """Logistic function 1 / (1 + exp(-x)), evaluated without overflow."""
        x = np.asarray(x, dtype=float)
        # exp() only ever sees non-positive arguments, so it cannot overflow.
        e = np.exp(-np.abs(x))
        out = np.where(x >= 0, 1 / (1 + e), e / (1 + e))
        # [()] turns a 0-d result back into a scalar and leaves arrays as-is.
        return out[()]

    def fit(self, Xtrain, ytrain):
        """Train the weights and return the loss history.

        Training starts from the current ``self.w``, so calling ``fit`` again
        continues from the previously learned weights.

        Parameters
        ----------
        Xtrain : array-like of shape (n_samples, n_features)
        ytrain : array-like with n_samples labels in {+1, -1}

        Returns
        -------
        list of float
            One entry per epoch: the mean of that epoch's mini-batch losses,
            each batch loss being the mean of log(1 + exp(-y * w.x)).

        Raises
        ------
        ValueError
            If the training set is empty, or the sample counts or the number
            of features do not match.
        """
        X_expend = self._add_bias(Xtrain)
        y = np.asarray(ytrain, dtype=float).ravel()
        n = X_expend.shape[0]
        if n == 0:
            raise ValueError('Xtrain must contain at least one sample')
        if y.shape[0] != n:
            raise ValueError('Xtrain and ytrain must have the same number of samples')
        if X_expend.shape[1] != self.w.shape[0]:
            raise ValueError('w0 must have n_features + 1 entries')

        loss = []
        for _ in range(self.iter):
            epoch_loss = []
            for start in range(0, n, self.batchsize):
                # Slicing clips at the end of the data, so a short final
                # batch needs no special case.
                Xb = X_expend[start:start + self.batchsize]
                yb = y[start:start + self.batchsize]
                margins = yb * Xb.dot(self.w).ravel()
                # log(1 + exp(-m)) computed stably; loss and gradient are
                # built fresh for every batch and averaged over its true size.
                epoch_loss.append(np.logaddexp(0, -margins).mean())
                grad = ((-yb * self.sigmoid(-margins))[:, None] * Xb).mean(axis=0)
                self.w = self.w - self.eta * grad.reshape(-1, 1)
            loss.append(float(np.mean(epoch_loss)))
        return loss

    def score(self, Xtest, ytest):
        """Print the accuracy on a labelled set and return the probabilities.

        A sample counts as correct when ``sign(p - 0.5)`` equals its label, so
        an output of exactly 0.5 is counted as an error.

        Parameters
        ----------
        Xtest : array-like of shape (n_samples, n_features)
        ytest : array-like with n_samples labels in {+1, -1}

        Returns
        -------
        list of float
            Sigmoid output for every test sample, in input order.

        Raises
        ------
        ValueError
            If the test set is empty or the sample counts do not match.
        """
        probs = self.predict(Xtest).ravel()
        labels = np.asarray(ytest).ravel()
        n = probs.shape[0]
        if n == 0:
            raise ValueError('Xtest must contain at least one sample')
        if labels.shape[0] != n:
            raise ValueError('Xtest and ytest must have the same number of samples')
        count = int(np.sum(np.sign(probs - 0.5) != labels))
        print('Logistic Regression正确率为' + str((n-count)/n)[:6])
        return list(probs)

    def predict(self, X):
        """Return the sigmoid outputs for X as an array of shape (n_samples, 1)."""
        return self.sigmoid(self._add_bias(X).dot(self.w))

In [None]:
# Synthetic two-class data set: one unit-covariance Gaussian blob per class,
# labelled +1 (first blob) and -1 (second blob).
# A fixed seed drives both the sampling and the train/test split so that
# Restart & Run All reproduces the same data, accuracy and figures.
SEED = 42
N_PER_CLASS = 200
rng = np.random.default_rng(SEED)

mean1 = [-5, 0]
s1 = [[1, 0], [0, 1]]
data1 = rng.multivariate_normal(mean1, s1, N_PER_CLASS)
mean2 = [0, 5]
s2 = [[1, 0], [0, 1]]
data2 = rng.multivariate_normal(mean2, s2, N_PER_CLASS)
X = np.vstack((data1, data2))
# Column vector of labels, stacked in the same order as the samples in X.
ones = np.ones(N_PER_CLASS).reshape(-1,1)
y = np.vstack((ones,-1*ones))
# Hold out 20% of the samples for evaluation.
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, test_size=0.2, random_state=SEED)

In [None]:
# Train with the default hyper-parameters; fit() hands back the loss history
# (one value per epoch) that is plotted further down.
model = LogR()
loss = model.fit(Xtrain, ytrain)

# score() prints the test accuracy itself and returns the sigmoid output of
# every test sample.
Pr = model.score(Xtest, ytest)
print('分类概率为', Pr)

In [None]:
# Training loss per epoch, with epochs numbered from 1.
epochs = np.arange(1, len(loss) + 1)
plt.plot(epochs, loss)
plt.title(f'eta= {model.eta!s}')
plt.xlabel('epoch')
plt.ylabel('loss');

In [None]:
# Boolean masks picking the +1 samples; their negation picks the -1 samples.
train_index = ytrain.ravel() == 1
test_index = ytest.ravel() == 1

# Training points are drawn as large dots, held-out points as diamonds.
pos_train, neg_train = Xtrain[train_index], Xtrain[~train_index]
pos_test, neg_test = Xtest[test_index], Xtest[~test_index]
plt.scatter(pos_train[:, 0], pos_train[:, 1], s=100, c='b', marker='.', label='train +1')
plt.scatter(neg_train[:, 0], neg_train[:, 1], s=100, c='k', marker='.', label='train -1')
plt.scatter(pos_test[:, 0], pos_test[:, 1], c='r', marker='D', label='test +1')
plt.scatter(neg_test[:, 0], neg_test[:, 1], c='y', marker='D', label='test -1')

# Evaluate the model on a 30x30 grid spanning the current axes limits and
# draw the 0.5 level of its output as the decision boundary.
ax = plt.gca()
xlim, ylim = ax.get_xlim(), ax.get_ylim()
axisx, axisy = np.meshgrid(np.linspace(*xlim, 30), np.linspace(*ylim, 30))
xy = np.column_stack((axisx.ravel(), axisy.ravel()))
z = model.predict(xy).reshape(axisx.shape)
plt.contour(axisx, axisy, z, levels=[0.5], linestyles=['--'])

plt.legend()
plt.title('Logistic Regression')
plt.show()