回归模型
$$y = \frac{1}{{1 + {e^{ - z}}}}$$
最小化
$$l(\boldsymbol \beta ) = \sum\limits_{i = 1}^n {(-y_i\boldsymbol{\beta ^T}\boldsymbol {\hat x_i} + \ln (1 + {e^{{\beta ^T}\hat x_i}}))} $$
即
$$\beta ^* {\text{ = }}\mathop {\arg \min }\limits_\beta  l(\beta )$$
**以牛顿法求解为例**

In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
data = pd.read_csv("../data/irisdata.txt")
# 只保留两种标签，进行二分类任务
data = data[data['name'] != 'Iris-setosa']
data['name'].value_counts()

Iris-virginica     50
Iris-versicolor    50
Name: name, dtype: int64

In [3]:
# 分离标签，并将标签映射到数值
y = data['name']
y[y == 'Iris-versicolor'] = 1
y[y == 'Iris-virginica'] = 0
X = data.drop('name', axis=1)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.


In [4]:
# 划分训练集和验证集
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

In [5]:
class LogisticReressionClassifier:
    def __init__(self, max_iter):
        self.max_iter = max_iter
        self.beta = np.nan
        
    def sigmod(self, z):
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X, y):
        self.beta = np.random.normal(size=(X.shape[0], X.shape[1] + 1))
        self.X_hat = np.c_[X, np.ones(X.shape[0])]
        print(self.beta.shape, self.X_hat.shape)
        self.loss_function(X, y)
        for j in range(self.max_iter):
            pd1 = 0  # 一阶偏导
            for i in  range(len(y)):
                pd1 -= self.X_hat[i]*(y[i] - self.sigmod(np.dot(self.beta[i].T, self.X_hat[i])))
            pd2 = 0  # 二阶偏导
            for i in range(len(y)):
                pd2 += self.X_hat[i].dot(self.X_hat[i].T.dot(self.sigmod(self.beta[i].T.dot(self.X_hat[i]))*(1 - self.sigmod(self.beta[i].T.dot(self.X_hat[i])))))
            self.beta = self.beta - (1 / pd2)*pd1
        self.loss_function(X, y)
        print(self.beta.mean(axis=0))
        
    def loss_function(self, X, y):
        loss = 0
        for i in range(len(y)):
            loss += -y[i]*np.dot(self.beta[i].T, self.X_hat[i]) + np.log(1 + np.exp(np.dot(self.beta[i].T, self.X_hat[i])))
        print(loss)
    def predict(self, X):
        y = []
        X = np.c_[X, np.ones(X.shape[0])]
        for i in range(X.shape[0]):
            if (self.sigmod(np.mean(self.beta, axis=0).T.dot(X[i])) >= 0.5):
                y.append(1)
            else:
                y.append(0)
        return y


In [6]:
clf = LogisticReressionClassifier(10000)

In [7]:
clf.fit(X_train.values, y_train.values)

(50, 5) (50, 5)
172.700415072482
12.191770388709697
[  1.17432869  39.89508512 -20.44033299 -31.78804254  29.34639182]


In [8]:
y_pred = clf.predict(X_test.values)

In [9]:
sum(y_pred == y_test)/len(y_test)

0.92