In [10]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
X,y=load_breast_cancer().data,load_breast_cancer().target
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3)

### 使用sklearn求解Logistic Regression

In [11]:
from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
from sklearn.metrics import accuracy_score,classification_report
model=sklearn_LogisticRegression(penalty='none',fit_intercept=True,solver='lbfgs').fit(X_train,y_train)
pre=model.predict(X_test)
accuracy_score(y_test,pre)



0.9707602339181286

### 使用梯度下降法

In [12]:
from scipy.special import expit
class LogisticRegression:
    def __init__(self,learning_rate=0.004,max_iters=1800):
        self.learning_rate=learning_rate
        self.max_iters=max_iters
    def fit(self,X,y):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=np.mat(np.concatenate([X,X_],axis=1))       
        y = np.mat(y).transpose()
        m,n = X.shape
        weights = np.mat(np.random.randn(n,1))
        for k in range(self.max_iters):
            h = expit(X*weights)  
            error = h - y            
            weights = weights - self.learning_rate * X.transpose()* error
        self.coef_=np.array(weights).flatten()[:-1]
        self.intercept_=weights[-1]
        self.weights=weights
        return self
    
    def predict(self,X):
        X_=np.full((X.shape[0],1),fill_value=1)  
        X=np.concatenate([X,X_],axis=1)
        y_value=np.array(expit(X@self.weights)).flatten()                
        y_pre=np.array([1 if i>0.5 else 0 for i in y_value])                 
        return y_pre
                    
    def predict_proba(self,X):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=np.concatenate([X,X_],axis=1)   
        y_value=np.array(expit(X@self.weights)).flatten()   

In [13]:
model=LogisticRegression().fit(X_train,y_train)
y_pre=model.predict(X_test)
print(accuracy_score(y_test,y_pre))

0.9239766081871345


### 使用随机梯度下降法

In [14]:
class LogisticRegression:
    def __init__(self,alpha=0.007,maxCycles=1800):
        self.alpha=alpha
        self.maxCycles=maxCycles
    def fit(self,X, y, numIter=150):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=(np.concatenate([X,X_],axis=1)) 
        m,n = X.shape
        weights = np.ones(n)   
        for j in range(numIter):
            dataIndex = list(range(m))
            for i in range(m):
                alpha = 4/(1.0+j+i)+0.0001   
                randIndex = int(np.random.uniform(0,len(dataIndex))) 
                h = expit(sum(X[randIndex]*weights))
                error = h-y[randIndex] 
                weights = weights - alpha * error * X[randIndex]
                del (dataIndex[randIndex])
        self.coef_=weights[:-1]
        self.intercept=weights[-1]
        self.weights=np.mat(weights).transpose()
        return weights
    def predict(self,X):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=(np.concatenate([X,X_],axis=1)) 
        y_value=np.array(expit(X@self.weights)).flatten()
        y_pre=np.array([1 if i>0.5 else 0 for i in y_value])
        return y_pre
    def predict_prob(self,X):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=(np.concatenate([X,X_],axis=1)) 
        y_value=np.array(expit(X@self.weights)).flatten()
        return y_value

In [15]:
model=LogisticRegression()
model.fit(X_train,y_train)
pre=model.predict(X_test)
accuracy_score(y_test,pre)

0.9298245614035088

### 使用Scipy求解带l2正则化的LogisticRegression

In [47]:
from scipy.special import expit
from scipy.optimize import fmin_l_bfgs_b
from sklearn.metrics import log_loss


class LogisticRegression:
    def __init__(self,c=0.0):
        self.c=c
        self.fitted=False
       
    def fit(self,X,y):
        m,n=X.shape
        W=np.random.randn(n+1)
        self.res=fmin_l_bfgs_b(type(self).log_loss,x0=W,args=(X,y,self.c))
        self.W=self.res[0]
        self.coef_=self.W[:-1]
        self.intercept_=self.W[-1]
        self.fitted=True
        return self
    
    def predict(self,X):
        if self.fitted==False:
            raise Exception('The model has not been trained yet, please train the model.')
        else:
            X_=np.full((X.shape[0],1),fill_value=1)   
            X=(np.concatenate([X,X_],axis=1)) 
            y_prob=expit(X@self.W.T+self.c*np.sum(self.intercept_**2))
            return np.array([1 if i>0.5 else 0 for i in y_prob])
    
    def predict_proba(self,X):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=(np.concatenate([X,X_],axis=1)) 
        y_prob=expit(X@self.W.T+self.c*np.sum(self.intercept_**2))
        return y_prob
    
    def log_loss(W,X,y,c):
        X_=np.full((X.shape[0],1),fill_value=1)   
        X=(np.concatenate([X,X_],axis=1)) 
        m=X.shape[0]
        y_pred=expit(X@W.T+c*np.sum(W[:-1]**2)) 
        y_pred=np.clip(y_pred,0.0001,0.9999)
        loss=np.sum(-(y*np.log(y_pred)+(1-y)*np.log(1-y_pred)))/m
        gradient=X.T@(y_pred-y)
        return loss,gradient

AttributeError: module 'numpy' has no attribute 'ra'

In [36]:
lr=LogisticRegression(c=0).fit(X_train,y_train)
lr.predict(X_test)



array([1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1,
       0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1])

In [37]:
accuracy_score(y_test,lr.predict(X_test))

0.9707602339181286

In [34]:
np.log(0.11)

-2.2072749131897207