In [12]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# Load the breast-cancer features/labels with a single call
# (the original invoked the loader twice, once per attribute).
X, y = load_breast_cancer(return_X_y=True)
# Hold out 30% of the samples for evaluation. The split is seeded so that the
# accuracies reported further down can be reproduced after Restart & Run All.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

### 使用sklearn求解Logistic Regression

In [13]:
from sklearn.linear_model import LogisticRegression as sklearn_LogisticRegression
from sklearn.metrics import accuracy_score,classification_report
# Baseline: scikit-learn's own logistic regression (liblinear solver, with an
# intercept term), fitted on the training split.
model = sklearn_LogisticRegression(solver='liblinear', fit_intercept=True)
model.fit(X_train, y_train)
# Predict the held-out samples and display the accuracy as the cell output.
pre = model.predict(X_test)
accuracy_score(y_true=y_test, y_pred=pre)

0.935672514619883

### 使用梯度下降法

In [5]:
from scipy.special import expit
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    A constant column of ones is appended to the features so that the last
    weight acts as the intercept.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the (summed, not averaged) gradient.
    max_iters : int
        Number of gradient-descent iterations.
    random_state : int or None
        Seed for the random weight initialisation. ``None`` draws from
        NumPy's global random state, as the original implementation did.

    Attributes (set by ``fit``)
    ---------------------------
    coef_ : ndarray of shape (n_features,)
    intercept_ : float
    weights : ndarray of shape (n_features + 1, 1), intercept last.
    """

    def __init__(self, learning_rate=0.004, max_iters=1800, random_state=None):
        self.learning_rate = learning_rate
        self.max_iters = max_iters
        self.random_state = random_state

    @staticmethod
    def _add_bias(X):
        """Return X as a float array with a trailing column of ones."""
        X = np.asarray(X, dtype=float)
        return np.hstack([X, np.ones((X.shape[0], 1))])

    def fit(self, X, y):
        """Fit the weights on 2-D features ``X`` and 0/1 labels ``y``.

        Returns ``self`` so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` and ``y`` contain a different number of samples.
        """
        X_aug = self._add_bias(X)
        y_col = np.asarray(y, dtype=float).reshape(-1, 1)
        if y_col.shape[0] != X_aug.shape[0]:
            raise ValueError(
                "X and y have inconsistent numbers of samples: "
                f"{X_aug.shape[0]} != {y_col.shape[0]}"
            )

        # Plain ndarrays replace np.mat, which is deprecated and no longer
        # available in NumPy 2.0.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        weights = rng.randn(X_aug.shape[1], 1)

        for _ in range(self.max_iters):
            # Gradient of the negative log-likelihood: X^T (sigmoid(Xw) - y).
            error = expit(X_aug @ weights) - y_col
            weights = weights - self.learning_rate * (X_aug.T @ error)

        self.coef_ = weights[:-1, 0].copy()
        # Stored as a plain scalar (the original kept a 1x1 matrix here).
        self.intercept_ = float(weights[-1, 0])
        self.weights = weights
        return self

    def predict_proba(self, X):
        """Return the estimated probability of class 1 for every row of X.

        The original computed these values but never returned them.
        """
        return np.asarray(expit(self._add_bias(X) @ self.weights)).ravel()

    def predict(self, X):
        """Return 0/1 labels; a sample is labelled 1 when P(y=1) > 0.5."""
        return (self.predict_proba(X) > 0.5).astype(int)

In [6]:
# Train the gradient-descent implementation defined above on the training
# split, then report its accuracy on the held-out samples.
model = LogisticRegression()
model = model.fit(X_train, y_train)
y_pre = model.predict(X_test)
print(accuracy_score(y_true=y_test, y_pred=y_pre))

0.9122807017543859


#### 使用模型预测收入

In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Predict the income bracket from the categorical columns of DecisionTree.csv.
data = pd.read_csv('DecisionTree.csv')
X = data[['workclass', 'education', 'marital-status', 'occupation',
          'relationship', 'race', 'gender', 'native-country']]

# Normalise the label text before comparing. Stray whitespace or a trailing
# '.' (e.g. ' <=50K') would otherwise never equal '<=50K' and every row would
# silently become class 1.
# NOTE(review): a perfect accuracy on this task usually indicates exactly that
# kind of single-class target -- confirm against the raw CSV.
income = data['income'].astype(str).str.strip().str.rstrip('.')
y = [0 if label == '<=50K' else 1 for label in income.tolist()]
assert len(set(y)) == 2, (
    "income labels collapsed into a single class; check the raw label "
    "strings in DecisionTree.csv"
)

# One-hot encode the categorical features.
X = pd.get_dummies(X)
# Seeded 70/30 split so the reported accuracy is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)
model = LogisticRegression().fit(X_train.values, y_train)
y_pre = model.predict(X_test.values)
accuracy_score(y_test, y_pre)

1.0

### 使用随机梯度下降法

In [8]:
class LogisticRegression:
    """Binary logistic regression trained with stochastic gradient descent.

    A constant column of ones is appended to the features so that the last
    weight acts as the intercept. Each epoch visits every training sample
    exactly once, in a freshly shuffled order, with a decaying step size.

    Parameters
    ----------
    alpha : float
        Stored on the instance. ``fit`` does not read it: the step size comes
        from the decay schedule inside the training loop.
    maxCycles : int
        Stored on the instance. ``fit`` does not read it: the number of epochs
        is the ``numIter`` argument of ``fit``.
    random_state : int or None
        Seed for the per-epoch shuffling. ``None`` draws from NumPy's global
        random state, as the original implementation did.

    Attributes (set by ``fit``)
    ---------------------------
    coef_ : ndarray of shape (n_features,)
    intercept_ / intercept : scalar (``intercept`` is kept for backward
        compatibility)
    weights : ndarray of shape (n_features + 1, 1), intercept last.
    """

    def __init__(self, alpha=0.007, maxCycles=1800, random_state=None):
        self.alpha = alpha
        self.maxCycles = maxCycles
        self.random_state = random_state

    @staticmethod
    def _add_bias(X):
        """Return X as a float array with a trailing column of ones."""
        X = np.asarray(X, dtype=float)
        return np.hstack([X, np.ones((X.shape[0], 1))])

    def fit(self, X, y, numIter=150):
        """Run ``numIter`` epochs of SGD on features ``X`` and 0/1 labels ``y``.

        Returns the learned 1-D weight vector (intercept last); this return
        value is kept from the original implementation.
        """
        X_aug = self._add_bias(X)
        # Positional array, so a pandas Series with a non-default index works.
        y_arr = np.asarray(y, dtype=float).ravel()
        m, n = X_aug.shape

        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        weights = np.ones(n)
        for j in range(numIter):
            # A permutation gives true sampling without replacement. The
            # original drew a position in a shrinking index list but then used
            # that position itself as the row number, so later rows were
            # rarely visited.
            for i, row in enumerate(rng.permutation(m)):
                # Step size decays with progress; the constant keeps it > 0.
                alpha = 4 / (1.0 + j + i) + 0.0001
                error = expit(X_aug[row] @ weights) - y_arr[row]
                weights = weights - alpha * error * X_aug[row]

        self.coef_ = weights[:-1]
        self.intercept_ = weights[-1]
        self.intercept = self.intercept_  # legacy attribute name
        # Column ndarray instead of np.mat (no longer available in NumPy 2.0).
        self.weights = weights.reshape(-1, 1)
        return weights

    def predict_prob(self, X):
        """Return the estimated probability of class 1 for every row of X."""
        return np.asarray(expit(self._add_bias(X) @ self.weights)).ravel()

    def predict_proba(self, X):
        """Alias of ``predict_prob`` under the more common method name."""
        return self.predict_prob(X)

    def predict(self, X):
        """Return 0/1 labels; a sample is labelled 1 when P(y=1) > 0.5."""
        return (self.predict_prob(X) > 0.5).astype(int)

In [9]:
# Train the stochastic-gradient-descent implementation on the current
# training split and display its accuracy on the held-out samples.
model = LogisticRegression()
# The return value of fit() is not used here; the fitted state lives on `model`.
model.fit(X_train, y_train)
pre = model.predict(X_test)
accuracy_score(y_true=y_test, y_pred=pre)

1.0