# 1. Overview
Data set source:https://www.kaggle.com/tanyaganesan/iris-dataset-logistic-regression  
iris.csv: the original data set  
iris_1_2_train.csv：'Iris-setosa' and 'Iris-versicolor' training data set  
iris_1_2_test.csv：'Iris-setosa' and 'Iris-versicolor' test data set  
iris_train.csv：OvO multiple classifier training data set,'Iris-setosa','Iris-versicolor' and 'Iris-virginica'  
iris_test.csv: OvO multiple classifier test data set,'Iris-setosa','Iris-versicolor' and 'Iris-virginica'  

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

df = pd.read_csv('iris_1_2_train.csv')
size_mapping = {
           'Iris-setosa': 1,
           'Iris-versicolor': 0}

df['type'] = df['type'].map(size_mapping)
df

Unnamed: 0,x0,x1,x2,x3,x4,type
0,1,5.1,3.8,1.5,0.3,1
1,1,5.4,3.4,1.7,0.2,1
2,1,5.1,3.7,1.5,0.4,1
3,1,4.6,3.6,1.0,0.2,1
4,1,5.1,3.3,1.7,0.5,1
...,...,...,...,...,...,...
57,1,5.7,3.0,4.2,1.2,0
58,1,5.7,2.9,4.2,1.3,0
59,1,6.2,2.9,4.3,1.3,0
60,1,5.1,2.5,3.0,1.1,0


# 2. Logistic Regression Implementation with BGD, SGD, and Mini Batch GD


In [2]:
x = df.iloc[:, 0:-1]
y = df.iloc[:, -1]
# sigmoid function
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

## 2.1 BGD with L2 regularization

In [3]:
%%time
import time
def BGD_L2( x, y, epochs, alpha, Lambda):
    m = x.shape[0]
    n = x.shape[1]
    W = np.mat(np.ones((n, 1)))
    xMat = np.mat(x)
    yMat = np.mat(y).transpose()
    for i in range(epochs):
        h = sigmoid(xMat * W)
        #I set the value of Lambda is 1 for L2 regularization
        gradient = xMat.T * (h - yMat) / m + Lambda * W
        W = W - alpha * gradient
    return W

Wall time: 0 ns


## 2.2 SGD with L2 regularization

In [4]:
%%time
import time
def SGD_L2( x, y, epochs, alpha, Lambda):
    m = x.shape[0]
    n = x.shape[1]
    W = np.mat(np.ones((n, 1)))
    xMat = np.mat(x)
    yMat = np.mat(y).transpose()
    for i in range(epochs):
        h = sigmoid(xMat * W)
        #random index
        index = np.random.randint(m)
        #I set the value of Lambda is 1 for L2 regularization
        gradient = xMat[index].T * (h - yMat)[index] + Lambda * W
        W = W - alpha * gradient
    return W

Wall time: 0 ns


## 2.3 Mini Batch GD with L2 regularization

In [5]:
%%time
import time
def Mini_Batch_GD_L2( x, y, epochs, alpha, Lambda):
    m = x.shape[0]
    n = x.shape[1]
    indexs = []
    W = np.mat(np.ones((n, 1)))
    xMat = np.mat(x)
    yMat = np.mat(y).transpose()
    for i in range(epochs):
        h = sigmoid(xMat * W)
        index = np.random.randint(m)
        #The size of batch is 5
        batch_size = 5
        #I set the value of Lambda is 1 for L2 regularization
        gradient = xMat[index:index+batch_size].T * (h - yMat)[index:index+batch_size] / batch_size + Lambda * W
        W = W - alpha * gradient
    return W

Wall time: 0 ns


## 2.4 Implementation and Comparison with Different GD
The iteration times is 1000.  
Alpha is 0.001.  
L2 regularization Lambda is 1.  
The result of SGD and Mini batch GD may be different cause it is random! ! !

In [6]:
w1 = BGD_L2(x, y, 10000, 0.001, 1)
w2 = SGD_L2(x, y, 10000, 0.001, 1)
w3 = Mini_Batch_GD_L2(x, y, 10000, 0.001, 1)
df_test = pd.read_csv('iris_1_2_test.csv')
x_test = df_test.iloc[:, 0:-1]
y_test = df_test.iloc[:, -1]
w1_count = 0
w2_count = 0
w3_count = 0
y_pre1 = []
y_pre2 = []
y_pre3 = []
for i in x_test.values.tolist():
    y_pre1.append('Iris-setosa' if sigmoid(i * w1) >= 0.5 else 'Iris-versicolor')
    y_pre2.append('Iris-setosa' if sigmoid(i * w2) >= 0.5 else 'Iris-versicolor')
    y_pre3.append('Iris-setosa' if sigmoid(i * w3) >= 0.5 else 'Iris-versicolor')

test_size = len(df_test)
for i in range(test_size):
    if(y_test[i] == y_pre1[i]):
        w1_count+=1
    if (y_test[i] == y_pre2[i]):
        w2_count+=1
    if (y_test[i] == y_pre3[i]):
        w3_count+=1

# The results may be different cause it is random ! ! ! !
#In fact most of the times they are all 100 %
print('The BGD w is:\n{0}, the accuracy is {1}'.format(w1, w1_count / test_size))
print('The SGD w is:\n{0}, the accuracy is {1}'.format(w2, w2_count / test_size))
print('The Mini Batch GD w is:\n{0}, the accuracy is {1}'.format(w3, w3_count / test_size))


The BGD w is:
[[ 0.03782384]
 [ 0.05029671]
 [ 0.22287873]
 [-0.36482298]
 [-0.15334202]], the accuracy is 1.0
The SGD w is:
[[ 0.03676857]
 [ 0.04173796]
 [ 0.21962171]
 [-0.3696762 ]
 [-0.15478461]], the accuracy is 1.0
The Mini Batch GD w is:
[[ 0.03389111]
 [ 0.03332897]
 [ 0.20661149]
 [-0.36298849]
 [-0.15144594]], the accuracy is 1.0


# 3. OvO Multiple Classifier Implementation
I use BGD to implement the OvO Multiple Classifier, The data set has three label types of data. They are 'Iris-setosa','Iris-versicolor' and 'Iris-virginica'  

In [7]:
# 3 data sets
df = pd.read_csv('iris_train.csv')
df_1_2 = df.loc[(df['type'] == 'Iris-setosa') | (df['type'] == 'Iris-versicolor')].copy()
df_1_3 = df.loc[(df['type'] == 'Iris-setosa') | (df['type'] == 'Iris-virginica')].copy()
df_2_3 = df.loc[(df['type'] == 'Iris-versicolor') | (df['type'] == 'Iris-virginica')].copy()


size_mapping_1_2 = {
           'Iris-setosa': 1,
           'Iris-versicolor': 0}
size_mapping_1_3 = {
           'Iris-setosa': 1,
           'Iris-virginica': 0}
size_mapping_2_3 = {
           'Iris-versicolor': 1,
           'Iris-virginica': 0}

# here I set the value of m is 1,to get a better accuracy to classify  'Iris-versicolor' and 'Iris-virginica'.
def _BGD_L2( x, y, epochs, alpha, Lambda):
    m = x.shape[0]
    n = x.shape[1]
    W = np.mat(np.ones((n, 1)))
    xMat = np.mat(x)
    yMat = np.mat(y).transpose()
    for i in range(epochs):
        h = sigmoid(xMat * W)
        #I set the value of Lambda is 1 for L2 regularization
        gradient = xMat.T * (h - yMat)  + Lambda * W
        W = W - alpha * gradient
    return W

df_1_2['type'] = df_1_2['type'].map(size_mapping_1_2)
df_1_3['type'] = df_1_3['type'].map(size_mapping_1_3)
df_2_3['type'] = df_2_3['type'].map(size_mapping_2_3)
x_1_2 = df_1_2.iloc[:, 0:-1]
y_1_2 = df_1_2.iloc[:, -1]
x_1_3 = df_1_3.iloc[:, 0:-1]
y_1_3 = df_1_3.iloc[:, -1]
x_2_3 = df_2_3.iloc[:, 0:-1]
y_2_3 = df_2_3.iloc[:, -1]
w_1_2 = _BGD_L2(x_1_2, y_1_2, 1000, 0.001, 1)
w_1_3 = _BGD_L2(x_1_3, y_1_3, 1000, 0.001, 1)
w_2_3 = _BGD_L2(x_2_3, y_2_3, 1000, 0.001, 1)

df_test = pd.read_csv('iris_test.csv')
x_test = df_test.iloc[:, 0:-1]
y_test = df_test.iloc[:, -1]
y_pre = []
for i in x_test.values.tolist():
    # prediction times of every type
    dict = {'Iris-setosa': 0, 'Iris-versicolor': 0, 'Iris-virginica': 0}
    if sigmoid(i * w_1_2) >= 0.5:
        dict['Iris-setosa'] += 1
    else:dict['Iris-versicolor'] += 1

    if sigmoid(i * w_1_3) >= 0.5:
        dict['Iris-setosa'] += 1
    else:dict['Iris-virginica'] += 1

    if sigmoid(i * w_2_3) >= 0.5:
        dict['Iris-versicolor'] += 1
    else:dict['Iris-virginica'] += 1
    # find the max one
    y_pre.append(max(dict, key=lambda x: dict[x]))

test_size = len(df_test)
count = 0
print('real\tprediction')
for i in range(test_size):
    print(y_test[i]+'\t'+y_pre[i])
    if(y_pre[i] == y_test[i]):
        count += 1
print('Accuracy: ',count / test_size)

real	prediction
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-setosa	Iris-setosa
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	Iris-versicolor
Iris-versicolor	

# 4. Logistic Regression with scikit-learn tools

In [8]:
from sklearn.linear_model import LogisticRegression

x_train = df.iloc[:, 0:-1]
y_train = df.iloc[:, -1]
lr = LogisticRegression(multi_class="auto",max_iter=1000)
lr.fit(x_train, y_train)
acur = lr.score(x_test, y_test)
y_predict = lr.predict(x_test)  
print('real\tprediction')
for i in range(len(x_test)):
    print(y_test[i], y_predict[i])
print("Accuracy:", acur)

real	prediction
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-setosa Iris-setosa
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor Iris-versicolor
Iris-versicolor 

# 4. Conclusion

Ihe this lab, Three gradient descent methods and L2 regularization are used to implement the logistic regression. And BGD is used to implement the multiple classifier. Compare to the result of sk-learn, my classifier has a better accuracy.