In [None]:
%matplotlib inline

# Import Dependencies

In [None]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [None]:
# Default size (width, height in inches) for every figure drawn below
plt.rcParams.update({'figure.figsize': (7.0, 5.0)})

# Data Handling

In [None]:
# Load only the two feature columns and the class label
df = pd.read_csv('data.csv', usecols=['x1', 'x2', 'y'])
# Preview the first rows instead of dumping the whole frame into the output
df.head()

In [None]:
# One figure per feature: feature value on the x-axis, class label on the y-axis
for feature, colour in (('x1', 'r'), ('x2', 'b')):
  plt.scatter(df[feature], df['y'], c=colour)
  plt.title(f'{feature} vs y')
  plt.xlabel(feature)
  plt.ylabel('y')
  plt.show()

In [None]:
# Both features in one plane, each point coloured by its class label
plt.scatter(
  x=df['x1'],
  y=df['x2'],
  c=df['y'],
  cmap=matplotlib.colors.ListedColormap(['red', 'blue']),
)
plt.xlabel('x1')
plt.ylabel('x2')
plt.title('x1 vs x2')
plt.show()

In [None]:
# Hold out 25% of the rows for evaluation. The fixed random_state makes the
# shuffle (and therefore every result below) reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
  df[['x1', 'x2']], df.y, test_size=0.25, random_state=42)

# Gradient Descent

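We measure the goodness of a logistic regression fit with the negative log-likelihood (binary cross-entropy) averaged over the $n$ training samples; minimizing it is equivalent to Maximum Likelihood Estimation (MLE).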

$$\begin{equation*}
  -\frac{1}{n}\sum_{i=1}^n (y_i\ln(\hat{y}_i) + (1-y_i)\ln(1-\hat{y}_i))
\end{equation*}$$

where recall that

$$\begin{equation*}
  \hat{y} = \frac{1}{1+e^{-(\theta_0+\theta_1x)}}
\end{equation*}$$

In [None]:
def sigmoid(x, theta_0, theta_1):
  """Evaluate the logistic function of the linear term theta_0 + theta_1 * x.

  Args:
    x: Input value(s); anything numpy can broadcast (scalar, array, Series).
    theta_0: Intercept of the linear term.
    theta_1: Slope of the linear term.

  Returns:
    1 / (1 + exp(-(theta_0 + theta_1 * x))), with the same shape as x.
  """
  linear_term = theta_0 + theta_1 * x
  denominator = 1 + np.exp(-linear_term)
  return 1 / denominator

In [None]:
def loss(labels: pd.Series,
         predictions: pd.Series) -> float:
  """Mean binary cross-entropy (negative log-likelihood) of the predictions.

  Args:
    labels: True class labels, each 0 or 1.
    predictions: Predicted probabilities of class 1, aligned with `labels`.

  Returns:
    -(1/n) * sum(y * ln(p) + (1 - y) * ln(1 - p)).
  """
  # Keep probabilities strictly inside (0, 1): a prediction that saturates at
  # exactly 0 or 1 would otherwise produce log(0) = -inf and a nan/inf loss.
  eps = np.finfo(float).eps
  clipped = np.clip(predictions, eps, 1 - eps)
  log_likelihood = (labels * np.log(clipped)
                    + (1 - labels) * np.log(1 - clipped)).sum()
  return -log_likelihood / len(labels)

In [None]:
def train(X: "pd.Series | pd.DataFrame",
          labels: pd.Series,
          learning_rate: float,
          max_epochs: int = 100_000,
          verbose: bool = True):
  """Fit a logistic regression by batch gradient descent on the mean log loss.

  Args:
    X: Training inputs. A Series (one feature) or a DataFrame / 2-D array
      with one column per feature.
    labels: Class labels (0 or 1), one per row of `X`.
    learning_rate: Step size applied to every gradient update.
    max_epochs: Upper bound on the number of updates, so the loop always
      terminates even if the loss never satisfies the convergence test.
    verbose: When True, print the loss at every epoch.

  Returns:
    A tuple `(m, c)`: `m` is the fitted slope (a scalar for one-feature
    input, otherwise an array with one weight per column of `X`) and `c` is
    the fitted intercept.

  Raises:
    ValueError: If `X` is empty or `X` and `labels` differ in length.
  """
  if len(X) == 0:
    raise ValueError('X must contain at least one sample')
  if len(X) != len(labels):
    raise ValueError('X and labels must have the same length')

  single_feature = np.ndim(X) == 1
  # Plain arrays keep pandas index/column alignment out of the arithmetic
  features = np.asarray(X, dtype=float).reshape(len(X), -1)
  targets = np.asarray(labels, dtype=float)

  # Initialize variables
  n = float(len(features))
  m, c = np.zeros(features.shape[1]), 0.0
  previous_error = None

  # Start training loop
  for epoch in range(1, max_epochs + 1):
    # sigmoid(x, theta_0, theta_1) evaluates sigma(theta_0 + theta_1 * x), so
    # the weighted feature sum goes in as x with the intercept c and unit slope
    predictions = sigmoid(features @ m, c, 1.0)
    # Calculate loss
    error = loss(targets, predictions)
    # Print training info
    if verbose:
      print(f'Error at epoch {epoch}: {error}')
    # Gradient of the mean log loss is mean((prediction - label) * input)
    residuals = predictions - targets
    # Partial derivative w.r.t. m
    D_m = (1 / n) * (features.T @ residuals)
    # Partial derivative w.r.t. c
    D_c = (1 / n) * residuals.sum()
    # Update variables (step against the gradient to decrease the loss)
    m = m - learning_rate * D_m
    c = c - learning_rate * D_c
    # Stop once the loss has stopped improving by more than 0.01% per epoch
    if previous_error is not None and (
        previous_error == 0 or error / previous_error > 0.9999):
      break
    previous_error = error

  return (m[0] if single_feature else m), c

# Hyperparameters

In [None]:
# Step size passed to train() as its learning_rate argument
lr = 1e-4

# Train & Evaluate

First, we find the best-fit logistic regression using scikit-learn.

In [None]:
# Fit scikit-learn's logistic regression as the reference model
model = LogisticRegression()
model.fit(X_train, y_train)
# Score on the held-out split; as the last expression it is the cell's output.
# (A bare model.predict(X_test) before this line had its result discarded.)
model.score(X_test, y_test)

In [None]:
# Show the fitted feature weights and intercept of the scikit-learn model
print(model.coef_, model.intercept_)

In [None]:
# Intercept and per-feature weights of the fitted scikit-learn model
theta_0 = model.intercept_
theta_1 = model.coef_.T[0]
theta_2 = model.coef_.T[1]

# x1 endpoints of the boundary segment; these are the same values as the `x`
# list that the comparison plot draws against
x1 = np.array([0, 20])
# Decision boundary: theta_0 + theta_1 * x1 + theta_2 * x2 = 0, solved for x2
exp_pred = (theta_1 * x1 + theta_0) / (-theta_2)

Then, we do the same using our Gradient Descent method.

In [None]:
# Fit the hand-written gradient-descent model on the training split.
# X_train carries both feature columns (x1, x2).
# NOTE(review): confirm train() handles multi-column input.
m, c = train(X_train, y_train, lr)

In [None]:
# x1 endpoints of the boundary segment; these are the same values as the `x`
# list that the comparison plot draws against
boundary_x1 = np.array([0, 20])
# Decision boundary of the gradient-descent fit:
# c + m[0] * x1 + m[1] * x2 = 0, solved for x2, so it can be drawn as a line.
# NOTE(review): assumes m holds one weight per feature column (x1, x2) - confirm.
my_pred = (m[0] * boundary_x1 + c) / (-m[1])

Compare the two via visual inspection.

In [None]:
# x1 endpoints spanned by both boundary segments
x = [0, 20]

plt.scatter(df.x1, df.x2, c=df.y, cmap=matplotlib.colors.ListedColormap(['red', 'blue']))
plt.title('x1 vs x2')
plt.xlabel('x1')
plt.ylabel('x2')
# Label each line on its own plot call: a positional label list is matched to
# artists purely by order, and the scatter above is drawn before either line.
plt.plot(x, my_pred, c='k', label='Computed')
plt.plot(x, exp_pred, c='g', label='Expected')
plt.legend()
plt.show()