In [35]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [36]:
data1_1 = pd.DataFrame({
    'x1': np.random.normal(1, .5, 1000), 
    'x2': np.random.normal(1, .5, 1000), 
    'y': 1
})
data1_2 = pd.DataFrame({
    'x1': np.random.normal(-1, .5, 10), 
    'x2': np.random.normal(-1, .5, 10), 
    'y': -1
})
data2_1 = pd.DataFrame({
    'x1': np.random.normal(1, .5, 100), 
    'x2': np.random.normal(1, .5, 100), 
    'y': 1
})
data2_2 = pd.DataFrame({
    'x1': np.random.normal(-1, .5, 100), 
    'x2': np.random.normal(-1, .5, 100), 
    'y': -1
})
data1_df = pd.concat([data1_1, data1_2], axis=0).reset_index(drop=True)
data2_df = pd.concat([data2_1, data2_2], axis=0).reset_index(drop=True)

In [37]:
def forward_pass(inputs, W):
    net = np.dot(inputs, W[:-1]) + W[-1]
    if net >= 0:
        h = 1
    else:
        h = -1
    return net, h

In [38]:
def forward_pass_tanh(inputs, W):
    net = np.dot(inputs, W[:-1]) + W[-1]
    h = np.tanh(net)
    return net, h

In [39]:
# def train_perceptron(X_train, y_train, gradient=False, max_epochs = 100):
#     lamda = .001
#     W = np.zeros(X_train.shape[1] + 1)
#     lr = .001
#     epochs = 0
#     while True:
#         errors = []
#         epochs += 1
#         for index in X_train.index:
#             net, h = forward_pass(X_train.loc[index], W)
#             if gradient:
#                 error = .5*(y_train.loc[index] - np.tanh(lamda*net)) ** 2
#             else:
#                 error = y_train.loc[index] - net
#             errors.append(error)
#             if gradient:
#                 W[:-1] += lr*(1 - h**2)*lamda*(y_train.loc[index] - h)*X_train.loc[index]
#                 W[-1] += lr*(1 - h**2)*lamda*(y_train.loc[index] - h)
#             else:  
#                 W[:-1] += lr*error*X_train.loc[index]
#                 W[-1] += lr*error
#         if not any(errors) or epochs > max_epochs:
#             break
#         if epochs % 10 == 0:
#             print('epoch: {}'.format(epochs))
#             print('sum of errors: {}'.format(sum(errors)))
#     return W

In [40]:
def train_perceptron(X_train, y_train, max_epochs = 100, gradient=False):
    lamda = .01
    W = np.zeros(X_train.shape[1] + 1)
    lr = .001
    epochs = 1
    while True:
        errors = []
        epochs += 1
        for index in X_train.index:
            net, h = forward_pass(X_train.loc[index], W)
            if gradient:
                error = .5*(y_train.loc[index] - np.tanh(lamda*net)) ** 2
            else:
                error = y_train.loc[index] - net
            errors.append(error)
            if gradient:
                W[:-1] += lr*(1 - h**2)*lamda*(y_train.loc[index] - h)*X_train.loc[index]
                W[-1] += lr*(1 - h**2)*lamda*(y_train.loc[index] - h)
            else:  
                W[:-1] += lr*error*X_train.loc[index]
                W[-1] += lr*error
        if not any(errors) or epochs > max_epochs:
            break
        if epochs % 10 == 0:
            print('epoch: {}'.format(epochs))
            print('sum of errors: {}'.format(sum(errors)))
    return W

In [41]:
def plot_dividing_lines(x, y, ax, points):
    sns.scatterplot(data=points, x='x1', y='x2', alpha=0.8, hue='y', palette='flare', ax=ax)
    sns.lineplot(x=x, y=y, color='black', ax=ax)

In [42]:
def find_dividing_line(X_train, y_train, df, max_epochs, gradient):
    W = train_perceptron(X_train, y_train, max_epochs=100, gradient=gradient)
    x1_min, x1_max = min(df['x1']), max(df['x1'])
    x = np.linspace(x1_min, x1_max, num=10000)
    y = (-W[0]*x - W[-1]) / W[1]
    return W, x, y

In [43]:
def find_misclassifieds(W, X_train, y_train):
    misclassified_indices = []
    for index in X_train.index:
        net, h = forward_pass(X_train.loc[index], W)
        if h != y_train.loc[index]:
            misclassified_indices.append(index)
    return misclassified_indices

In [None]:
fig = plt.figure(figsize=(15, 8))
for ax_index, data_df in enumerate([data1_df, data2_df]):
    X_train = data_df[['x1', 'x2']]
    y_train = data_df['y']
    W, x, y = find_dividing_line(X_train, y_train, data_df, max_epochs=100, gradient=False)
    ax = plt.subplot(1, 2, ax_index + 1)
    misclassified_indices = find_misclassifieds(W, X_train, y_train)
    plot_dividing_lines(x, y, ax, data_df)
    if len(misclassified_indices):
        sns.scatterplot(data=X_train.loc[misclassified_indices], x='x1', y='x2', ax=ax, color='red', label='misclassified points')
    ax.set_title('with {} misclassified points - accuracy {}'
                 .format(
                     len(misclassified_indices), (X_train.shape[0] - len(misclassified_indices)) / X_train.shape[0])
                )
    plt.tight_layout()

epoch: 10
sum of errors: 19.163559219383824
epoch: 20
sum of errors: 6.564025591606031
epoch: 30
sum of errors: 2.2499356166226523
epoch: 40
sum of errors: 0.7713065617882284


In [None]:
fig = plt.figure(figsize=(15, 8))
for ax_index, data_df in enumerate([data1_df, data2_df]):
    X_train = data_df[['x1', 'x2']]
    y_train = data_df['y']
    W, x, y = find_dividing_line(X_train, y_train, data_df, gradient=True)
    ax = plt.subplot(1, 2, ax_index + 1)
    misclassified_indices = find_misclassifieds(W, X_train, y_train)
    plot_dividing_lines(x, y, ax, data_df)
    if len(misclassified_indices):
        sns.scatterplot(data=X_train.loc[misclassified_indices], x='x1', y='x2', ax=ax, color='red', label='misclassified points')
    ax.set_title('with {} misclassified points - accuracy {}'
                 .format(
                     len(misclassified_indices), (X_train.shape[0] - len(misclassified_indices)) / X_train.shape[0])
                )
    plt.tight_layout()