# Simple Linear Classifier



In [None]:
from itertools import cycle
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [None]:
# Generate the data set: 1,000 two-dimensional points, each coordinate drawn
# from a normal distribution with mean 0 and standard deviation 1.
X = np.random.normal(loc=0, scale=1, size=(1_000, 2))

In [None]:
# Ground-truth separator, the line y = m*x + b: slope m and intercept b.
m = 2
b = -1.2

In [None]:
# Build the target classes.
# A point is in the "below" class when its second coordinate is under the
# separator line m*x + b after adding per-sample Gaussian noise (std 0.5),
# so the two classes overlap slightly around the line.
# The noise vector is sized from X itself rather than a hard-coded 1000, so
# this cell keeps working if the number of generated samples changes.
class_bool = X[:, 1] < m * X[:, 0] + b + np.random.normal(0, 0.5, (X.shape[0],))
# Encode the classes as -1.0 (below the line) and +1.0 (otherwise).
y = np.where(class_bool, -1.0, 1.0)

In [None]:
# Scatter each class in its own colour, then overlay the ground-truth
# separator line in red over x in [-3, 3).
plt.scatter(*X[class_bool].T)
plt.scatter(*X[~class_bool].T)
plt.plot(np.arange(-3, 3, 0.1), m * np.arange(-3, 3, 0.1) + b, linewidth=2, color="red")

In [None]:
class LinearClassifier(torch.nn.Module):
    """Two-class linear separator ``x1 = m * x0 + b`` with learnable ``m`` and ``b``.

    Points whose second coordinate lies below the line are labelled ``-1``,
    all others ``+1``. ``forward_train`` gives a differentiable score in
    (-1, 1); ``forward_test`` gives the hard -1/+1 decision.
    """

    def __init__(self):
        super().__init__()
        # Slope and intercept start at zero. float64 is kept deliberately: it
        # is the dtype the previous numpy-based initialisation produced.
        self.m = torch.nn.Parameter(torch.zeros(1, dtype=torch.float64))
        self.b = torch.nn.Parameter(torch.zeros(1, dtype=torch.float64))

    def forward(self, X):
        """Make the module callable (``lc(X)``); same as ``forward_train``."""
        return self.forward_train(X)

    def forward_train(self, X):
        """Return a differentiable score per row of ``X``.

        Args:
            X: 2-D tensor; column 0 is the x coordinate, column 1 the y
                coordinate of each point.

        Returns:
            1-D tensor ``p1 - p0``, where ``(p0, p1)`` is the softmax over
            ``(m * x0 + b, x1)``. It is negative when the point is below the
            line and positive when it is above.
        """
        line_height = X[:, 0] * self.m + self.b
        probs = torch.softmax(torch.stack([line_height, X[:, 1]], dim=1), dim=1)
        return probs[:, 1] - probs[:, 0]

    def forward_test(self, X):
        """Return hard class labels (-1 below the line, +1 otherwise).

        Args:
            X: 2-D numpy array or tensor laid out as in ``forward_train``.

        Returns:
            1-D tensor of -1.0 / +1.0 values, detached from autograd.
        """
        if isinstance(X, np.ndarray):
            X = torch.from_numpy(X)
        # Inference only: do not record an autograd graph for the comparison.
        with torch.no_grad():
            class_bool = X[:, 1] < X[:, 0] * self.m + self.b
            return -1.*class_bool + 1.*torch.logical_not(class_bool)

    def __repr__(self):
        return f"{self.m}, {self.b}"


In [None]:
class NumpyRepeatDataset(torch.utils.data.IterableDataset):
    """Endless stream of ``(X[i], y[i])`` pairs that restarts after the last one.

    Args:
        X: iterable of samples (e.g. a 2-D numpy array, iterated row by row).
        y: iterable of targets, paired with ``X`` element by element.
    """

    def __init__(self, X, y):
        super().__init__()
        self.X = X
        self.y = y
        # Lazily created iterator backing direct ``next(dataset)`` calls.
        self._shared_iter = None

    def __iter__(self):
        # Hand out a fresh, independent iterator on every call so that several
        # consumers (or a re-created DataLoader iterator) do not reset or
        # advance each other. zip keeps samples and targets paired.
        return cycle(zip(self.X, self.y))

    def __next__(self):
        # Kept so the dataset object itself can still be passed to ``next``;
        # it no longer requires a prior ``iter()`` call.
        if self._shared_iter is None:
            self._shared_iter = iter(self)
        return next(self._shared_iter)

In [None]:
# Create the model, the soft-margin loss used to train it, and a plain SGD
# optimiser (learning rate 0.1) over the model's parameters.
lc = LinearClassifier()
loss_fn = torch.nn.SoftMarginLoss()
optim = torch.optim.SGD(params=lc.parameters(), lr=0.1)

In [None]:
# Wrap the data in the repeating dataset, batch it 20 samples at a time, and
# keep one long-lived iterator that the training loop pulls batches from.
train_dataset = NumpyRepeatDataset(X, y)
train_dataloader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=20)
iter_train_dataloader = iter(train_dataloader)

In [None]:
# Number of optimisation steps, and the steps at which the loss is reported:
# every tenth of the run, up to and including the last step.
total_steps = 1_000
loss_steps = list(range(total_steps // 10, total_steps + 1, total_steps // 10))

In [None]:
# Train with mini-batch SGD, reporting the loss ten times over the run and
# plotting the current separator line at a few early steps.
total_steps = 1_000
# max() avoids a zero range step (ValueError) when total_steps < 10.
report_every = max(1, total_steps // 10)
loss_steps = set(range(report_every, total_steps + 1, report_every))
viz_steps = {1, 10, 50, 100, 200}

# Steps are counted from 1 so that the last entry of loss_steps
# (== total_steps) is actually reached and the printed labels are 1-based.
for i in range(1, total_steps + 1):
    optim.zero_grad()
    t_X, t_y = next(iter_train_dataloader)
    output = lc.forward_train(t_X)
    loss = loss_fn(output, t_y)
    loss.backward()
    optim.step()

    # Reporting happens after the update, so the plot labelled "i" shows the
    # line after exactly i parameter updates.
    if i in loss_steps:
        print(f"loss @ {i}th step: {loss}")
    if i in viz_steps:
        plt.scatter(X[class_bool, 0], X[class_bool, 1])
        plt.scatter(X[np.logical_not(class_bool), 0], X[np.logical_not(class_bool), 1])
        plt.plot(
            np.arange(-3, 3, 0.1),
            np.arange(-3, 3, 0.1)*lc.m.detach().cpu().numpy() + lc.b.detach().cpu().numpy(),
            linewidth=2, color="red"
        )
        plt.gca().set_title(f"{i}th step")
        plt.show()

In [None]:
# Print the classification report comparing the true labels with the
# model's hard predictions on the full data set.
print(classification_report(y_true=y, y_pred=lc.forward_test(X).numpy()))

In [None]:
# Scatter both classes and overlay the learned separator line (red) over
# x in [-3, 3).
plt.scatter(*X[class_bool].T)
plt.scatter(*X[~class_bool].T)
plt.plot(
    np.arange(-3, 3, 0.1),
    lc.m.item() * np.arange(-3, 3, 0.1) + lc.b.item(),
    linewidth=2,
    color="red",
)