In [13]:
from metrics import *
import pandas as pd
# torch is used below but was not imported in this cell; import it explicitly
# instead of relying on `from metrics import *` or on earlier kernel state.
import torch

from algorithms import LogisticRegression
from sklearn.datasets import make_classification, make_blobs
from sklearn.model_selection import train_test_split

# One seed for both data generation and the train/test split, so that
# "Restart Kernel -> Run All" reproduces the same split and the same metrics.
SEED = 777

# Alternative dataset, kept for experimentation:
# x, y = make_classification(n_samples=1000, n_features=2, n_informative=2, n_redundant=0, random_state=1)
x, y = make_blobs(n_samples=200, n_features=2, centers=2, random_state=SEED)

# Convert the NumPy arrays to float32 tensors (labels become 0.0 / 1.0).
x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

# Scale all features by the single largest absolute value, so every entry lies in [-1, 1].
# NOTE(review): the scale is computed on the full dataset before splitting — confirm that is intended.
x = x / torch.abs(x).max()

# Hold out 30% of the samples for testing; seeded so the split is deterministic.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=SEED)

In [14]:
# Build a linear-kernel logistic regression sized to the training features, then fit it.
n_features = x_train.shape[1]  # number of input columns in the training split
model = LogisticRegression(n_features, kernel='linear')
model = model.fit(
    x_train,
    y_train,
    1000,         # presumably the number of epochs (the training log ends at epoch 1000) — confirm against fit()
    l1_lambda=0,  # presumably turns the L1 penalty off — confirm against fit()
)

Epoch:     1 | CrossEntropyLoss:  0.81495
Epoch:     3 | CrossEntropyLoss:  0.81401
Epoch:     6 | CrossEntropyLoss:  0.81260
Epoch:    10 | CrossEntropyLoss:  0.81074
Epoch:    18 | CrossEntropyLoss:  0.80705
Epoch:    30 | CrossEntropyLoss:  0.80163
Epoch:    50 | CrossEntropyLoss:  0.79293
Epoch:    83 | CrossEntropyLoss:  0.77948
Epoch:   138 | CrossEntropyLoss:  0.75955
Epoch:   226 | CrossEntropyLoss:  0.73361
Epoch:   372 | CrossEntropyLoss:  0.70378
Epoch:   610 | CrossEntropyLoss:  0.67664
Epoch:  1000 | CrossEntropyLoss:  0.64846


## Testing

In [15]:
# Evaluate on the held-out split; `metric='f1'` is forwarded to metrics_tab.
metrics = model.metrics_tab(x_test, y_test, metric='f1')

# Per-class entries are stored under the float labels rendered as strings ('0.0', '1.0');
# relabel them as '0' / '1' for display.
per_class = {'0': metrics['0.0'], '1': metrics['1.0']}
tab = pd.DataFrame(per_class).T.round(3)  # transpose so each class becomes a row

print(f'Accuracy: {metrics["accuracy"]: 0.2%} | Threshold: {metrics["threshold"]: 0.2f}')
tab

Accuracy:  95.00% | Threshold:  0.51


Unnamed: 0,precision,recall,f1-score,support
0,0.931,0.964,0.947,28.0
1,0.968,0.938,0.952,32.0


In [16]:
# Run the model on the test split, then detach the result from the graph,
# move it to the CPU and flatten it into a 1-D NumPy array.
scores = model(x_test)
y_prob = scores.detach().cpu().numpy().flatten()

# `auc_roc` and `roc_curve_plot` are not imported by name in the visible cells —
# presumably they come from `from metrics import *`; verify.
print(f'AUC-ROC: {auc_roc(y_test, y_prob): 0.4f}')
roc_curve_plot(y_test, y_prob, fill=True)

AUC-ROC:  0.9900


## Visualization of initial data

In [18]:
from visualizer import make_distribution_plot

# Plot the training split with the fitted model, reusing the threshold
# that metrics_tab reported for the test evaluation.
make_distribution_plot(
    x_train,
    y_train,
    model,
    metrics['threshold'],
    k=0.,
    cnt_points=1000,
    insert_na=True,
    epsilon=1e-4,
)