In [1]:
import os
from functools import partial

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification


In [2]:
# Move up to the repository root so the local `minimizers` package can be imported.
# The guard keeps the cell idempotent: re-running it no longer climbs one more
# directory each time. os.path.basename is used instead of splitting on '/' so the
# check also works with Windows path separators.
if os.path.basename(os.getcwd()) != 'GeneralLoss':
    os.chdir('..')
# Displayed as the cell result: True when the working directory is the repo root.
os.path.basename(os.getcwd()) == 'GeneralLoss'

True

In [3]:
from minimizers import links
from minimizers import losses
from minimizers.minimize import GeneralLossMinimizer, CustomLossClassifier

In [4]:
# Single seed reused for data generation, the train/test split and every
# estimator's random_state in the cells below.
seed = 0

In [5]:
# Synthetic binary classification data: 1000 samples, 10 features, one cluster
# per class, and flip_y=0. so no labels are randomly reassigned.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    n_clusters_per_class=1,
    flip_y=0.,
    random_state=seed,
)

# No test_size is given, so scikit-learn's default split proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=seed,
)

# Drop the unsplit arrays; later cells only use the train/test splits.
del X, y

---
### Cross Entropy

In [6]:
# losses.bce_loss with a sigmoid link, fitted on standardized features.
bce_estimator = GeneralLossMinimizer(
    loss_fn=losses.bce_loss,
    link_fn=links.sigmoid_link,
    random_state=seed,
)
# Flag the generic minimizer as a single-output classifier through its setters.
bce_estimator.set_estimator_type('classifier')
bce_estimator.set_multi_output(False)
model = Pipeline([('sc', StandardScaler()), ('est', bce_estimator)])

model.fit(X_train, y_train)

# Threshold the predictions at 0.5 to get 0/1 labels, then score each split.
# y_hat ends up holding the test-split labels, as before.
for caption, features, labels in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = (model.predict(features) > 0.5).astype(int).flatten()
    print(caption, (labels == y_hat).mean())

Training accuracy: 0.9226666666666666
Testing accuracy: 0.92


In [7]:
# Same data through CustomLossClassifier with losses.cce_loss and a softmax link.
cce_classifier = CustomLossClassifier(
    loss_fn=losses.cce_loss,
    link_fn=links.softmax_link,
    random_state=seed,
)
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('est', cce_classifier),
])
model.fit(X_train, y_train)

# predict() output is compared to the labels directly (no thresholding here).
y_hat = model.predict(X_train)
train_accuracy = np.mean(y_train == y_hat)
print('Training accuracy:', train_accuracy)

y_hat = model.predict(X_test)
test_accuracy = np.mean(y_test == y_hat)
print('Testing accuracy:', test_accuracy)

Training accuracy: 0.9226666666666666
Testing accuracy: 0.92


---
### Mean Absolute Error

In [8]:
# losses.mae_loss with a sigmoid link, fitted on standardized features.
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    (
        'est',
        GeneralLossMinimizer(
            loss_fn=losses.mae_loss,
            link_fn=links.sigmoid_link,
            random_state=seed,
        ),
    ),
])
# Configure the final step as a single-output classifier via its setters.
mae_estimator = model.named_steps['est']
mae_estimator.set_estimator_type('classifier')
mae_estimator.set_multi_output(False)

model.fit(X_train, y_train)

# Predictions above 0.5 are mapped to class 1, everything else to class 0.
for caption, features, labels in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = (model.predict(features) > 0.5).astype(int).flatten()
    print(caption, (labels == y_hat).mean())

Training accuracy: 0.9186666666666666
Testing accuracy: 0.888


In [9]:
# CustomLossClassifier with losses.cmae_loss and a softmax link.
# This is the only cell that passes an explicit tol (1e-3) to the estimator.
cmae_classifier = CustomLossClassifier(
    loss_fn=losses.cmae_loss,
    link_fn=links.softmax_link,
    random_state=seed,
    tol=1e-3,
)
model = Pipeline([('sc', StandardScaler()), ('est', cmae_classifier)])
model.fit(X_train, y_train)

# Score the predicted labels against the true labels on each split.
for caption, features, labels in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = model.predict(features)
    print(caption, (labels == y_hat).mean())

Training accuracy: 0.908
Testing accuracy: 0.892


---
### Negative Box-Cox

In [10]:
# Value bound to the loss's `lam` keyword below.
lambda_ = 0.5

# losses.neg_box_cox_loss with lam fixed through functools.partial, sigmoid link.
box_cox_loss = partial(losses.neg_box_cox_loss, lam=lambda_)
box_cox_estimator = GeneralLossMinimizer(
    loss_fn=box_cox_loss,
    link_fn=links.sigmoid_link,
    random_state=seed,
)
# Flag the generic minimizer as a single-output classifier through its setters.
box_cox_estimator.set_estimator_type('classifier')
box_cox_estimator.set_multi_output(False)
model = Pipeline([('sc', StandardScaler()), ('est', box_cox_estimator)])

model.fit(X_train, y_train)

# Threshold the predictions at 0.5 to get 0/1 labels, then score each split.
y_hat = (model.predict(X_train) > 0.5).astype(int).flatten()
train_accuracy = np.mean(y_train == y_hat)
print('Training accuracy:', train_accuracy)

y_hat = (model.predict(X_test) > 0.5).astype(int).flatten()
test_accuracy = np.mean(y_test == y_hat)
print('Testing accuracy:', test_accuracy)

Training accuracy: 0.9226666666666666
Testing accuracy: 0.916


In [11]:
# Value bound to the loss's `lam` keyword below.
lambda_ = 0.5

# CustomLossClassifier with losses.multi_neg_box_cox_loss (lam fixed via partial)
# and a softmax link.
multi_box_cox_loss = partial(losses.multi_neg_box_cox_loss, lam=lambda_)
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    (
        'est',
        CustomLossClassifier(
            loss_fn=multi_box_cox_loss,
            link_fn=links.softmax_link,
            random_state=seed,
        ),
    ),
])
model.fit(X_train, y_train)

# Score the predicted labels against the true labels on each split.
for caption, features, labels in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = model.predict(features)
    print(caption, (labels == y_hat).mean())

Training accuracy: 0.9226666666666666
Testing accuracy: 0.916


---
### Maximum Likelihood Estimate

In [12]:
# losses.binomial_mle with a sigmoid link, fitted on standardized features.
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    (
        'est',
        GeneralLossMinimizer(
            loss_fn=losses.binomial_mle,
            link_fn=links.sigmoid_link,
            random_state=seed,
        ),
    ),
])
# Configure the final step as a single-output classifier via its setters.
mle_estimator = model.steps[-1][1]
mle_estimator.set_estimator_type('classifier')
mle_estimator.set_multi_output(False)

model.fit(X_train, y_train)

# Predictions above 0.5 are mapped to class 1, everything else to class 0.
for caption, features, labels in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = (model.predict(features) > 0.5).astype(int).flatten()
    print(caption, (labels == y_hat).mean())

Training accuracy: 0.9226666666666666
Testing accuracy: 0.92


In [13]:
# CustomLossClassifier with losses.multinomial_mle and a softmax link.
multinomial_classifier = CustomLossClassifier(
    loss_fn=losses.multinomial_mle,
    link_fn=links.softmax_link,
    random_state=seed,
)
model = Pipeline([('sc', StandardScaler()), ('est', multinomial_classifier)])
model.fit(X_train, y_train)

# predict() output is compared to the labels directly (no thresholding here).
y_hat = model.predict(X_train)
train_accuracy = np.mean(y_train == y_hat)
print('Training accuracy:', train_accuracy)

y_hat = model.predict(X_test)
test_accuracy = np.mean(y_test == y_hat)
print('Testing accuracy:', test_accuracy)

Training accuracy: 0.9226666666666666
Testing accuracy: 0.92


---
### Hinge Loss

In [14]:
# Value bound to the hinge loss's `power` keyword below.
power = 1.

# Re-encode the 0/1 labels as -1/+1 for this cell.
yp_train = np.where(y_train == 0, -1, 1)
yp_test = np.where(y_test == 0, -1, 1)

# losses.hinge_loss (power fixed via partial) with a linear link.
hinge_estimator = GeneralLossMinimizer(
    loss_fn=partial(losses.hinge_loss, power=power),
    link_fn=links.linear_link,
    random_state=seed,
)
# Flag the generic minimizer as a single-output classifier through its setters.
hinge_estimator.set_estimator_type('classifier')
hinge_estimator.set_multi_output(False)
model = Pipeline([('sc', StandardScaler()), ('est', hinge_estimator)])

model.fit(X_train, yp_train)

# The sign of the prediction decides the class: > 0 maps to +1, otherwise -1.
for caption, features, signed_labels in (
    ('Training accuracy:', X_train, yp_train),
    ('Testing accuracy:', X_test, yp_test),
):
    y_hat = np.where(model.predict(features) > 0., 1, -1).flatten()
    print(caption, (signed_labels == y_hat).mean())

Training accuracy: 0.9226666666666666
Testing accuracy: 0.924


---
### Perceptron Loss

In [15]:
# losses.perceptron_loss with a sigmoid link, fitted on standardized features.
# (The unused `power = 1.` assignment copied from the hinge cell was removed;
# perceptron_loss is passed without any bound keyword.)
#
# NOTE(review): this cell trains on -1/+1 labels but uses links.sigmoid_link and
# a 0.5 decision threshold, and its recorded accuracy is well below the other
# cells. Presumably the sigmoid link keeps predictions positive, which would not
# match -1/+1 targets -- confirm whether links.linear_link with a 0. threshold
# (as in the hinge cell) was intended.
model = Pipeline([
    ('sc', StandardScaler()),
    ('est', GeneralLossMinimizer(
        loss_fn=losses.perceptron_loss,
        link_fn=links.sigmoid_link,
        random_state=seed,
    )),
])
# Flag the generic minimizer as a single-output classifier through its setters.
model[-1].set_estimator_type('classifier')
model[-1].set_multi_output(False)

# Re-encode the 0/1 labels as -1/+1 for this cell.
yp_train = np.where(y_train==0, -1, 1)
yp_test = np.where(y_test==0, -1, 1)
model.fit(X_train, yp_train)

# Predictions above 0.5 map to +1, everything else to -1.
y_hat = np.where(model.predict(X_train)>0.5, 1, -1).flatten()
print('Training accuracy:', (yp_train==y_hat).mean())

y_hat = np.where(model.predict(X_test)>0.5, 1, -1).flatten()
print('Testing accuracy:', (yp_test==y_hat).mean())

Training accuracy: 0.7933333333333333
Testing accuracy: 0.76


---