In [1]:
import os
from functools import partial

import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification


In [2]:
# Move up to the repository root so the `minimizers` package can be imported.
# The guard makes the cell idempotent: re-running it no longer climbs one
# directory further each time.
if os.path.basename(os.getcwd()) != 'GeneralLoss':
    os.chdir('..')
# Sanity check shown as the cell output; basename() is separator-agnostic,
# unlike splitting the path on '/'.
os.path.basename(os.getcwd()) == 'GeneralLoss'

True

In [3]:
from minimizers import links
from minimizers import losses
from minimizers.minimize import GeneralLossMinimizer, CustomLossClassifier

In [4]:
# Single random seed reused for data generation, the train/test split and the
# `random_state` of every estimator below.
seed = 0

In [5]:
# Synthetic 3-class problem: 1000 samples, 10 features, one cluster per class
# and no label noise (flip_y=0).
features, labels = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=3,
    n_clusters_per_class=1,
    flip_y=0.,
    random_state=seed,
)
# Default train_test_split proportions, seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, random_state=seed
)
# Only the split arrays are needed from here on.
del features, labels

---
### Cross Entropy

In [6]:
# GeneralLossMinimizer with the cross-entropy loss and a softmax link, behind a
# StandardScaler. The estimator is switched to classifier / multi-output mode
# and fitted on one-hot targets; predictions are mapped back to class ids with
# argmax before being compared to the integer labels.
model = Pipeline([
    ('sc', StandardScaler()),
    ('est', GeneralLossMinimizer(
        loss_fn=losses.cce_loss,
        link_fn=links.softmax_link,
        random_state=seed,
    )),
])
model[-1].set_estimator_type('classifier')
model[-1].set_multi_output(True)

# One-hot encode the labels. The encoder is left at its default (sparse) output
# and densified with .toarray(): the `sparse=False` keyword was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, whereas this form
# works on every release and yields the same dense array.
ohe = OneHotEncoder()
ye_train = ohe.fit_transform(y_train.reshape(-1, 1)).toarray()
ye_test = ohe.transform(y_test.reshape(-1, 1)).toarray()

model.fit(X_train, ye_train)
y_hat = model.predict(X_train).argmax(1)
print('Training accuracy:', (y_train == y_hat).mean())

y_hat = model.predict(X_test).argmax(1)
print('Testing accuracy:', (y_test == y_hat).mean())

Training accuracy: 0.86
Testing accuracy: 0.872


In [7]:
# Cross-entropy loss with a softmax link again, this time through
# CustomLossClassifier: it is fitted on the integer labels directly and its
# predict() output is compared to them as-is.
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('est', CustomLossClassifier(
        random_state=seed,
        link_fn=links.softmax_link,
        loss_fn=losses.cce_loss,
    )),
])
model.fit(X_train, y_train)

# Report accuracy on the training split first, then on the held-out split.
for caption, X_part, y_part in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = model.predict(X_part)
    print(caption, (y_hat == y_part).mean())

Training accuracy: 0.86
Testing accuracy: 0.872


---
### Mean Absolute Error

In [8]:
# GeneralLossMinimizer with `losses.cmae_loss` (the mean-absolute-error loss of
# this section) and a softmax link, behind a StandardScaler. `tol=5e-3` is
# passed explicitly for this loss. The estimator is fitted on one-hot targets
# and predictions are mapped back to class ids with argmax.
model = Pipeline([
    ('sc', StandardScaler()),
    ('est', GeneralLossMinimizer(
        loss_fn=losses.cmae_loss,
        link_fn=links.softmax_link,
        random_state=seed,
        tol=5e-3,
    )),
])
model[-1].set_estimator_type('classifier')
model[-1].set_multi_output(True)

# One-hot encode the labels. The encoder is left at its default (sparse) output
# and densified with .toarray(): the `sparse=False` keyword was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, whereas this form
# works on every release and yields the same dense array.
ohe = OneHotEncoder()
ye_train = ohe.fit_transform(y_train.reshape(-1, 1)).toarray()
ye_test = ohe.transform(y_test.reshape(-1, 1)).toarray()

model.fit(X_train, ye_train)
y_hat = model.predict(X_train).argmax(1)
print('Training accuracy:', (y_train == y_hat).mean())

y_hat = model.predict(X_test).argmax(1)
print('Testing accuracy:', (y_test == y_hat).mean())

Training accuracy: 0.8293333333333334
Testing accuracy: 0.836


In [9]:
# `losses.cmae_loss` with a softmax link through CustomLossClassifier, using
# the explicit tol=5e-3. The classifier is fitted on the integer labels
# directly and its predict() output is compared to them as-is.
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('est', CustomLossClassifier(
        tol=5e-3,
        random_state=seed,
        link_fn=links.softmax_link,
        loss_fn=losses.cmae_loss,
    )),
])
model.fit(X_train, y_train)

# Report accuracy on the training split first, then on the held-out split.
for caption, X_part, y_part in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = model.predict(X_part)
    print(caption, (y_hat == y_part).mean())

Training accuracy: 0.8293333333333334
Testing accuracy: 0.836


---
### Negative Box-Cox

In [10]:
# Value bound to the `lam` keyword of the negative Box-Cox loss.
lambda_ = 0.5

# GeneralLossMinimizer with `losses.multi_neg_box_cox_loss` (lam fixed through
# functools.partial) and a softmax link, behind a StandardScaler. The estimator
# is fitted on one-hot targets and predictions are mapped back to class ids
# with argmax.
model = Pipeline([
    ('sc', StandardScaler()),
    ('est', GeneralLossMinimizer(
        loss_fn=partial(losses.multi_neg_box_cox_loss, lam=lambda_),
        link_fn=links.softmax_link,
        random_state=seed,
    )),
])
model[-1].set_estimator_type('classifier')
model[-1].set_multi_output(True)

# One-hot encode the labels. The encoder is left at its default (sparse) output
# and densified with .toarray(): the `sparse=False` keyword was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, whereas this form
# works on every release and yields the same dense array.
ohe = OneHotEncoder()
ye_train = ohe.fit_transform(y_train.reshape(-1, 1)).toarray()
ye_test = ohe.transform(y_test.reshape(-1, 1)).toarray()

model.fit(X_train, ye_train)
y_hat = model.predict(X_train).argmax(1)
print('Training accuracy:', (y_train == y_hat).mean())

y_hat = model.predict(X_test).argmax(1)
print('Testing accuracy:', (y_test == y_hat).mean())

Training accuracy: 0.8693333333333333
Testing accuracy: 0.86


In [11]:
# Value bound to the `lam` keyword of the negative Box-Cox loss.
lambda_ = 0.5

# `losses.multi_neg_box_cox_loss` (lam fixed through functools.partial) with a
# softmax link through CustomLossClassifier, fitted on the integer labels
# directly; its predict() output is compared to them as-is.
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('est', CustomLossClassifier(
        random_state=seed,
        link_fn=links.softmax_link,
        loss_fn=partial(losses.multi_neg_box_cox_loss, lam=lambda_),
    )),
])
model.fit(X_train, y_train)

# Report accuracy on the training split first, then on the held-out split.
for caption, X_part, y_part in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = model.predict(X_part)
    print(caption, (y_hat == y_part).mean())

Training accuracy: 0.8693333333333333
Testing accuracy: 0.86


---
### Maximum Likelihood Estimate

In [12]:
# GeneralLossMinimizer with `losses.multinomial_mle` as the objective and a
# softmax link, behind a StandardScaler. The estimator is fitted on one-hot
# targets and predictions are mapped back to class ids with argmax.
model = Pipeline([
    ('sc', StandardScaler()),
    ('est', GeneralLossMinimizer(
        loss_fn=losses.multinomial_mle,
        link_fn=links.softmax_link,
        random_state=seed,
    )),
])
model[-1].set_estimator_type('classifier')
model[-1].set_multi_output(True)

# One-hot encode the labels. The encoder is left at its default (sparse) output
# and densified with .toarray(): the `sparse=False` keyword was renamed to
# `sparse_output` in scikit-learn 1.2 and removed in 1.4, whereas this form
# works on every release and yields the same dense array.
ohe = OneHotEncoder()
ye_train = ohe.fit_transform(y_train.reshape(-1, 1)).toarray()
ye_test = ohe.transform(y_test.reshape(-1, 1)).toarray()

model.fit(X_train, ye_train)
y_hat = model.predict(X_train).argmax(1)
print('Training accuracy:', (y_train == y_hat).mean())

y_hat = model.predict(X_test).argmax(1)
print('Testing accuracy:', (y_test == y_hat).mean())

Training accuracy: 0.86
Testing accuracy: 0.872


In [13]:
# `losses.multinomial_mle` with a softmax link through CustomLossClassifier,
# fitted on the integer labels directly; its predict() output is compared to
# them as-is.
model = Pipeline(steps=[
    ('sc', StandardScaler()),
    ('est', CustomLossClassifier(
        random_state=seed,
        link_fn=links.softmax_link,
        loss_fn=losses.multinomial_mle,
    )),
])
model.fit(X_train, y_train)

# Report accuracy on the training split first, then on the held-out split.
for caption, X_part, y_part in (
    ('Training accuracy:', X_train, y_train),
    ('Testing accuracy:', X_test, y_test),
):
    y_hat = model.predict(X_part)
    print(caption, (y_hat == y_part).mean())

Training accuracy: 0.86
Testing accuracy: 0.872


---