In [1]:
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, make_regression

from minimizers.minimize import GeneralLossMinimizer, CustomLossClassifier, CustomLossRegressor


In [2]:
seed = 0

---
### Standard classification with  `GeneralLossMinimizer`

In [3]:
X, y = make_classification(
    n_classes=2, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', GeneralLossMinimizer(random_state=seed)),
])
mod[-1].set_estimator_type('classifier')
mod[-1].set_multi_output(False)

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train).round()
print('Training accuracy:', (y_hat==y_train).mean())

y_hat = mod.predict(X_test).round()
print('Testing accuracy:', (y_hat==y_test).mean())

print(mod)


Training accuracy: 0.9226666666666666
Testing accuracy: 0.92
Pipeline(steps=[('sc', StandardScaler()),
                ('clf', GeneralLossMinimizer(random_state=0))])


---
### Standard regression with  `GeneralLossMinimizer`

In [4]:
X, y = make_regression(
    n_targets=1, 
    n_samples=1000, 
    n_features=10, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', GeneralLossMinimizer(random_state=seed)),
])
mod[-1].set_estimator_type('regressor')
mod[-1].set_multi_output(False)

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train).round()
print('Training error:', ((y_train-y_hat)**2).mean()**0.5)

y_hat = mod.predict(X_test).round()
print('Testing error:', ((y_test-y_hat)**2).mean()**0.5)

print(mod)


Training error: 0.29203808573391965
Testing error: 0.2839428616952984
Pipeline(steps=[('sc', StandardScaler()),
                ('clf', GeneralLossMinimizer(random_state=0))])


---
### Binary classification with `CustomLossClassifier` 

In [5]:
X, y = make_classification(
    n_classes=2, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossClassifier(random_state=seed)),
])

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train)
print('Training accuracy:', (y_hat==y_train).mean())

y_hat = mod.predict(X_test)
print('Testing accuracy:', (y_hat==y_test).mean())

print(mod)


Training accuracy: 0.9226666666666666
Testing accuracy: 0.92
Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossClassifier(random_state=0))])


---
### Multi-categorical classification with `CustomLossClassifier`

In [6]:
X, y = make_classification(
    n_classes=3, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossClassifier(random_state=seed)),
])

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train)
print('Training accuracy:', (y_hat==y_train).mean())

y_hat = mod.predict(X_test)
print('Testing accuracy:', (y_hat==y_test).mean())

print(mod)


Training accuracy: 0.86
Testing accuracy: 0.872
Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossClassifier(random_state=0))])


---
### Single output `CustomLossRegressor`

In [7]:
X, y = make_regression(
    n_targets=1, 
    n_samples=1000, 
    n_features=10, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossRegressor(random_state=seed)),
])

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train)
print('Training error:', ((y_train-y_hat)**2).mean()**0.5)

y_hat = mod.predict(X_test)
print('Testing error:', ((y_test-y_hat)**2).mean()**0.5)

print(mod)


Training error: 4.9928711244253936e-05
Testing error: 5.395171147134042e-05
Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossRegressor(random_state=0))])


---
### Multi-output regression with `CustomLossRegressor`

In [8]:
X, y = make_regression(
    n_targets=2, 
    n_samples=1000, 
    n_features=10, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossRegressor(random_state=seed)),
])

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train)
print('Training error:', ((y_train-y_hat)**2).mean(1).mean()**0.5)

y_hat = mod.predict(X_test)
print('Testing error:', ((y_test-y_hat)**2).mean(1).mean()**0.5)

print(mod)


Training error: 3.618715002718675e-05
Testing error: 3.7006844356435374e-05
Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossRegressor(random_state=0))])


---
### Squared hinge loss with  `GeneralLossMinimizer`

In [9]:
def squared_hinge_loss(y, y_hat):

    zeros = np.zeros_like(y)
    margin = 1 - (y*y_hat)

    return np.c_[zeros, margin].max(1) ** 2

def linear_link(X, b):
    
    return X.dot(b)


X, y = make_classification(
    n_classes=2, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
y = np.where(y==1, 1, -1)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)

mod = Pipeline([
    ('sc', StandardScaler(
    )),
    ('clf', GeneralLossMinimizer(
        random_state=seed, 
        loss_fn=squared_hinge_loss, 
        link_fn=linear_link,
    )),
])

mod.fit(X_train, y_train)

y_hat = mod.predict(X_train)
y_hat = np.where(y_hat>0, 1, -1)
print('Training accuracy:', (y_hat==y_train).mean())

y_hat = mod.predict(X_test)
y_hat = np.where(y_hat>0, 1, -1)
print('Testing accuracy:', (y_hat==y_test).mean())

print(mod)


Training accuracy: 0.9226666666666666
Testing accuracy: 0.928
Pipeline(steps=[('sc', StandardScaler()),
                ('clf',
                 GeneralLossMinimizer(link_fn=<function linear_link at 0x106d01fc0>,
                                      loss_fn=<function squared_hinge_loss at 0x106d01c60>,
                                      random_state=0))])


---