In [1]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification, make_regression

from minimizers.minimize import CustomLossMinimizer, CustomLossClassifier, CustomLossRegressor


In [2]:
seed = 0

---
### Standard classification with  `CustomLossMinimizer`

In [3]:
X, y = make_classification(
    n_classes=2, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
del X, y

In [4]:
mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossMinimizer(random_state=seed)),
])
mod[-1].set_estimator_type('classifier')

CustomLossMinimizer(random_state=0)

In [5]:
mod.fit(X_train, y_train)

Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossMinimizer(random_state=0))])

In [6]:
y_hat = mod.predict(X_train).round()
(y_hat==y_train).mean()

0.9226666666666666

In [7]:
y_hat = mod.predict(X_test).round()
(y_hat==y_test).mean()

0.92

---
### Standard regression with  `CustomLossMinimizer`

In [8]:
X, y = make_regression(
    n_targets=1, 
    n_samples=1000, 
    n_features=10, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
del X, y

In [9]:
mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossMinimizer(random_state=seed)),
])
mod[-1].set_estimator_type('regressor')

CustomLossMinimizer(random_state=0)

In [10]:
mod.fit(X_train, y_train)

Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossMinimizer(random_state=0))])

In [11]:
y_hat = mod.predict(X_train).round()
((y_train-y_hat)**2).mean()**0.5

0.29203808573391965

In [12]:
y_hat = mod.predict(X_test).round()
((y_test-y_hat)**2).mean()**0.5

0.2839428616952984

---
### Binary classification with `CustomLossClassifier` 

In [13]:
X, y = make_classification(
    n_classes=2, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
del X, y

In [14]:
mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossClassifier(random_state=seed)),
])

In [15]:
mod.fit(X_train, y_train)

Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossClassifier(random_state=0))])

In [16]:
y_hat = mod.predict(X_train)

In [17]:
y_hat = mod.predict(X_train).argmax(1)
(y_hat==y_train).mean()

0.9226666666666666

In [18]:
y_hat = mod.predict(X_test).argmax(1)
(y_hat==y_test).mean()

0.92

---
### Multi-categorical classification with `CustomLossClassifier`

In [19]:
X, y = make_classification(
    n_classes=3, 
    n_samples=1000, 
    n_features=10, 
    flip_y=0., 
    n_clusters_per_class=1, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
del X, y

In [20]:
mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossClassifier(random_state=seed)),
])

In [21]:
mod.fit(X_train, y_train)

Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossClassifier(random_state=0))])

In [22]:
y_hat = mod.predict(X_train).argmax(1)
(y_hat==y_train).mean()

0.86

In [23]:
y_hat = mod.predict(X_test).argmax(1)
(y_hat==y_test).mean()

0.872

---
### Single output `CustomLossRegressor`

In [24]:
X, y = make_regression(
    n_targets=1, 
    n_samples=1000, 
    n_features=10, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
del X, y

In [25]:
mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossRegressor(random_state=seed)),
])

In [26]:
mod.fit(X_train, y_train)

Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossRegressor(random_state=0))])

In [27]:
y_hat = mod.predict(X_train).squeeze()
((y_train-y_hat)**2).mean()**0.5

4.9928711244253936e-05

In [28]:
y_hat = mod.predict(X_test).squeeze()
((y_test-y_hat)**2).mean()**0.5

5.395171147134042e-05

---
### Multi-output regression with `CustomLossRegressor`

In [29]:
X, y = make_regression(
    n_targets=2, 
    n_samples=1000, 
    n_features=10, 
    random_state=seed,
)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=seed)
del X, y

In [30]:
mod = Pipeline([
    ('sc', StandardScaler()),
    ('clf', CustomLossRegressor(random_state=seed)),
])

In [31]:
mod.fit(X_train, y_train)

Pipeline(steps=[('sc', StandardScaler()),
                ('clf', CustomLossRegressor(random_state=0))])

In [32]:
y_hat = mod.predict(X_train)
((y_train-y_hat)**2).mean(1).mean()**0.5

3.3306480815632496e-05

In [33]:
y_hat = mod.predict(X_test)
((y_test-y_hat)**2).mean(1).mean()**0.5

3.4671141657123606e-05

---