In [27]:
from torch import nn

from training.metrics import accuracy_at_k


def get_spacecutter_predictor(input_size: int):
    network = nn.Sequential(
        nn.Linear(input_size, 128),
        nn.ReLU(),
        nn.Linear(128, 64),
        nn.ReLU(),
        nn.Linear(64, 1, bias=False),
    )

    return network

In [68]:
from dataset.splitting_dataset import split_dataframe
from dataset.creating_dataset import min_max_scale_data
import pandas as pd
import os

PATH_TO_DATASET = os.path.join(
    "..", "preprocessed_bestiaries", "bestiaries_reduced.csv"
)
TEST_RESULT_FILE = os.path.join("results", "results_tests.xlsx")
TRAIN_RESULT_FILE = os.path.join("results", "results_trains.xlsx")


bestiaries = pd.read_csv(PATH_TO_DATASET, index_col=0)
bestiaries = min_max_scale_data(bestiaries)

X_train, X_test, y_train, y_test = split_dataframe(bestiaries)
y_train += 1
y_test += 1

In [30]:
num_features = X_train.shape[1]

predictor = nn.Sequential(
    nn.Linear(num_features, num_features),
    nn.ReLU(inplace=True),
    nn.Linear(num_features, num_features),
    nn.ReLU(inplace=True),
    nn.Linear(num_features, 1, bias=False),
)

In [19]:
from sklearn.metrics import mean_absolute_error, make_scorer


def mae_scorer(y_true, y_pred):
    return mean_absolute_error(y_true, y_pred.argmax(axis=1))


scoring = make_scorer(mae_scorer, greater_is_better=False, needs_proba=True)

In [22]:
import numpy as np
import torch
from torch import nn

from spacecutter.models import OrdinalLogisticModel


X = np.array([[0.5, 0.1, -0.1], [1.0, 0.2, 0.6], [-2.0, 0.4, 0.8]], dtype=np.float32)

y = np.array([0, 1, 2]).reshape(-1, 1)

num_features = X.shape[1]
num_classes = len(np.unique(y))

predictor = nn.Sequential(
    nn.Linear(num_features, num_features), nn.ReLU(), nn.Linear(num_features, 1)
)

model = OrdinalLogisticModel(predictor, num_classes)

y_pred = model(torch.as_tensor(X))

print(y_pred)

# tensor([[0.2325, 0.2191, 0.5485],
#         [0.2324, 0.2191, 0.5485],
#         [0.2607, 0.2287, 0.5106]], grad_fn=<CatBackward>)

tensor([[0.2747, 0.2326, 0.4927],
        [0.2747, 0.2326, 0.4927],
        [0.1705, 0.1880, 0.6415]], grad_fn=<CatBackward0>)


In [23]:
from skorch import NeuralNet

from spacecutter.callbacks import AscensionCallback


skorch_model = NeuralNet(
    module=OrdinalLogisticModel,
    module__predictor=predictor,
    module__num_classes=num_classes,
    criterion=CumulativeLinkLoss,
    train_split=None,
    callbacks=[
        ("ascension", AscensionCallback()),
    ],
)

skorch_model.fit(X, y)

RuntimeError: gather(): Expected dtype int64 for index

In [91]:
from copy import deepcopy

import torch
from torch import nn


class LogisticCumulativeLink(nn.Module):
    """
    Converts a single number to the proportional odds of belonging to a class.

    Parameters
    ----------
    num_classes : int
        Number of ordered classes to partition the odds into.
    init_cutpoints : str (default='ordered')
        How to initialize the cutpoints of the model. Valid values are
        - ordered : cutpoints are initialized to halfway between each class.
        - random : cutpoints are initialized with random values.
    """

    def __init__(self, num_classes: int, init_cutpoints: str = "ordered") -> None:
        assert num_classes > 2, "Only use this model if you have 3 or more classes"
        super().__init__()
        self.num_classes = num_classes
        self.init_cutpoints = init_cutpoints
        if init_cutpoints == "ordered":
            num_cutpoints = self.num_classes - 1
            cutpoints = torch.arange(num_cutpoints).float() - num_cutpoints / 2
            self.cutpoints = nn.Parameter(cutpoints)
        elif init_cutpoints == "random":
            cutpoints = torch.rand(self.num_classes - 1).sort()[0]
            self.cutpoints = nn.Parameter(cutpoints)
        else:
            raise ValueError(f"{init_cutpoints} is not a valid init_cutpoints " f"type")

    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Equation (11) from
        "On the consistency of ordinal regression methods", Pedregosa et. al.
        """
        sigmoids = torch.sigmoid(self.cutpoints - X)
        link_mat = sigmoids[:, 1:] - sigmoids[:, :-1]
        link_mat = torch.cat(
            (sigmoids[:, [0]], link_mat, (1 - sigmoids[:, [-1]])), dim=1
        )
        return link_mat


class OrdinalLogisticModel(nn.Module):
    """
    "Wrapper" model for outputting proportional odds of ordinal classes.
    Pass in any model that outputs a single prediction value, and this module
    will then pass that model through the LogisticCumulativeLink module.

    Parameters
    ----------
    predictor : nn.Module
        When called, must return a torch.FloatTensor with shape [batch_size, 1]
    init_cutpoints : str (default='ordered')
        How to initialize the cutpoints of the model. Valid values are
        - ordered : cutpoints are initialized to halfway between each class.
        - random : cutpoints are initialized with random values.
    """

    def __init__(
        self, predictor: nn.Module, num_classes: int, init_cutpoints: str = "ordered"
    ) -> None:
        super().__init__()
        self.num_classes = num_classes
        self.predictor = deepcopy(predictor)
        self.link = LogisticCumulativeLink(
            self.num_classes, init_cutpoints=init_cutpoints
        )

    # def fit(self, ):

    def forward(self, *args, **kwargs) -> torch.Tensor:
        return self.link(self.predictor(*args, **kwargs))

In [92]:
from skorch.callbacks import Callback
from torch.nn import Module

from spacecutter.models import LogisticCumulativeLink


class AscensionCallback(Callback):
    """
    Ensure that each cutpoint is ordered in ascending value.
    e.g.

    .. < cutpoint[i - 1] < cutpoint[i] < cutpoint[i + 1] < ...

    This is done by clipping the cutpoint values at the end of a batch gradient
    update. By no means is this an efficient way to do things, but it works out
    of the box with stochastic gradient descent.

    Parameters
    ----------
    margin : float, (default=0.0)
        The minimum value between any two adjacent cutpoints.
        e.g. enforce that cutpoint[i - 1] + margin < cutpoint[i]
    min_val : float, (default=-1e6)
        Minimum value that the smallest cutpoint may take.
    """

    def __init__(self, margin: float = 0.0, min_val: float = -1.0e6) -> None:
        super().__init__()
        self.margin = margin
        self.min_val = min_val

    def clip(self, module: Module) -> None:
        # NOTE: Only works for LogisticCumulativeLink right now
        # We assume the cutpoints parameters are called `cutpoints`.
        if isinstance(module, LogisticCumulativeLink):
            cutpoints = module.cutpoints.data
            for i in range(cutpoints.shape[0] - 1):
                cutpoints[i].clamp_(self.min_val, cutpoints[i + 1] - self.margin)

    def on_batch_end(self, net: Module, *args, **kwargs) -> None:
        net.module_.apply(self.clip)

In [93]:
import numpy as np
import torch
from torch import nn
from typing import Optional


def _reduction(loss: torch.Tensor, reduction: str) -> torch.Tensor:
    """
    Reduce loss

    Parameters
    ----------
    loss : torch.Tensor, [batch_size, num_classes]
        Batch losses.
    reduction : str
        Method for reducing the loss. Options include 'elementwise_mean',
        'none', and 'sum'.

    Returns
    -------
    loss : torch.Tensor
        Reduced loss.

    """
    if reduction == "elementwise_mean":
        return loss.mean()
    elif reduction == "none":
        return loss
    elif reduction == "sum":
        return loss.sum()
    else:
        raise ValueError(f"{reduction} is not a valid reduction")


def cumulative_link_loss(
    y_pred: torch.Tensor,
    y_true: torch.Tensor,
    reduction: str = "elementwise_mean",
    class_weights: Optional[np.ndarray] = None,
) -> torch.Tensor:
    """
    Calculates the negative log likelihood using the logistic cumulative link
    function.

    See "On the consistency of ordinal regression methods", Pedregosa et. al.
    for more details. While this paper is not the first to introduce this, it
    is the only one that I could find that was easily readable outside of
    paywalls.

    Parameters
    ----------
    y_pred : torch.Tensor, [batch_size, num_classes]
        Predicted target class probabilities. float dtype.
    y_true : torch.Tensor, [batch_size, 1]
        True target classes. long dtype.
    reduction : str
        Method for reducing the loss. Options include 'elementwise_mean',
        'none', and 'sum'.
    class_weights : np.ndarray, [num_classes] optional (default=None)
        An array of weights for each class. If included, then for each sample,
        look up the true class and multiply that sample's loss by the weight in
        this array.

    Returns
    -------
    loss: torch.Tensor

    """
    eps = 1e-15
    likelihoods = torch.clamp(
        torch.gather(y_pred, 1, y_true.unsqueeze(1)), eps, 1 - eps
    )
    neg_log_likelihood = -torch.log(likelihoods)

    if class_weights is not None:
        # Make sure it's on the same device as neg_log_likelihood
        class_weights = torch.as_tensor(
            class_weights,
            dtype=neg_log_likelihood.dtype,
            device=neg_log_likelihood.device,
        )
        neg_log_likelihood *= class_weights[y_true]

    loss = _reduction(neg_log_likelihood, reduction)
    return loss


class CumulativeLinkLoss(nn.Module):
    """
    Module form of cumulative_link_loss() loss function

    Parameters
    ----------
    reduction : str
        Method for reducing the loss. Options include 'elementwise_mean',
        'none', and 'sum'.
    class_weights : np.ndarray, [num_classes] optional (default=None)
        An array of weights for each class. If included, then for each sample,
        look up the true class and multiply that sample's loss by the weight in
        this array.

    """

    def __init__(
        self,
        reduction: str = "elementwise_mean",
        class_weights: Optional[torch.Tensor] = None,
    ) -> None:
        super().__init__()
        self.class_weights = class_weights
        self.reduction = reduction

    def forward(self, y_pred: torch.Tensor, y_true: torch.Tensor) -> torch.Tensor:
        return cumulative_link_loss(
            y_pred, y_true, reduction=self.reduction, class_weights=self.class_weights
        )

In [94]:
import numpy as np
import torch

num_classes = 23
model = OrdinalLogisticModel(predictor, num_classes)

X_tensor = torch.as_tensor(X_train.values.astype(np.float32))

predictor_output = predictor(X_tensor).detach()
model_output = model(X_tensor).detach()

# print(predictor_output)
# print(model_output)

In [95]:
from skorch import NeuralNet


skorch_model = NeuralNet(
    module=OrdinalLogisticModel,
    module__predictor=predictor,
    module__num_classes=num_classes,
    criterion=CumulativeLinkLoss,
    max_epochs=100,
    optimizer=torch.optim.Adam,
    optimizer__weight_decay=0.0,
    lr=0.1,
    device="cpu",
    callbacks=[
        ("ascension", AscensionCallback()),
    ],
    train_split=None,
    verbose=0,
)

In [96]:
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline


def to_float(x):
    return x.astype(np.float32)


pipeline = Pipeline([("caster", FunctionTransformer(to_float)), ("net", skorch_model)])

In [109]:
from sklearn.model_selection import GridSearchCV


class Spacecutter(GridSearchCV):
    def fit(self, X, y=None, **fit_params):
        X_train = X.values.astype(np.float32)
        super().fit(X_train, y, **fit_params)

    def predict(self, X):
        X_test = X.values.astype(np.float32)
        return super().predict(X_test).argmax(axis=1)

    def predict_proba(self, X):
        X_test = X.values.astype(np.float32)
        return super().predict_proba(X_test)

In [110]:
param_grid = {
    "net__max_epochs": [100],
    "net__lr": [0.1],
    "net__optimizer__weight_decay": [0],
}

sc_grid_search = Spacecutter(
    pipeline, param_grid, scoring=scoring, n_jobs=-1, cv=5, verbose=1, refit=True
)


sc_grid_search.fit(X_train, y_train)

Fitting 5 folds for each of 1 candidates, totalling 5 fits




In [114]:
len(sc_grid_search.predict(X_test).argmax(axis=1))

845

In [74]:
m = OrdinalLogisticModel(predictor, 23)

In [117]:
from training.create_model import get_fitted_model

model = get_fitted_model("spacecutter", X_train, y_train, 30)

  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m5.5006[0m        [32m5.1799[0m  0.1114




      2        [36m5.3635[0m        [32m4.9987[0m  0.1033
      3        [36m5.0826[0m        [32m4.5932[0m  0.1337
      4        [36m4.5376[0m        [32m3.9117[0m  0.1424
      5        [36m3.7408[0m        [32m3.0540[0m  0.1421
      6        [36m2.9834[0m        [32m2.4815[0m  0.1313
      7        [36m2.5256[0m        [32m2.1534[0m  0.1316
      8        [36m2.1995[0m        [32m1.9066[0m  0.1420
      9        [36m1.9696[0m        [32m1.7559[0m  0.1322
     10        [36m1.8255[0m        [32m1.6673[0m  0.1212


In [119]:
y_pred = model.predict(X_test)

In [122]:
len(y_pred), len(y_test)

(845, 845)

In [124]:
from training.metrics import accuracy_at_k

accuracy_at_k(y_pred, y_test)

IndexError: invalid index to scalar variable.

In [130]:
def accuracy_at_k(y_true: np.ndarray, y_predicted: np.ndarray, k: int = 0) -> float:
    """Calculates accuracy of prediction, allowing error of at most `k` classes."""
    result = np.sum(np.abs(y_true - y_predicted) <= k) / len(y_true)
    if isinstance(result, float):
        return result
    return result[0]


accuracy_at_k(y_test, y_pred, 1)

0.778698224852071

In [118]:
from training.train_and_evaluate_models import get_model_results

model_train_results, model_test_results = get_model_results(
    sc_grid_search,
    y_train,
    X_train,
    y_test,
    X_test,
    thresholds=[[0.05 * i for i in range(1, 20)], [0.05 * i for i in range(5, 16)]],
    model_name="spacecutter",
)

ValueError: y_true and y_pred have different number of output (1!=23)