# `sklearn` to `torch`

Inference-only PyTorch implementations of Logistic Regression and Isotonic Regression, built from fitted `sklearn` estimators.

Reference Links:
- https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
- https://scikit-learn.org/stable/modules/isotonic.html#isotonic

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from sklearn.isotonic import IsotonicRegression
from sklearn.linear_model import LogisticRegression

In [None]:
# Accelerator used for the on-device benchmarks below. Apple's MPS backend is
# preferred (the notebook's original target), then CUDA; the CPU is the
# fallback so every `.to(DEVICE)` cell still runs on machines with no GPU.
# `getattr` guards against torch builds that predate `torch.backends.mps`.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    DEVICE = "mps"
elif torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

## Data Generation

In [None]:
# Synthetic "image": a (10, 512, 512) tensor drawn from a standard normal
rand_feature_map = torch.normal(0, 1, (10, 512, 512))  # start w/ an image
X = rand_feature_map.numpy().reshape(-1)  # flatten to one value per element

# deterministically define y and add noise to X
# (labels come from the sign of the clean values, before any noise is added)
y = X >= 0
# Gaussian noise is added and the result is clipped into [0, 1]
X = np.clip(X + np.random.normal(0, 1, X.shape), 0, 1)

# take a subsample
# (only the first 10,000 elements are kept for fitting the models)
X = X[:10_000]
y = y[:10_000]

## Define and Fit Models

In [None]:
# Logistic regression on X as a single feature column (shape (n_samples, 1)).
# The fit result is assigned to `_` (presumably to keep the estimator repr
# out of the cell output).
lr = LogisticRegression()
_ = lr.fit(X.reshape(-1, 1), y)

# Isotonic regression on the 1-D X; out_of_bounds="clip" makes predictions
# outside the fitted X range take the value at the nearest end of that range.
ir = IsotonicRegression(out_of_bounds="clip")
_ = ir.fit(X, y)

## Module Definition

In [None]:
class TorchLogisticRegression(torch.nn.Module):
    """Element-wise PyTorch port of a fitted binary, single-feature sklearn ``LogisticRegression``.

    Every element of the input tensor is treated as one single-feature sample,
    and the module returns ``sigmoid(x * coef + intercept)`` for it, i.e. the
    positive-class probability that ``lr.predict_proba(...)[:, 1]`` yields.
    The output always has the same shape as the input.

    Args:
        lr: Fitted estimator whose ``coef_`` has shape ``(1, 1)`` and whose
            ``intercept_`` holds a single value.

    Raises:
        ValueError: If the estimator is not a binary, single-feature model.
            The element-wise formula used here would silently return wrong
            probabilities for anything else.
    """

    def __init__(self, lr: "LogisticRegression"):
        super().__init__()
        coef = np.asarray(lr.coef_)
        intercept = np.asarray(lr.intercept_)
        if coef.shape != (1, 1) or intercept.size != 1:
            raise ValueError(
                "TorchLogisticRegression supports only binary, single-feature "
                f"models; got coef_ of shape {coef.shape} and intercept_ of "
                f"shape {intercept.shape}"
            )
        # torch.tensor() copies, so the parameters never alias sklearn's
        # arrays. Casting to the default dtype (float32 unless reconfigured)
        # keeps float32 inputs from being promoted to float64 and lets the
        # module be moved to backends without float64 support, such as MPS.
        dtype = torch.get_default_dtype()
        self._coef = torch.nn.Parameter(torch.tensor(coef, dtype=dtype))
        self._intercept = torch.nn.Parameter(
            torch.tensor(intercept.reshape(1), dtype=dtype)
        )

    def forward(self, probs):
        """Return the positive-class probability for every element of ``probs``."""
        # Squeezing to 0-d scalars makes broadcasting keep the input's shape,
        # including for 1-D and 0-d inputs.
        return torch.sigmoid(probs * self._coef.squeeze() + self._intercept.squeeze())

In [None]:
class TorchIsotonicRegression(torch.nn.Module):
    """PyTorch port of a fitted sklearn ``IsotonicRegression`` (inference only).

    The fitted model is a piecewise-linear function through the points
    ``(X_thresholds_[n], y_thresholds_[n])``. Inputs outside the fitted range
    are clipped to it, which corresponds to ``out_of_bounds="clip"``; other
    ``out_of_bounds`` modes are not reproduced.

    Args:
        ir: Fitted estimator exposing ``X_thresholds_`` (strictly increasing)
            and ``y_thresholds_`` as 1-D arrays of equal length.

    Raises:
        ValueError: If the thresholds are empty, not 1-D, or differ in length.
    """

    def __init__(self, ir: "IsotonicRegression"):
        super().__init__()
        x = np.asarray(ir.X_thresholds_, dtype=np.float64)
        y = np.asarray(ir.y_thresholds_, dtype=np.float64)
        if x.ndim != 1 or x.size == 0 or x.shape != y.shape:
            raise ValueError(
                "expected non-empty 1-D thresholds of equal length; got "
                f"X_thresholds_ {x.shape} and y_thresholds_ {y.shape}"
            )
        # slopes[n] belongs to the segment [x[n], x[n+1]); the trailing 0 makes
        # the last knot evaluate to exactly y[-1].
        slopes = np.concatenate([np.diff(y) / np.diff(x), [0.0]])

        # torch.tensor() copies (no aliasing of sklearn's arrays). The default
        # dtype (float32 unless reconfigured) allows moving the module to
        # backends without float64 support, such as MPS. All three tensors are
        # frozen: this module is a lookup table, not a trainable layer.
        dtype = torch.get_default_dtype()
        self.x_vals = torch.nn.Parameter(torch.tensor(x, dtype=dtype), requires_grad=False)
        self.y_vals = torch.nn.Parameter(torch.tensor(y, dtype=dtype), requires_grad=False)
        self.slopes = torch.nn.Parameter(torch.tensor(slopes, dtype=dtype), requires_grad=False)

    def forward(self, inputs):
        """Calculates appropriate segment for every value
        and the linear interpolation w/ formula below

        Note:
            linear definition: y = y1 + (x - x1)*slope
            => slope[n] is for xs in interval x[n] - x[n+1]
            => y = y[n] + (x - x[n])*slopes[n]

        The result has the shape of ``inputs`` and the dtype of the module's
        threshold tensors.
        """
        # Compute in the module's dtype and clip to the fitted range
        # (the "clip" out-of-bounds behaviour).
        x = inputs.to(self.x_vals.dtype)
        x = torch.clamp(x, self.x_vals[0], self.x_vals[-1])

        # right=True returns i with x_vals[i-1] <= x < x_vals[i], so i - 1 is
        # the last knot at or below x. This is a binary search per element and
        # works for thresholds of any sign.
        ind = torch.bucketize(x, self.x_vals, right=True) - 1
        ind = ind.clamp(min=0)

        y = self.y_vals[ind] + (x - self.x_vals[ind]) * self.slopes[ind]
        # Guard against rounding pushing a value just outside the fitted range.
        return torch.clamp(y, self.y_vals.min(), self.y_vals.max())

### LogisticRegression

In [None]:
# Wrap the fitted sklearn model in its PyTorch counterpart
torchLR = TorchLogisticRegression(lr)
# sklearn input: the feature map as an (n_elements, 1) column, one sample per element
xs = rand_feature_map.numpy()
xs = xs.reshape(-1, 1)

In [None]:
%%timeit
# Baseline: sklearn positive-class probabilities, reshaped back to the feature-map shape
res = lr.predict_proba(xs)[:, 1]
res = res.reshape(rand_feature_map.shape)

45.4 ms ± 394 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%%timeit
# PyTorch module before the move to DEVICE; the NumPy conversion is part of the timing
with torch.inference_mode():
    res = torchLR.forward(rand_feature_map)
    _ = res.numpy()

8.92 ms ± 298 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
# Move both the module and the input tensor to the benchmark device
torchLR.to(DEVICE)
rand_feature_map = rand_feature_map.to(DEVICE)

In [None]:
%%timeit
# Same benchmark on DEVICE; .cpu() brings the result back before the NumPy
# conversion, so that transfer is included in the timing
with torch.inference_mode():
    res = torchLR.forward(rand_feature_map)
    _ = res.cpu().numpy()

4.15 ms ± 8.49 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### Test Results

In [None]:
# Bring the module and the input back to the CPU for the comparison with sklearn
torchLR.to("cpu")
rand_feature_map = rand_feature_map.to("cpu")

# PyTorch prediction over the whole feature map, without autograd tracking
with torch.no_grad():
    torch_res = torchLR(rand_feature_map).numpy()

# sklearn reference: one single-feature sample per element, positive-class column
xs = rand_feature_map.numpy().reshape(-1, 1)
sklearn_res = lr.predict_proba(xs)[:, 1].reshape(rand_feature_map.shape)

# Shown as the cell's output: True when both implementations agree
np.allclose(torch_res, sklearn_res)

### IsotonicRegression

In [None]:
# Wrap the fitted sklearn model in its PyTorch counterpart
torchIR = TorchIsotonicRegression(ir)
# sklearn input: the feature map flattened to 1-D, one sample per element
xs = rand_feature_map.numpy()
xs = xs.reshape(-1)

In [None]:
%%timeit
# Baseline: sklearn isotonic prediction, reshaped back to the feature-map shape
res = ir.predict(xs)
res = res.reshape(rand_feature_map.shape)

59.7 ms ± 167 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
%%timeit
# PyTorch module before the move to DEVICE; the NumPy conversion is part of the timing
with torch.inference_mode():
    res = torchIR.forward(rand_feature_map)
    _ = res.numpy()

266 ms ± 29.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
# Move both the module and the input tensor to the benchmark device
torchIR.to(DEVICE)
rand_feature_map = rand_feature_map.to(DEVICE)

In [None]:
%%timeit
# Same benchmark on DEVICE; .cpu() brings the result back before the NumPy
# conversion, so that transfer is included in the timing
with torch.inference_mode():
    res = torchIR.forward(rand_feature_map)
    _ = res.cpu().numpy()

16.6 ms ± 22.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


#### Test Results

In [None]:
# Bring the module and the input back to the CPU for the comparison with sklearn
torchIR.to("cpu")
rand_feature_map = rand_feature_map.to("cpu")

# PyTorch prediction over the whole feature map
with torch.inference_mode():
    torch_res = torchIR(rand_feature_map).numpy()

# sklearn reference on the flattened feature map, reshaped back for comparison
xs = rand_feature_map.numpy().reshape(-1)
sklearn_res = ir.predict(xs).reshape(rand_feature_map.shape)

# Shown as the cell's output: True when both implementations agree
np.allclose(torch_res, sklearn_res)