# Regression Evaluation

Notebook for testing regression using the California Housing dataset


# Setup


In [None]:
import numpy as np
import pandas as pd
import scipy
import tqdm

# This is a draft---don't overengineer!
# NO renaming!
# NO refactoring!
# TODO: Remove this when the draft is done.

In [None]:
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

## Config


In [None]:
config = {
    # Seed passed to train_test_split and KFold so the splits are repeatable
    "random_state": 15943,
    # Fraction of the samples held out as the test set by train_test_split
    "test_size": 0.2,
    # Number of KFold splits used for cross-validation
    "n_splits": 5,
    # scikit-learn scorer name handed to cross_val_score (negated MSE)
    "scoring": "neg_mean_squared_error",
}

## Code

In [None]:
import torch
from torch import nn

In [None]:
from sklearn.base import BaseEstimator

In [None]:
# Choose where tensors live: prefer CUDA, then Apple MPS, otherwise the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
elif torch.backends.mps.is_available():
    DEVICE = "mps"
else:
    DEVICE = "cpu"
print(f"Using {DEVICE} device")

In [None]:
class TorchLinearEstimator(BaseEstimator):
    """Linear model ``y = X @ w + b`` fitted with hand-written full-batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    n_epoch : int
        Number of full-batch updates performed by ``fit``.

    Attributes (set by ``fit``)
    ---------------------------
    w_, b_ : torch.Tensor
        Final weights and bias, stored on ``DEVICE``.
    losses_ : np.ndarray
        Mean squared error computed at the start of each epoch.
    ws_, bs_ : np.ndarray
        Weights and bias as they were after each epoch's update.
    """

    def __init__(self, lr=1e-2, n_epoch=1000):

        self.lr = lr
        self.n_epoch = n_epoch

    def fit(self, X, y):
        """Fit the weights and bias to ``(X, y)`` and return ``self``.

        ``X`` is expected to be 2-D (samples, features); the number of weights
        is taken from ``X.shape[1]``.
        """

        X = torch.Tensor(X).to(DEVICE)
        y = torch.Tensor(y).to(DEVICE)

        # Create parameters, drawn uniformly from [-max(y), max(y)], and turn
        # on gradient tracking. The feature count comes from X itself so the
        # estimator does not depend on a notebook-level variable.
        bound = float(y.max())
        w = nn.init.uniform_(torch.rand(X.shape[1]), a=-bound, b=bound)
        b = nn.init.uniform_(torch.rand(1), a=-bound, b=bound)
        w = w.to(DEVICE).requires_grad_()
        b = b.to(DEVICE).requires_grad_()

        # Training loop
        losses = []
        ws = []
        bs = []
        for _ in tqdm.tqdm(range(self.n_epoch)):

            # Make the prediction
            y_pred = self.linear_model(X, w, b)

            # Get the loss
            loss = self.loss(y_pred, y)

            # Calculate the gradient
            loss.backward()

            # Modify the parameters (in place, outside of autograd)
            w.data -= w.grad.data * self.lr
            b.data -= b.grad.data * self.lr

            # Zero the gradient
            w.grad = None
            b.grad = None

            # Store. The parameters are updated in place and, on CPU,
            # ``.numpy()`` shares memory with the tensor, so an explicit copy
            # is needed to keep a per-epoch snapshot rather than N views of
            # the final values.
            losses.append(loss.detach().cpu().numpy())
            ws.append(w.detach().cpu().numpy().copy())
            bs.append(b.detach().cpu().numpy().copy())

        # Keep the fitted parameters detached from the training graph
        self.w_ = w.detach().clone()
        self.b_ = b.detach().clone()
        self.losses_ = np.array(losses)
        self.ws_ = np.array(ws)
        self.bs_ = np.array(bs)

        return self

    def predict(self, X):
        """Return predictions for ``X`` as a numpy array, using the fitted ``w_`` and ``b_``."""

        X = torch.Tensor(X).to(DEVICE)

        # Make the prediction
        y_pred = self.linear_model(X, self.w_, self.b_)

        return y_pred.cpu().detach().numpy()

    def linear_model(self, X, weights=None, bias=None):
        """The model itself: ``X @ weights + bias``.

        ``weights`` / ``bias`` default to the fitted ``w_`` / ``b_`` when omitted.
        """

        if weights is None:
            weights = self.w_
        if bias is None:
            bias = self.b_

        return X @ weights + bias

    def loss(self, y_pred, y_actual):
        """Mean squared error between the flattened predictions and targets."""

        return ((y_pred.flatten() - y_actual.flatten()) ** 2.0).mean()

In [None]:
from torch import optim
from torch.utils.data import TensorDataset, DataLoader

In [None]:
class TorchEstimator(BaseEstimator):
    """scikit-learn style wrapper that trains a torch module on mini-batches with an MSE loss.

    Parameters
    ----------
    net : torch.nn.Module
        The network to train; it is moved to ``device`` on construction.
    lr : float
        Learning rate passed to the optimizer.
    epochs : int
        Number of passes over the training data.
    batch_size : int
        Mini-batch size for both training and validation loaders.
    device : str
        Device the network and data are placed on.
    optimizer : callable
        Called as ``optimizer(params, lr=lr)``; the result must provide
        ``zero_grad()`` and ``step()``.

    Attributes (set by ``fit``)
    ---------------------------
    losses_ : list
        Sample-weighted mean training loss per epoch.
    losses_val_ : list
        Same for the validation data; only set when validation data is given.
    params_list_ : list
        Per-epoch snapshot (list of numpy arrays) of every parameter.
    """

    def __init__(
        self,
        net,
        lr: float = 1e-2,
        epochs: int = 20,
        batch_size: int = 64,
        device: str = DEVICE,
        optimizer=optim.Adam,
    ):

        self.net = net.to(device)
        self.lr = lr
        self.epochs = epochs
        self.batch_size = batch_size
        self.device = device
        self.optimizer = optimizer

    def fit(
        self,
        X: np.ndarray,
        y: pd.Series,
        X_val: np.ndarray = None,
        y_val: pd.Series = None,
    ) -> "TorchEstimator":
        """Train ``net`` on ``(X, y)`` and return ``self``.

        All parameters of ``net`` are re-initialized at the start of every call.
        When both ``X_val`` and ``y_val`` are given, the validation loss is
        evaluated after each epoch and stored in ``losses_val_``.
        """

        self.net.train()

        # Prep data
        X = torch.Tensor(X).to(self.device)
        y = torch.Tensor(y).to(self.device)
        dataloader = DataLoader(TensorDataset(X, y), batch_size=self.batch_size)

        # Prep validation data
        has_val = X_val is not None and y_val is not None
        if has_val:
            X_val = torch.Tensor(X_val).to(self.device)
            y_val = torch.Tensor(y_val).to(self.device)
            dataloader_val = DataLoader(
                TensorDataset(X_val, y_val), batch_size=self.batch_size
            )
            self.losses_val_ = []

        # Initialize parameters uniformly in [-max(y), max(y)]
        bound = float(y.max())
        params = [
            nn.init.uniform_(param.requires_grad_(), a=-bound, b=bound)
            for param in self.net.parameters()
        ]

        optimizer = self.optimizer(params, lr=self.lr)

        # Training loop
        self.losses_ = []
        self.params_list_ = []
        for _ in tqdm.tqdm(range(self.epochs)):
            self.net.train()
            loss = 0.0
            for X_j, y_j in dataloader:

                # Make the prediction
                y_pred_j = self.net(X_j)

                # Get the loss
                loss_j = self.loss_fn(y_pred_j, y_j)

                # Backpropagation
                optimizer.zero_grad()
                loss_j.backward()
                optimizer.step()

                # Weight by batch size so the epoch value is a per-sample mean
                loss += loss_j.detach().cpu().numpy() * len(y_j)

            # Store for later use
            loss /= len(y)
            self.losses_.append(loss)
            # Optimizers update parameters in place and, on CPU, ``.numpy()``
            # shares memory with the tensor, so copy to get a real snapshot of
            # this epoch instead of a view that later shows the final values.
            self.params_list_.append(
                [param.detach().cpu().numpy().copy() for param in params]
            )

            # Evaluation for validation data
            if has_val:
                self.net.eval()
                with torch.no_grad():
                    loss_val = 0.0
                    for X_val_j, y_val_j in dataloader_val:

                        # Make the prediction
                        pred_val_j = self.net(X_val_j)

                        # Get the loss
                        loss_val_j = self.loss_fn(pred_val_j, y_val_j)

                        loss_val += loss_val_j.detach().cpu().numpy() * len(y_val_j)

                    # Store for later use
                    loss_val /= len(y_val)
                    self.losses_val_.append(loss_val)

        return self

    def predict(self, X):
        """Run ``net`` in eval mode on ``X`` and return its output as a numpy array."""

        X = torch.Tensor(X).to(self.device)

        # Make the prediction; no gradients are needed for inference
        self.net.eval()
        with torch.no_grad():
            y_pred = self.net(X)

        return y_pred.cpu().detach().numpy()

    def loss_fn(self, y_pred, y_actual):
        """Mean squared error between the flattened predictions and targets."""
        return ((y_pred.flatten() - y_actual.flatten()) ** 2.0).mean()

In [None]:
class NetVisualizer:
    """Class for visualizing how a net transforms data."""

    def __init__(self, net):
        self.net = net

    def plot(self, X, x_axis="X"):
        """Plot the output of each direct child of the net, one panel per child.

        The input is reshaped to a single column and passed through the
        children in order; each panel shows that child's output.

        Parameters
        ----------
        X : array-like
            Input values; reshaped to ``(-1, 1)`` before entering the net.
        x_axis : {"X", "X_in"}
            ``"X"`` plots every panel against the original input, ``"X_in"``
            against the input of that panel's child.

        Returns
        -------
        The matplotlib figure holding the panels.

        Raises
        ------
        KeyError
            If ``x_axis`` is not one of the recognized options.
        ValueError
            If the net has no child modules to plot.
        """

        # Validate up front so a bad option does not leave a half-built figure
        if x_axis not in ("X", "X_in"):
            raise KeyError(f"Unrecognized option for x_axis, {x_axis}")

        children = list(self.net.named_children())
        if not children:
            raise ValueError("The net has no child modules to visualize.")

        # One row per child, a single column
        mosaic = [[name] for name, _ in children]

        fig = plt.figure(figsize=(len(mosaic[0]) * 5, len(mosaic) * 5))
        ax_dict = fig.subplot_mosaic(mosaic=mosaic)

        # Put the data where the net lives; fall back to the notebook default
        # when the net has no parameters to inspect.
        first_param = next(self.net.parameters(), None)
        device = first_param.device if first_param is not None else DEVICE

        X_in = torch.Tensor(X).to(device).reshape(-1, 1)
        with torch.no_grad():
            for name, child in children:

                if x_axis == "X":
                    X_plotted = X
                else:
                    X_plotted = X_in.cpu().detach().numpy()

                ax = ax_dict[name]
                X_out = child(X_in)
                ax.plot(
                    X_plotted,
                    X_out.cpu().detach().numpy(),
                )

                # The output of this child feeds the next one
                X_in = X_out

                ax.set_xlabel(x_axis)
                ax.set_ylabel(f"X_out @ {name}")
                ax.set_title(child)

        return fig

# Data


## California Housing Data


In [None]:
dataset = fetch_california_housing()

In [None]:
X = pd.DataFrame(dataset["data"], columns=dataset["feature_names"])
X

In [None]:
y = pd.DataFrame(dataset["target"], columns=dataset["target_names"])
y

In [None]:
df = pd.concat([X, y], axis=1)

In [None]:
g = sns.PairGrid(df)
g.map_diag(sns.histplot, bins=32)
g.map_offdiag(sns.histplot, bins=32)

# Simple Single-Feature Regression


## Set up


In [None]:
results = {}

### Select data


In [None]:
from sklearn.datasets import make_regression

In [None]:
X_var = "MedInc"
y_var = "MedHouseVal"

In [None]:
X = df[X_var].values.reshape(-1, 1)
y = df[y_var].values

In [None]:
# X, y = make_regression(n_samples=1000, n_features=1, noise=10)

In [None]:
n_features = X.shape[1]

In [None]:
fig = plt.figure()
ax = plt.gca()

ax.hist2d(
    X[:, 0],
    y,
    bins=32,
)

ax.set_xlabel("X")
ax.set_ylabel("y")

### Split data


In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=config["test_size"], random_state=config["random_state"]
)

In [None]:
cv = KFold(
    n_splits=config["n_splits"], shuffle=True, random_state=config["random_state"]
)

## Baseline


### Build


In [None]:
class Baseline(BaseEstimator):
    """Baseline regressor that always predicts the mean of the training targets."""

    def fit(self, X, y):
        """Store the mean of ``y`` (``X`` is ignored) and return ``self``."""

        self.mean_ = y.mean()

        # scikit-learn estimators return themselves from fit
        return self

    def predict(self, X):
        """Return one copy of the stored mean for every row of ``X``."""

        return np.full(X.shape[0], self.mean_)

In [None]:
# Make the estimator
model_name = "mean"
model = Baseline()

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Linear Regression


### Build


In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Make the estimator
model_name = "linear_regression"
model = LinearRegression()

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
fig = plt.figure()
ax = plt.gca()

h, x_edges, y_edges, mesh = ax.hist2d(
    X_test[:, 0],
    y_test,
    bins=32,
    cmap="Greys",
)

y_pred_plot = model.predict(x_edges.reshape(-1, 1))
ax.plot(x_edges, y_pred_plot, color="r")

ax.set_xlabel(X_var)
ax.set_ylabel(y_var)

In [None]:
result["w"] = model.coef_[0]
result["b"] = model.intercept_

In [None]:
results[model_name] = result

## Model: Single Linear Layer

Same thing as traditional linear regression, but trained with gradient descent.


In [None]:
# Make the estimator
model_name = "linear_model"
model = TorchLinearEstimator(n_epoch=20)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the parameters as plain Python floats rather than torch tensors, so
# they carry no device or autograd state into the results table
result["w"] = float(model.w_[0])
result["b"] = float(model.b_[0])

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Change in fit parameters
norm = plt.Normalize(0, len(model.ws_))
cmap = sns.color_palette("flare", as_cmap=True)
for i, w in enumerate(model.ws_):

    if i == 0 or i == len(model.ws_) - 1:
        label = i
    else:
        label = None

    ax.plot(x_edges, w * x_edges + model.bs_[i], color=cmap(norm(i)), label=label)

# Linear regression best fit
l_params = results["linear_regression"]
ax.plot(x_edges, x_edges * l_params["w"] + l_params["b"], color="b")

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Generate the loss map
ws = np.linspace(-y.max(), y.max(), 32)
bs = np.linspace(-y.max(), y.max(), 32)

# Grid coordinates: w varies along rows, b along columns
w_map, b_map = np.meshgrid(ws, bs, indexing="ij")

# MSE of the line w * x + b on the training data at every grid point.
# Comprehension variables stay local, so the notebook-level y_pred / loss /
# w / b are not overwritten by this cell.
loss_map = np.array(
    [
        [
            (((w_i * X_train + b_j).flatten() - y_train) ** 2.0).mean()
            for b_j in bs
        ]
        for w_i in tqdm.tqdm(ws)
    ]
)

In [None]:
# Plot trajectory in loss map
fig = plt.figure()
ax = plt.gca()

p = ax.pcolormesh(w_map, b_map, loss_map, cmap="Greys")

ax.scatter(l_params["w"], l_params["b"], color="k", marker='X', s=100)

ax.scatter(
    model.ws_,
    model.bs_,
    c=cmap(norm(np.arange(model.ws_.size))),
)

plt.colorbar(p)

ax.set_xlabel("w")
ax.set_ylabel("b")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
)

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Single Linear Layer (using nn.Sequential)


### Build


In [None]:
class CustomOptimizer:
    """Minimal plain gradient-descent optimizer exposing ``zero_grad`` and ``step``.

    Parameters
    ----------
    params : iterable
        Objects exposing ``.data`` and ``.grad`` (e.g. torch parameters).
    lr : float
        Step size applied to each gradient.
    """

    def __init__(self, params, lr=1e-4):
        # Materialize once: a generator such as ``net.parameters()`` would
        # otherwise be exhausted after the first pass over it.
        self.params = list(params)
        self.lr = lr

    def zero_grad(self):
        """Drop the stored gradient of every parameter."""
        for param in self.params:
            param.grad = None

    def step(self):
        """Move every parameter that has a gradient one step against it."""
        for param in self.params:
            # A parameter that took no part in the backward pass has no gradient
            if param.grad is None:
                continue
            param.data -= param.grad.data * self.lr

In [None]:
# Make the estimator
model_name = "linear_net"
model = nn.Sequential(
    nn.Linear(n_features, 1),
)
model = TorchEstimator(net=model, optimizer=CustomOptimizer, epochs=10)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the parameters as plain Python floats rather than torch tensors, so
# they carry no device or autograd state into the results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Change in fit parameters
norm = plt.Normalize(0, model.epochs)
cmap = sns.color_palette("flare", as_cmap=True)
for i, params in enumerate(model.params_list_):

    if i == 0 or i == model.epochs - 1:
        label = i
    else:
        label = None

    w = params[0][0][0]
    b = params[1][0]
    ax.plot(x_edges, w * x_edges + b, color=cmap(norm(i)), label=label)

# Linear regression best fit
l_params = results["linear_regression"]
ax.plot(x_edges, x_edges * l_params["w"] + l_params["b"], color="b")

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Single Linear Layer (using nn.Sequential and a torch Optimizer)


### Build


In [None]:
# Make the estimator
model_name = "linear_net_adam"
model = nn.Sequential(
    nn.Linear(n_features, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=10)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the parameters as plain Python floats rather than torch tensors, so
# they carry no device or autograd state into the results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Change in fit parameters
norm = plt.Normalize(0, model.epochs)
cmap = sns.color_palette("flare", as_cmap=True)
for i, params in enumerate(model.params_list_):

    if i == 0 or i == model.epochs - 1:
        label = i
    else:
        label = None

    w = params[0][0][0]
    b = params[1][0]
    ax.plot(x_edges, w * x_edges + b, color=cmap(norm(i)), label=label)

# Linear regression best fit
l_params = results["linear_regression"]
ax.plot(x_edges, x_edges * l_params["w"] + l_params["b"], color="b")

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Double Linear Layer
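Two stacked linear layers with no activation in between are still a single linear map, so this should perform the same as a single linear layer.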


### Build


In [None]:
# Make the estimator
model_name = "double_linear_net"
model = nn.Sequential(
    nn.Linear(n_features, 1),
    nn.Linear(1, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=10)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the first layer's first weight and bias as plain Python floats rather
# than torch tensors, so they carry no device or autograd state into the
# results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
nv = NetVisualizer(model.net)
fig = nv.plot(x_edges)

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Change in fit parameters
norm = plt.Normalize(0, model.epochs)
cmap = sns.color_palette("flare", as_cmap=True)
for i, params in enumerate(model.params_list_):

    # Only the first and last epochs get a legend entry
    label = i if i in (0, model.epochs - 1) else None

    # Compose both Linear layers into the one line the net actually computes:
    # w2 * (w1 * x + b1) + b2 = (w2 * w1) * x + (w2 * b1 + b2)
    w1, b1 = params[0][0][0], params[1][0]
    w2, b2 = params[2][0][0], params[3][0]
    w = w2 * w1
    b = w2 * b1 + b2
    ax.plot(x_edges, w * x_edges + b, color=cmap(norm(i)), label=label, zorder=i+1)

# Linear regression best fit
l_params = results["linear_regression"]
ax.plot(x_edges, x_edges * l_params["w"] + l_params["b"], color="b")

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Simplest Nonlinear Net


In [None]:
# Make the estimator
model_name = "simplest_nonlinear_net"
model = nn.Sequential(
    nn.Linear(n_features, 1),
    nn.ReLU(),
    nn.Linear(1, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=10)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the first layer's first weight and bias as plain Python floats rather
# than torch tensors, so they carry no device or autograd state into the
# results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Visualize what happens to the input
nv = NetVisualizer(model.net)
fig = nv.plot(x_edges)

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Change in fit parameters
# One line per stored epoch, colored by epoch index.
norm = plt.Normalize(0, model.epochs)
cmap = sns.color_palette("flare", as_cmap=True)
for i, params in enumerate(model.params_list_):

    # Only the first and last epochs get a legend entry
    if i == 0 or i == model.epochs - 1:
        label = i
    else:
        label = None

    # NOTE(review): w and b are the first weight and bias of the FIRST layer
    # only, so this line is the first layer's pre-activation output, not the
    # prediction of the whole net (ReLU and the second layer are ignored).
    # TODO confirm this is the intended plot.
    w = params[0][0][0]
    b = params[1][0]
    ax.plot(x_edges, w * x_edges + b, color=cmap(norm(i)), label=label, zorder=i+1)

# Linear regression best fit
l_params = results["linear_regression"]
ax.plot(x_edges, x_edges * l_params["w"] + l_params["b"], color="b")

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Slightly-more-complex Nonlinear Net


In [None]:
# Make the estimator
model_name = "nonlinear_net"
model = nn.Sequential(
    nn.Linear(n_features, 2),
    nn.ReLU(),
    nn.Linear(2, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=10)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the first layer's first weight and bias as plain Python floats rather
# than torch tensors, so they carry no device or autograd state into the
# results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Visualize what happens to the input
nv = NetVisualizer(model.net)
fig = nv.plot(x_edges)

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Change in fit parameters
# One line per stored epoch, colored by epoch index.
norm = plt.Normalize(0, model.epochs)
cmap = sns.color_palette("flare", as_cmap=True)
for i, params in enumerate(model.params_list_):

    # Only the first and last epochs get a legend entry
    if i == 0 or i == model.epochs - 1:
        label = i
    else:
        label = None

    # NOTE(review): w and b belong to the first hidden unit of the FIRST layer
    # only, so this line is that unit's pre-activation output, not the
    # prediction of the whole net (the other unit, ReLU and the second layer
    # are ignored). TODO confirm this is the intended plot.
    w = params[0][0][0]
    b = params[1][0]
    ax.plot(x_edges, w * x_edges + b, color=cmap(norm(i)), label=label, zorder=i+1)

# Linear regression best fit
l_params = results["linear_regression"]
ax.plot(x_edges, x_edges * l_params["w"] + l_params["b"], color="b")

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Compare Models


In [None]:
# Format data
# One frame per model; scalar entries such as "mse" are repeated for every
# cross-validation fold. Built without a loop variable named ``df`` so the
# housing DataFrame defined earlier in the notebook is not overwritten.
dfs = [pd.DataFrame(value).assign(model_name=key) for key, value in results.items()]
# Each per-model frame is indexed from 0; renumber so row labels are unique
results_df = pd.concat(dfs, ignore_index=True)

In [None]:
fig = plt.figure(figsize=(len(results) * 2, 2))
ax = plt.gca()

sns.swarmplot(
    data=results_df,
    x="model_name",
    y="cross_val_score",
)

ax.set_ylabel(config["scoring"])

# Single-Feature Nonlinear Regression


In [None]:
results = {}

In [None]:
from sklearn.datasets import make_regression

In [None]:
# Seed the generator like the rest of the notebook so the synthetic data, and
# every score computed from it, is reproducible between runs
X, y = make_regression(
    n_samples=1000,
    n_features=1,
    noise=5.0,
    random_state=config["random_state"],
)
# Square the target to make the relationship nonlinear, then standardize it
y = y**2.0
y -= y.mean()
y /= y.std()

In [None]:
# X, y = make_regression(n_samples=1000, n_features=1, noise=10)

In [None]:
n_features = X.shape[1]

In [None]:
fig = plt.figure()
ax = plt.gca()

ax.hist2d(
    X[:, 0],
    y,
    bins=32,
)

ax.set_xlabel("X")
ax.set_ylabel("y")

### Split data


In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, KFold

In [None]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=config["test_size"], random_state=config["random_state"]
)

In [None]:
cv = KFold(
    n_splits=config["n_splits"], shuffle=True, random_state=config["random_state"]
)

## Baseline


### Build


In [None]:
from sklearn.base import BaseEstimator

In [None]:
class Baseline(BaseEstimator):
    """Baseline regressor that always predicts the mean of the training targets."""

    def fit(self, X, y):
        """Store the mean of ``y`` (``X`` is ignored) and return ``self``."""

        self.mean_ = y.mean()

        # scikit-learn estimators return themselves from fit
        return self

    def predict(self, X):
        """Return one copy of the stored mean for every row of ``X``."""

        return np.full(X.shape[0], self.mean_)

In [None]:
# Make the estimator
model_name = "mean"
model = Baseline()

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Linear Regression


### Build


In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
# Make the estimator
model_name = "linear_regression"
model = LinearRegression()

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
fig = plt.figure()
ax = plt.gca()

h, x_edges, y_edges, mesh = ax.hist2d(
    X_test[:, 0],
    y_test,
    bins=32,
    cmap="Greys",
)

y_pred_plot = model.predict(x_edges.reshape(-1, 1))
ax.plot(x_edges, y_pred_plot, color="r")

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
result["w"] = model.coef_[0]
result["b"] = model.intercept_

In [None]:
results[model_name] = result

## Model: Neural Net


In [None]:
# Make the estimator
model_name = "neural_net"
model = nn.Sequential(
    nn.Linear(n_features, 2),
    nn.ReLU(),
    nn.Linear(2, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=100)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the first layer's first weight and bias as plain Python floats rather
# than torch tensors, so they carry no device or autograd state into the
# results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Visualize what happens to the input
nv = NetVisualizer(model.net)
fig = nv.plot(x_edges)

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

# Overlay the fitted curve, evaluated at the histogram bin edges
y_pred = model.predict(x_edges.reshape(-1, 1))
ax.plot(
    x_edges,
    y_pred,
    # Labelled so the legend below has an entry to show
    label=model_name,
)

ax.legend()

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Wide Neural Net


In [None]:
# Make the estimator
model_name = "wide_neural_net"
model = nn.Sequential(
    nn.Linear(n_features, 16),
    nn.ReLU(),
    nn.Linear(16, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=100)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the first layer's first weight and bias as plain Python floats rather
# than torch tensors, so they carry no device or autograd state into the
# results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Visualize what happens to the input
nv = NetVisualizer(model.net)
fig = nv.plot(x_edges)

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

y_pred = model.predict(x_edges.reshape(-1, 1))
ax.plot(
    x_edges,
    y_pred,
)

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Model: Wide-Deep Neural Net


In [None]:
# Make the estimator
model_name = "deep_neural_net"
model = nn.Sequential(
    nn.Linear(n_features, 4),
    nn.ReLU(),
    nn.Linear(4, 4),
    nn.ReLU(),
    nn.Linear(4, 1),
)
model = TorchEstimator(net=model, optimizer=optim.Adam, epochs=50)

### Evaluate


In [None]:
result = {}

In [None]:
# Full prediction
model.fit(X_train, y_train, X_val=X_test, y_val=y_test)
y_pred = model.predict(X_test)

In [None]:
# Calculate the error
result["mse"] = mean_squared_error(y_test, y_pred)
result["mse"]

In [None]:
# Store the first layer's first weight and bias as plain Python floats rather
# than torch tensors, so they carry no device or autograd state into the
# results table
result["w"] = float(list(model.net.parameters())[0][0][0])
result["b"] = float(list(model.net.parameters())[1][0])

In [None]:
# Visualize what happens to the input
nv = NetVisualizer(model.net)
fig = nv.plot(x_edges)

In [None]:
# Evolution of fit over epochs
fig = plt.figure()
ax = plt.gca()

# Data
h, x_edges, y_edges, mesh = ax.hist2d(
    X_train[:, 0],
    y_train,
    bins=32,
    cmap="Greys",
)

y_pred = model.predict(x_edges.reshape(-1, 1))
ax.plot(
    x_edges,
    y_pred,
)

ax.set_xlabel("X")
ax.set_ylabel("y")

In [None]:
# Plot the training progress (loss curve)
fig = plt.figure()
ax = plt.gca()

ax.plot(
    range(len(model.losses_)),
    model.losses_,
    label = 'training',
)

ax.plot(
    range(len(model.losses_val_)),
    model.losses_val_,
    label = 'validation',
)

ax.legend()

ax.set_xlabel("epoch")
ax.set_ylabel("mse")

In [None]:
# Crossval score
result["cross_val_score"] = cross_val_score(
    model, X_train, y_train, cv=cv, scoring=config["scoring"]
)
result["cross_val_score"]

In [None]:
results[model_name] = result

## Compare Models


In [None]:
# Format data
# One frame per model; scalar entries such as "mse" are repeated for every
# cross-validation fold. Built without a loop variable named ``df`` so the
# housing DataFrame defined earlier in the notebook is not overwritten.
dfs = [pd.DataFrame(value).assign(model_name=key) for key, value in results.items()]
# Each per-model frame is indexed from 0; renumber so row labels are unique
results_df = pd.concat(dfs, ignore_index=True)

In [None]:
fig = plt.figure(figsize=(len(results) * 2, 2))
ax = plt.gca()

sns.swarmplot(
    data=results_df,
    x="model_name",
    y="cross_val_score",
)

ax.set_ylabel(config["scoring"])