# Neural networks

Check [the tutorial](https://developers.google.com/machine-learning/crash-course/introduction-to-neural-networks/anatomy) to see how neural networks deal with nonlinear problems.

In previous chapters we had one layer in our linear model that produced the weighted sum of the inputs as the output. We may add `hidden layers` that are weighted sums of the input layer, but the model would still be linear no matter how many hidden layers we introduce because they are just linear combinations of the input.

We may introduce nonlinearity through `activation functions`. That is, the value of each node in a hidden layer is transformed with a nonlinear function before being passed on to the next layer. Common activation functions include `sigmoid`, rectified linear unit (`ReLU`), `Tanh`, etc.

In this notebook we still use the [California Housing Dataset](https://developers.google.com/machine-learning/crash-course/california-housing-data-description). All values are z-score normalized, and the features we use are:

- `latitude` X `longitude` (binned then crossed)
- `median_income`
- `population`

and the variable we want to predict is `median_house_value`. A linear regression model is used to find the baseline loss before creating the neural net models.

In [1]:
import numpy as np
import pandas as pd
import pytorch_lightning as pl
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from torch.utils.data.dataset import random_split

## Load dataset

In [2]:
# Load dataset
train_df = pd.read_csv(
    "https://download.mlcc.google.com/mledu-datasets/california_housing_train.csv"
)
test_df = pd.read_csv(
    "https://download.mlcc.google.com/mledu-datasets/california_housing_test.csv"
)

# Shuffle the examples
rng = np.random.default_rng(42)
train_df = train_df.reindex(rng.permutation(train_df.index))

In [3]:
# Normalize using z-scores
train_df_mean, train_df_std = train_df.mean(), train_df.std()
train_df_norm = (train_df - train_df_mean) / train_df_std
test_df_mean, test_df_std = test_df.mean(), test_df.std()
test_df_norm = (test_df - test_df_mean) / test_df_std

In [4]:
# Construct features
def coord_bins(coord):
    # returns the `bins` parameter in pd.cut
    return np.linspace(np.min(coord), np.max(coord), 10)


def coord_to_loc(longitude, latitude):
    longitude_bins, latitude_bins = coord_bins(longitude), coord_bins(latitude)
    longitude_binned = np.digitize(longitude, bins=longitude_bins)
    latitude_binned = np.digitize(latitude, bins=latitude_bins)

    longitude_binned_onehot, latitude_binned_onehot = (
        pd.get_dummies(longitude_binned),
        pd.get_dummies(latitude_binned),
    )
    return np.einsum(
        "...i,...j", longitude_binned_onehot, latitude_binned_onehot
    ).reshape(longitude.shape[0], -1)

In [5]:
loc_train = coord_to_loc(train_df_norm["longitude"], train_df_norm["latitude"])
loc_test = coord_to_loc(test_df_norm["longitude"], test_df_norm["latitude"])

x_train = np.hstack(
    (train_df_norm[["median_income", "population"]].to_numpy(), loc_train)
)
x_test = np.hstack((test_df_norm[["median_income", "population"]].to_numpy(), loc_test))

In [6]:
class HousingDataset(Dataset):
    def __init__(self, x, y):
        self.x = torch.tensor(x, dtype=torch.float)
        self.y = torch.tensor(y.to_numpy().reshape(-1, 1), dtype=torch.float)

    def __getitem__(self, idx):
        return (self.x[idx], self.y[idx])

    def __len__(self):
        return len(self.y)


dat = HousingDataset(x_train, train_df_norm["median_house_value"])
dat_test = HousingDataset(x_test, test_df_norm["median_house_value"])

## Build baseline linear model

In [7]:
class LinearModel(pl.LightningModule):
    def __init__(self, dat, test_dat, hparams, *args, **kwargs):
        super().__init__()
        self.hparams = hparams
        self.dat = dat
        self.test_dat = test_dat

        self.l1 = nn.Linear(test_dat.x.shape[1], 1)

    def forward(self, x):
        y_hat = self.l1(x)
        return y_hat

    def setup(self, step):
        # step is either "fit" or "test"; not relevant

        # Split the dataset into a training set and a validation set
        validation_set_size = int(self.dat.x.shape[0] * self.hparams.validation_split)
        training_set_size = self.dat.x.shape[0] - validation_set_size
        train_dat, val_dat = random_split(dat, [training_set_size, validation_set_size])
        self.train_dat = train_dat
        self.val_dat = val_dat

    def configure_optimizers(self):
        return [torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)]

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)  # or just self(x)
        loss = nn.MSELoss()(y_hat, y)

        logs = {
            "train_loss": loss,
        }
        return {"loss": loss, "log": logs}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        return {"val_loss": nn.MSELoss()(y_hat, y)}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        logs = {"val_loss": avg_loss}
        return {"avg_val_loss": avg_loss, "log": logs}

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        return {"test_loss": nn.MSELoss()(y_hat, y)}

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        logs = {"test_loss": avg_loss}
        return {"avg_test_loss": avg_loss, "log": logs}

    def train_dataloader(self):
        return DataLoader(
            self.train_dat, batch_size=self.hparams.batch_size, num_workers=8
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dat, batch_size=self.hparams.batch_size, num_workers=4
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dat, batch_size=self.hparams.batch_size, num_workers=4
        )

In [8]:
# Hyperparameters
hparams = {"learning_rate": 0.001, "batch_size": 100, "validation_split": 0.2}
epochs = 50

# Train model
linear_trainer = pl.Trainer(gpus=[0], max_epochs=epochs)
linear_model = LinearModel(dat, dat_test, hparams)

linear_trainer.fit(linear_model)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
CUDA_VISIBLE_DEVICES: [0]

  | Name | Type   | Params
--------------------------------
0 | l1   | Linear | 103   


HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validation sanity check', layout=Layout…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Training', layout=Layout(flex='2'), max…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…

HBox(children=(FloatProgress(value=1.0, bar_style='info', description='Validating', layout=Layout(flex='2'), m…




1

In [9]:
linear_trainer.test()

FileNotFoundError: [Errno 2] No such file or directory: '/home/yi/work/machine_learning/google-ml-crash-course/lightning_logs/version_0/checkpoints/epoch=12.ckpt'

## NN model

In [None]:
class NeuralNetModel(pl.LightningModule):
    def __init__(self, dat, test_dat, hparams, *args, **kwargs):
        super().__init__()
        self.hparams = hparams
        self.dat = dat
        self.test_dat = test_dat

        self.l1 = nn.Linear(test_dat.x.shape[1], 10)
        self.l2 = nn.Linear(10, 6)
        self.l3 = nn.Linear(6, 1)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return x

    def setup(self, step):
        # step is either "fit" or "test"; not relevant

        # Split the dataset into a training set and a validation set
        validation_set_size = int(self.dat.x.shape[0] * self.hparams.validation_split)
        training_set_size = self.dat.x.shape[0] - validation_set_size
        train_dat, val_dat = random_split(dat, [training_set_size, validation_set_size])
        self.train_dat = train_dat
        self.val_dat = val_dat

    def configure_optimizers(self):
        return [torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)]

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)  # or just self(x)
        loss = nn.MSELoss()(y_hat, y)

        logs = {
            "train_loss": loss,
        }
        return {"loss": loss, "log": logs}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        return {"val_loss": nn.MSELoss()(y_hat, y)}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        logs = {"val_loss": avg_loss}
        return {"avg_val_loss": avg_loss, "log": logs}

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        return {"test_loss": nn.MSELoss()(y_hat, y)}

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        logs = {"test_loss": avg_loss}
        return {"avg_test_loss": avg_loss, "log": logs}

    def train_dataloader(self):
        return DataLoader(
            self.train_dat, batch_size=self.hparams.batch_size, num_workers=8
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dat, batch_size=self.hparams.batch_size, num_workers=4
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dat, batch_size=self.hparams.batch_size, num_workers=4
        )

In [None]:
# Hyperparameters
hparams = {
    "learning_rate": 0.001,
    "batch_size": 100,
    "validation_split": 0.2,
}
epochs = 50

# Train model
nn_trainer = pl.Trainer(gpus=[0], max_epochs=epochs)
nn_model = NeuralNetModel(dat, dat_test, hparams)

nn_trainer.fit(nn_model)

In [None]:
nn_trainer.test()

## Regularization

Note that the model's loss against the test set is much higher than the loss against the training set. This indicates the model is overfitting to the data, and we need regularization to reduce the overfitting. Try L1, L2 and dropout regularizations.

- For L1 regularization, add `nn.L1Loss` to the list of loss functions: `loss = nn.MSELoss()(y_hat, y) + lambda * nn.L1Loss()(y_hat, y)`.
- For L2 regularization, use the `weight_decay` parameter in the optimizer, e.g. `torch.optim.SGD`.
- See below for an example of adding dropout layers.

In [None]:
class NeuralNetDropoutModel(pl.LightningModule):
    def __init__(self, dat, test_dat, hparams, *args, **kwargs):
        super().__init__()
        self.hparams = hparams
        self.dat = dat
        self.test_dat = test_dat

        self.l1 = nn.Linear(test_dat.x.shape[1], 10)
        self.l_drop = nn.Dropout(p=self.hparams.dropout_prob)
        self.l2 = nn.Linear(10, 6)
        self.l3 = nn.Linear(6, 1)

    def forward(self, x):
        x = F.relu(self.l1(x))
        x = self.l_drop(x)
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return x

    def setup(self, step):
        # step is either "fit" or "test"; not relevant

        # Split the dataset into a training set and a validation set
        validation_set_size = int(self.dat.x.shape[0] * self.hparams.validation_split)
        training_set_size = self.dat.x.shape[0] - validation_set_size
        train_dat, val_dat = random_split(dat, [training_set_size, validation_set_size])
        self.train_dat = train_dat
        self.val_dat = val_dat

    def configure_optimizers(self):
        return [torch.optim.Adam(self.parameters(), lr=self.hparams.learning_rate)]

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self.forward(x)  # or just self(x)
        loss = nn.MSELoss()(y_hat, y)

        logs = {
            "train_loss": loss,
        }
        return {"loss": loss, "log": logs}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        return {"val_loss": nn.MSELoss()(y_hat, y)}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        logs = {"val_loss": avg_loss}
        return {"avg_val_loss": avg_loss, "log": logs}

    def test_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        return {"test_loss": nn.MSELoss()(y_hat, y)}

    def test_epoch_end(self, outputs):
        avg_loss = torch.stack([x["test_loss"] for x in outputs]).mean()
        logs = {"test_loss": avg_loss}
        return {"avg_test_loss": avg_loss, "log": logs}

    def train_dataloader(self):
        return DataLoader(
            self.train_dat, batch_size=self.hparams.batch_size, num_workers=8
        )

    def val_dataloader(self):
        return DataLoader(
            self.val_dat, batch_size=self.hparams.batch_size, num_workers=4
        )

    def test_dataloader(self):
        return DataLoader(
            self.test_dat, batch_size=self.hparams.batch_size, num_workers=4
        )

In [None]:
# Hyperparameters
hparams = {
    "learning_rate": 0.001,
    "batch_size": 100,
    "validation_split": 0.2,
    "dropout_prob": 0.2,
}
epochs = 50

# Train model
nn_trainer = pl.Trainer(gpus=[0], max_epochs=epochs)
nn_model = NeuralNetDropoutModel(dat, dat_test, hparams)

nn_trainer.fit(nn_model)

In [None]:
nn_trainer.test()