Just some imports to get out of the way

In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader
#from torchvision import datasets
#from torchvision.transforms import ToTensor, Lambda, Compose
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import datetime

%matplotlib inline

In [2]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
  device = "cuda"
else:
  device = "cpu"

## Working with tensors

In [3]:
# Row and column counts used by the demo tensors below.
n = 5
k = 5

In [4]:
# The integers 0..24 as a 1-D integer tensor.
x = torch.arange(25)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24])

In [5]:
# We can manipulate the shape of our data, adding and removing dimensions.
# Here the 25 values are laid out row by row as a 5 x 5 matrix.
x = torch.reshape(x, (5, 5))
x

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]])

In [6]:
# -1 lets PyTorch work out that dimension from the number of elements.
x = torch.reshape(x, (5, -1))
x

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]])

In [7]:
# Insert a new axis of size 1 at position 2: (5, 5) -> (5, 5, 1).
x = torch.unsqueeze(x, dim=2)
x

tensor([[[ 0],
         [ 1],
         [ 2],
         [ 3],
         [ 4]],

        [[ 5],
         [ 6],
         [ 7],
         [ 8],
         [ 9]],

        [[10],
         [11],
         [12],
         [13],
         [14]],

        [[15],
         [16],
         [17],
         [18],
         [19]],

        [[20],
         [21],
         [22],
         [23],
         [24]]])

In [8]:
# Drop every axis of size 1, which takes us back to (5, 5).
x = torch.squeeze(x)
x

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]])

In [9]:
# Convenience constructors for commonly needed tensors.
I = torch.eye(5)                              # 5 x 5 identity matrix
zeros = torch.zeros(n, k)                     # n x k matrix of 0.0
ones = torch.ones(n, k)                       # n x k matrix of 1.0 (not printed below)
ones_like = torch.ones_like(torch.arange(k))  # ones with the shape/dtype of another tensor

print(I, '\n')
print(zeros, '\n')
print(ones_like)

tensor([[1., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0.],
        [0., 0., 1., 0., 0.],
        [0., 0., 0., 1., 0.],
        [0., 0., 0., 0., 1.]]) 

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]) 

tensor([1, 1, 1, 1, 1])


In [10]:
# Returns a floating-point copy; x is not reassigned, so it keeps its integer dtype.
x.to(torch.float)

tensor([[ 0.,  1.,  2.,  3.,  4.],
        [ 5.,  6.,  7.,  8.,  9.],
        [10., 11., 12., 13., 14.],
        [15., 16., 17., 18., 19.],
        [20., 21., 22., 23., 24.]])

## How about some math

In [11]:
# The cast in the previous cell was not assigned back, so x is still the integer tensor.
x

tensor([[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24]])

In [12]:
# Element-wise square: each entry is multiplied by itself.
x * x

tensor([[  0,   1,   4,   9,  16],
        [ 25,  36,  49,  64,  81],
        [100, 121, 144, 169, 196],
        [225, 256, 289, 324, 361],
        [400, 441, 484, 529, 576]])

In [13]:
# The scalar is added to every entry of the tensor.
10 + x

tensor([[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19],
        [20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34]])

In [14]:
# Element-wise reciprocal; the +1 avoids dividing by the 0 in the first entry.
(x + 1).reciprocal()

tensor([[1.0000, 0.5000, 0.3333, 0.2500, 0.2000],
        [0.1667, 0.1429, 0.1250, 0.1111, 0.1000],
        [0.0909, 0.0833, 0.0769, 0.0714, 0.0667],
        [0.0625, 0.0588, 0.0556, 0.0526, 0.0500],
        [0.0476, 0.0455, 0.0435, 0.0417, 0.0400]])

In [15]:
# Broadcasting: the length-5 vector [1, 2, 3, 4, 5] is added to every row of x.
x + torch.arange(1, 6)

tensor([[ 1,  3,  5,  7,  9],
        [ 6,  8, 10, 12, 14],
        [11, 13, 15, 17, 19],
        [16, 18, 20, 22, 24],
        [21, 23, 25, 27, 29]])

In [16]:
# Uniform random values in [0, 1), stretched by a factor of 25.
y = 25 * torch.rand(5, 5)
y

tensor([[ 5.1532, 21.8379, 13.8088,  8.5903,  0.9431],
        [ 3.4526, 16.0344, 10.0737, 22.3892,  6.5703],
        [22.1282,  0.8602,  5.7745, 11.8369, 17.0561],
        [17.1448, 15.1211,  9.0148, 20.0356, 15.3469],
        [ 7.4655,  5.1678,  3.6377, 11.6095, 20.8208]])

In [17]:
# Element-wise (Hadamard) product, not a matrix product; the integer x is promoted to float.
y * x

tensor([[  0.0000,  21.8379,  27.6175,  25.7708,   3.7726],
        [ 17.2630,  96.2064,  70.5162, 179.1135,  59.1328],
        [221.2823,   9.4623,  69.2945, 153.8802, 238.7858],
        [257.1726, 241.9381, 153.2516, 360.6410, 291.5908],
        [149.3099, 108.5236,  80.0294, 267.0187, 499.7001]])

In [18]:
# Transpose of the 2-D tensor: rows become columns.
x.t()

tensor([[ 0,  5, 10, 15, 20],
        [ 1,  6, 11, 16, 21],
        [ 2,  7, 12, 17, 22],
        [ 3,  8, 13, 18, 23],
        [ 4,  9, 14, 19, 24]])

## Let's work with some data

In [20]:
# Load the train/test splits of the California housing data from the working directory.
traindf = pd.read_csv('california_housing_train.csv')
testdf = pd.read_csv('california_housing_test.csv')

# Peek at the first five rows.
print(traindf.head(5))

   longitude  latitude  housing_median_age  total_rooms  total_bedrooms  \
0    -114.31     34.19                15.0       5612.0          1283.0   
1    -114.47     34.40                19.0       7650.0          1901.0   
2    -114.56     33.69                17.0        720.0           174.0   
3    -114.57     33.64                14.0       1501.0           337.0   
4    -114.57     33.57                20.0       1454.0           326.0   

   population  households  median_income  median_house_value  
0      1015.0       472.0         1.4936             66900.0  
1      1129.0       463.0         1.8200             80100.0  
2       333.0       117.0         1.6509             85700.0  
3       515.0       226.0         3.1917             73400.0  
4       624.0       262.0         1.9250             65500.0  


In [21]:
# First row of the training frame, shown as a Series (one entry per column).
traindf.iloc[0, :]

longitude              -114.3100
latitude                 34.1900
housing_median_age       15.0000
total_rooms            5612.0000
total_bedrooms         1283.0000
population             1015.0000
households              472.0000
median_income             1.4936
median_house_value    66900.0000
Name: 0, dtype: float64

In [22]:
# Learn per-column min/max from the training data only, then apply the same
# scaling to both splits. Every column is scaled, including median_house_value.
sc = MinMaxScaler()
sc.fit(traindf)
train_numpy = sc.transform(traindf)
test_numpy = sc.transform(testdf)

In [23]:
# First five scaled rows (all columns).
train_numpy[:5, :]

array([[1.        , 0.17534538, 0.2745098 , 0.14788454, 0.19894475,
        0.02836402, 0.07745437, 0.06853009, 0.10701193],
       [0.98406375, 0.19766206, 0.35294118, 0.20160801, 0.29484792,
        0.03155918, 0.07597435, 0.09104012, 0.13422831],
       [0.9750996 , 0.12221041, 0.31372549, 0.01892711, 0.02684668,
        0.00924914, 0.01907581, 0.07937822, 0.14577466],
       [0.97410359, 0.11689692, 0.25490196, 0.03951496, 0.05214153,
        0.01435018, 0.03700049, 0.18563882, 0.12041394],
       [0.97410359, 0.10945802, 0.37254902, 0.038276  , 0.05043451,
        0.0174052 , 0.04292057, 0.0982814 , 0.10412534]])

In [24]:
# torch.tensor builds a tensor from the NumPy array; the dtype follows the
# array, which is why the printed tensor reports float64.
train_tensor = torch.tensor(train_numpy)
print(train_tensor[:5])

tensor([[1.0000, 0.1753, 0.2745, 0.1479, 0.1989, 0.0284, 0.0775, 0.0685, 0.1070],
        [0.9841, 0.1977, 0.3529, 0.2016, 0.2948, 0.0316, 0.0760, 0.0910, 0.1342],
        [0.9751, 0.1222, 0.3137, 0.0189, 0.0268, 0.0092, 0.0191, 0.0794, 0.1458],
        [0.9741, 0.1169, 0.2549, 0.0395, 0.0521, 0.0144, 0.0370, 0.1856, 0.1204],
        [0.9741, 0.1095, 0.3725, 0.0383, 0.0504, 0.0174, 0.0429, 0.0983, 0.1041]],
       dtype=torch.float64)


In [None]:
class CustomHousingDataset(torch.utils.data.Dataset):
  """Housing rows read from a CSV file and scaled on access.

  Args:
    file: path (or buffer) handed to ``pd.read_csv``.
    scaler: object with a ``transform`` method that accepts a 2-D array with
      one row per sample and returns an array of the same shape. The last
      column of the transformed array is used as the label.
  """

  def __init__(self, file, scaler):
    self.df = pd.read_csv(file)
    self.sc = scaler

  def __len__(self):
    """Number of rows in the underlying frame."""
    return self.df.shape[0]

  def __getitem__(self, idx):
    """Return ``(data, label)`` float32 tensors for ``idx``.

    ``idx`` may be a scalar position or a slice. A scalar yields ``data`` of
    shape ``(1, n_columns - 1)`` and ``label`` of shape ``(1,)``; a slice of
    ``m`` rows yields ``(m, n_columns - 1)`` and ``(m,)``.
    """
    raw = self.df.iloc[idx].values
    # Any scalar index (Python int, NumPy integer, ...) produces a single 1-D
    # row. Checking the array rank instead of ``type(idx) == int`` makes sure
    # every such row is lifted to 2-D before scaling and column slicing.
    if raw.ndim == 1:
      raw = raw.reshape(1, -1)
    raw = self.sc.transform(raw)
    data = torch.tensor(raw[:, :-1], dtype=torch.float32)
    label = torch.tensor(raw[:, -1], dtype=torch.float32)
    return data, label

In [None]:
# NOTE(review): these paths live under sample_data/, while the pd.read_csv cell
# earlier in the notebook reads the same file names from the working directory.
# Confirm which location is correct so a fresh Run All does not fail.
train_data = CustomHousingDataset(file='sample_data/california_housing_train.csv', scaler=sc)
test_data = CustomHousingDataset(file='sample_data/california_housing_test.csv', scaler=sc)

# Mini-batches of 64 samples, reshuffled on every pass.
train_loader = DataLoader(dataset=train_data, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=64, shuffle=True)

In [None]:
# Slicing goes through CustomHousingDataset.__getitem__ and returns a
# (data, label) tuple covering rows 100-109.
train_data[100:110]

## LET'S MODEL
We'll start by defining some basic models, loss functions, and optimizers. These are the fundamental building blocks for any Deep Learning platform.

Some common loss functions:
  - CrossEntropy
  - NLL (negative log likelihood)
  - BCE (binary cross entropy)
  - L1
  - MSE (mean squared error, aka L2)

And for optimizers we use some form of gradient descent:
- SGD (stochastic gradient descent)
- RMSprop
- Adagrad
- Adam

Once we get here, we could also do things like schedule the learning rate, etc.

In [None]:
class FeedForward(nn.Module):
  """Small fully connected network: 8 inputs -> 32 -> 16 -> 1 output.

  Each hidden linear layer is followed by a LeakyReLU; the output layer has no
  activation.
  """

  def __init__(self):
    super().__init__()
    self.linear1 = nn.Linear(in_features=8, out_features=32)
    self.relu1 = nn.LeakyReLU()
    self.linear2 = nn.Linear(in_features=32, out_features=16)
    self.relu2 = nn.LeakyReLU()
    self.linear_out = nn.Linear(in_features=16, out_features=1)

  def forward(self, x):
    """Run ``x`` (last dimension of size 8) through the three layers."""
    hidden = self.relu1(self.linear1(x))
    hidden = self.relu2(self.linear2(hidden))
    return self.linear_out(hidden)

class moreAdvanced(nn.Module):
  """The same 32 -> 16 stack as a single ``nn.Sequential``, with configurable input and output sizes.

  Args:
    size_in: number of input features.
    size_out: number of output features.
    device: accepted for API compatibility.
      NOTE(review): this argument is not used anywhere in the class.
  """

  def __init__(self, size_in, size_out, device=device):
    super().__init__()
    layers = [
        nn.Linear(size_in, 32),
        nn.LeakyReLU(),
        nn.Linear(32, 16),
        nn.LeakyReLU(),
        nn.Linear(16, size_out),
    ]
    self.linearStack = nn.Sequential(*layers)

  def forward(self, x):
    """Apply the whole stack to ``x``."""
    return self.linearStack(x)

In [None]:
# Instantiate the network and print its layers.
ff = FeedForward()
print(ff)

In [None]:
# Same architecture as FeedForward: 8 input features, 1 output.
moreAdv = moreAdvanced(size_in=8, size_out=1)
print(moreAdv)

In [None]:
# Sanity check: score a single prediction from the still-untrained network.
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(ff.parameters(), lr=1e-2)  # not stepped in this cell

a, b = train_data[0]
ff.eval()
with torch.no_grad():
  pred = ff(a)

# b has shape (1,), so add a leading axis to line it up with the (1, 1) prediction.
error = loss_func(pred, b.unsqueeze(0))
print('prediction:', pred)
print('target:', b)
print('error:', error)

### This is what we want to do, over the entire training data.

In [None]:
def train(dataloader, model, loss_func, optimizer):
  """Run one training epoch and return the losses logged along the way.

  Args:
    dataloader: iterable of ``(X, y)`` batches. If it exposes a sized
      ``dataset`` attribute, that size is shown in the progress line.
    model: module being trained; batches are moved to the device of its first
      parameter (left where they are if the model has no parameters).
    loss_func: callable ``loss_func(pred, target)`` returning a scalar tensor.
    optimizer: optimizer that updates ``model``'s parameters.

  Returns:
    List of float losses, one for every 10th batch (batches 0, 10, 20, ...).
  """
  model.train()
  train_loss = []

  # Total number of samples for the progress line (was hard-coded to 17000).
  try:
    total = len(dataloader.dataset)
  except (AttributeError, TypeError):
    total = "?"

  # Keep the data on the same device as the model rather than relying on a
  # notebook-level global.
  first_param = next(model.parameters(), None)
  target_device = first_param.device if first_param is not None else None

  seen = 0          # samples processed before the current batch
  since_report = 0  # samples processed since the last progress line
  now = datetime.datetime.now()
  for batch, (X, y) in enumerate(dataloader):
    if target_device is not None:
      X, y = X.to(target_device), y.to(target_device)

    # make some predictions and get the error
    pred = model(X)
    loss = loss_func(pred, y.unsqueeze(1))

    # where the magic happens: backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    since_report += len(X)
    if batch % 10 == 0:
      loss_value = loss.item()
      then = datetime.datetime.now()
      elapsed = (then - now).total_seconds()
      # Throughput over the samples actually processed since the last report;
      # guard against a zero-length interval on coarse clocks.
      rate = since_report / elapsed if elapsed > 0 else float("inf")
      print(f"loss: {loss_value:>6f} [{seen:>5d}/{total}] ({rate:.1f} its/sec)")
      now = then
      since_report = 0
      train_loss.append(loss_value)
    seen += len(X)
  return train_loss

In [None]:
def test(dataloader, model, loss_func):
  size = len(dataloader)
  num_batches = 170
  model.eval()
  test_loss = 0

  with torch.no_grad():
    for X, y in dataloader:
      X, y = X.to(device), y.to(device)
      pred = model(X)
      test_loss += loss_func(pred, y.unsqueeze(1)).item()
  test_loss /= num_batches
  print(f"Avg Loss: {test_loss:>8f}\n")
  return test_loss

In [None]:
# Fresh model (moved to the selected device), loss and optimizer for the real run.
ff = FeedForward().to(device)
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(ff.parameters(), lr=1e-3)

epochs = 10
train_loss, test_loss = [], []
for t in range(epochs):
  print(f"Epoch {t+1}\n------------------------------- \n")
  # One list of logged batch losses per epoch ...
  losses = train(train_loader, ff, loss_func, optimizer)
  train_loss.append(losses)
  # ... and one average test loss per epoch.
  epoch_test_loss = test(test_loader, ff, loss_func)
  test_loss.append(epoch_test_loss)

# Could add a condition that interrupts training when the loss doesn't change much
print('Done!')

In [None]:
# Average the losses logged within each epoch so the curve has one point per epoch.
epoch_mean_loss = torch.tensor(train_loss).mean(dim=1)

fig, ax = plt.subplots()
ax.plot(range(len(train_loss)), epoch_mean_loss)
# Label the figure so it can be read on its own.
ax.set(title="Training loss", xlabel="Epoch (0-based)", ylabel="Mean logged loss");

In [None]:
# One test-loss value was recorded per epoch.
fig, ax = plt.subplots()
ax.plot(range(len(test_loss)), test_loss)
# Label the figure so it can be read on its own.
ax.set(title="Test loss", xlabel="Epoch (0-based)", ylabel="Avg loss");

In [None]:
# Score one training sample with the trained network.
ff.eval()
with torch.no_grad():
  x, y = train_data[4]
  # ff was moved to `device` before training, so the sample has to live there
  # too; otherwise the forward pass fails when `device` is "cuda".
  x, y = x.to(device), y.to(device)
  pred = ff(x)
  print(pred)
  print(y)
# y has shape (1,); add a trailing axis to match the (1, 1) prediction.
print(loss_func(pred, y.unsqueeze(1)).item())

## If there's time

In [None]:
class evenMore(nn.Module):
  """Two ``moreAdvanced`` networks chained together with a ReLU in between.

  Args:
    s1: input size of the first network.
    s2: output size of the first network and input size of the second.
    s3: output size of the second network.
  """

  def __init__(self, s1, s2, s3):
    super().__init__()
    self.m1 = moreAdvanced(s1, s2)
    self.relu1 = nn.ReLU()
    self.m2 = moreAdvanced(s2, s3)

  def forward(self, x):
    """Apply m1, the ReLU, then m2."""
    return self.m2(self.relu1(self.m1(x)))

In [None]:
# 8 input features -> 8 intermediate features -> 1 output.
em = evenMore(s1=8, s2=8, s3=1)
em