Train a NN to predict y = sin(x1x2+x3^x4-x5)>cos(x0)

In [1]:
import numpy as np
import torch
from torch import nn

# data sampler

In [90]:
m = 1000
n = 10000
np.random.seed(12)
x = np.random.uniform(size=(n,m))
y = np.sin((x[:,1]*x[:,2]) + np.power(x[:,3], x[:,4]) - x[:,5]) > np.cos(x[:,0])

In [91]:
y.mean()

0.1312

# split data

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X_train, X_test,  y_train, y_test = train_test_split(x,y, test_size = 0.2, random_state=32)

In [6]:
len(X_train), len(y_train)

(8000, 8000)

# Model class for classification

In [7]:
from typing import List
from collections import OrderedDict

class DeepNN(nn.Module):
    def __init__(self, hidden_dims: List[int]) -> None:
        super().__init__()
        self.layers = []
        for i in range(len(hidden_dims)-1):
            self.layers.append((f'conv{i}', nn.Linear(hidden_dims[i], hidden_dims[i+1])))
            #self.layers.append((f'BatchNorm{i}', nn.BatchNorm1d(hidden_dims[i+1])))
            #self.layers.append((f'dropout{i}', nn.Dropout(p=0.1)))
            self.layers.append((f'relu{i}', nn.ReLU()))
        self.layers.append(('output', nn.Linear(hidden_dims[-1], 1)))
        print(self.layers)
        
    def build(self) -> None:
        '''
        def init_weights(m):
            if type(m)==nn.Linear:
                nn.init.normal_(m.weight)
                nn.init.constant_(m.bias, 0)
        '''
        self.model = nn.Sequential(OrderedDict(self.layers))
        # self.model.apply(init_weights)
    
    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.model(x)

In [8]:
d = DeepNN([1000, 512, 128, 64])

[('conv0', Linear(in_features=1000, out_features=512, bias=True)), ('relu0', ReLU()), ('conv1', Linear(in_features=512, out_features=128, bias=True)), ('relu1', ReLU()), ('conv2', Linear(in_features=128, out_features=64, bias=True)), ('relu2', ReLU()), ('output', Linear(in_features=64, out_features=1, bias=True))]


In [9]:
d.build()

In [10]:
X_train[0:2]

array([[0.96615647, 0.69441522, 0.36463435, ..., 0.88372571, 0.32812163,
        0.65418378],
       [0.99086928, 0.35123739, 0.44418384, ..., 0.95551566, 0.72651902,
        0.50276685]])

In [50]:
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [44]:
d.forward(x)

tensor([[0.0839],
        [0.0746],
        [0.0732],
        ...,
        [0.0710],
        [0.0732],
        [0.0820]], grad_fn=<AddmmBackward0>)

In [71]:
m = nn.LogSoftmax(dim=1)
input = torch.randn(1, 2)
output = m(input)

In [73]:
output

tensor([[-0.3643, -1.1865]])

In [75]:
N_EPOCHS = 100
loss_fn = nn.BCEWithLogitsLoss(reduction = 'mean')
optimizer = torch.optim.Adam(d.parameters())
    

In [76]:
torch.argmax(y_pred, axis=1)

tensor([0, 0, 0,  ..., 0, 0, 0])

In [82]:
y_pred.squeeze()

tensor([0.0839, 0.0746, 0.0732,  ..., 0.0710, 0.0732, 0.0820],
       grad_fn=<SqueezeBackward0>)

tensor([ True, False,  True,  ...,  True,  True,  True])

In [108]:
((y_pred>0.5).squeeze() == y_train_tensor).numpy().mean()

0.86925

In [112]:
train_loss, test_loss = [], []
train_acc, test_acc = [], []

d.train()
for t in range(N_EPOCHS):
    y_pred = d(X_train_tensor)
    loss = loss_fn(y_pred.squeeze(), y_train_tensor.float())
    acc = ((y_pred>0.5).squeeze() == y_train_tensor).numpy().mean()
    train_loss.append(float(loss))
    train_acc.append(acc)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    y_pred = d(X_test_tensor)
    loss = loss_fn(y_pred.squeeze(), y_test_tensor.float())
    acc = ((y_pred>0.5).squeeze() == y_test_tensor).numpy().mean()
    test_loss.append(float(loss))
    test_acc.append(acc)
    
    print(f"Train loss: {round(train_loss[-1],5)}, train accuracy: {round(train_acc[-1],5)}")
    print(f"Test loss: {round(test_loss[-1],5)}, test accuracy: {round(test_acc[-1],5)}")

Train loss: 0.10536, train accuracy: 0.94238
Test loss: 0.2336, test accuracy: 0.903
Train loss: 0.1105, train accuracy: 0.96788
Test loss: 0.30444, test accuracy: 0.8915
Train loss: 0.11777, train accuracy: 0.933
Test loss: 0.22796, test accuracy: 0.904
Train loss: 0.09976, train accuracy: 0.97112
Test loss: 0.25071, test accuracy: 0.903
Train loss: 0.0884, train accuracy: 0.95538
Test loss: 0.23273, test accuracy: 0.9085
Train loss: 0.08304, train accuracy: 0.966
Test loss: 0.2247, test accuracy: 0.9075
Train loss: 0.08619, train accuracy: 0.97238
Test loss: 0.27228, test accuracy: 0.9005
Train loss: 0.09543, train accuracy: 0.94875
Test loss: 0.22951, test accuracy: 0.9055
Train loss: 0.10097, train accuracy: 0.97175
Test loss: 0.31084, test accuracy: 0.893
Train loss: 0.11547, train accuracy: 0.93375
Test loss: 0.23529, test accuracy: 0.903
Train loss: 0.10909, train accuracy: 0.9705
Test loss: 0.31103, test accuracy: 0.895
Train loss: 0.11388, train accuracy: 0.93425
Test loss: 0.

In [116]:
torch.tensor(X_test, dtype=torch.float32)

tensor([[0.3720, 0.0813, 0.4079,  ..., 0.3477, 0.4968, 0.2155],
        [0.9243, 0.3137, 0.3303,  ..., 0.7141, 0.2330, 0.1494],
        [0.5804, 0.2009, 0.3148,  ..., 0.6338, 0.4188, 0.3704],
        ...,
        [0.2821, 0.0024, 0.8719,  ..., 0.8228, 0.2355, 0.4027],
        [0.2776, 0.8033, 0.4498,  ..., 0.4598, 0.1108, 0.4704],
        [0.1702, 0.5336, 0.6603,  ..., 0.4552, 0.1610, 0.3729]])

In [114]:
d()

RuntimeError: mat1 and mat2 must have the same dtype

In [117]:
d.model

Sequential(
  (conv0): Linear(in_features=1000, out_features=512, bias=True)
  (relu0): ReLU()
  (conv1): Linear(in_features=512, out_features=128, bias=True)
  (relu1): ReLU()
  (conv2): Linear(in_features=128, out_features=64, bias=True)
  (relu2): ReLU()
  (output): Linear(in_features=64, out_features=1, bias=True)
)