In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim

In [2]:
import numpy as np

x = np.random.normal(0, 1, (1000, 10))
y = np.random.normal(0, 1, (1000, 1))

x = torch.tensor(x, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.float32)

## Leaky ReLU

In [3]:
net = nn.Sequential(
        nn.Linear(10, 32),
        nn.LeakyReLU(),
        nn.Linear(32, 1)
)

loss_fs = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

# epochs
optimizer.zero_grad()
    
y_pred = net(x)
loss = loss_fs(y_pred, y)

loss.backward()

optimizer.step()
print(loss)

tensor(1.1536, grad_fn=<MseLossBackward>)


## PReLU

In [4]:
net = nn.Sequential(
        nn.Linear(10, 32),
        nn.PReLU(),
        nn.Linear(32, 1)
)

loss_fs = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

# epochs
optimizer.zero_grad()
    
y_pred = net(x)
loss = loss_fs(y_pred, y)

loss.backward()

optimizer.step()
print(loss)

tensor(1.1908, grad_fn=<MseLossBackward>)


## SWISH

In [5]:
class Swish(nn.Module):
    def __init__(self):
        super(Swish, self).__init__()

    def forward(self, x):
        return x * (torch.sigmoid(x))

net = nn.Sequential(
        nn.Linear(10, 32),
        Swish(),
        nn.Linear(32, 1)
)

loss_fs = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

# epochs
optimizer.zero_grad()
    
y_pred = net(x)
loss = loss_fs(y_pred, y)

loss.backward()

optimizer.step()
print(loss)

tensor(1.1301, grad_fn=<MseLossBackward>)


## SELU

In [6]:
net = nn.Sequential(
        nn.Linear(10, 32),
        nn.SELU(),
        nn.Linear(32, 1)
)

loss_fs = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

# epochs
optimizer.zero_grad()
    
y_pred = net(x)
loss = loss_fs(y_pred, y)

loss.backward()

optimizer.step()
print(loss)

tensor(1.2963, grad_fn=<MseLossBackward>)


## GELU

In [7]:
class GELU(nn.Module):
    def __init__(self):
        super(GELU, self).__init__()

    def forward(self, x):
        return 0.5*x*(1+torch.tanh(np.sqrt(2/np.pi)*(x+0.044715*F.pow(x, 3))))

net = nn.Sequential(
        nn.Linear(10, 32),
        Swish(),
        nn.Linear(32, 1)
)

loss_fs = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

# epochs
optimizer.zero_grad()
    
y_pred = net(x)
loss = loss_fs(y_pred, y)

loss.backward()

optimizer.step()
print(loss)

tensor(1.1068, grad_fn=<MseLossBackward>)


## MISH

In [8]:
class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, x):
        return x * torch.tanh(F.softplus(x))

net = nn.Sequential(
        nn.Linear(10, 32),
        Mish(),
        nn.Linear(32, 1)
)

loss_fs = nn.MSELoss()
optimizer = optim.Adam(net.parameters())

# epochs
optimizer.zero_grad()
    
y_pred = net(x)
loss = loss_fs(y_pred, y)

loss.backward()

optimizer.step()
print(loss)

tensor(1.1780, grad_fn=<MseLossBackward>)
