In [1]:
import torch
import torch.nn as nn
import numpy as np

from utils import select_available, train

# Choose the compute backend with the project-local helper imported from utils.
# NOTE(review): `device` is not referenced again in the cells shown here —
# confirm whether train() handles device placement or whether this is unused.
device = select_available()

Using MPS


In [27]:
class LinearBlock(nn.Module):
    """Fully connected layer (with bias) followed by a ReLU non-linearity.

    Args:
        channels_in: Size of the last dimension of the input.
        channels_out: Size of the last dimension of the output.
    """

    def __init__(self, channels_in, channels_out):
        super().__init__()
        # Stored as a Sequential called `layers`, so parameters are exposed
        # under the names `layers.0.weight` / `layers.0.bias`.
        self.layers = nn.Sequential(
            nn.Linear(channels_in, channels_out, bias=True),
            nn.ReLU(),
        )

    def forward(self, x):
        """Return ``relu(W x + b)`` for the input ``x``."""
        activated = self.layers(x)
        return activated

In [28]:
class SinActivation(nn.Module):
    """Element-wise sine, wrapped as a module so it can be placed in nn.Sequential."""

    def forward(self, x):
        """Return ``sin(x)`` computed element-wise."""
        return x.sin()
    
class CosActivation(nn.Module):
    """Element-wise cosine, wrapped as a module so it can be placed in nn.Sequential."""

    def forward(self, x):
        """Return ``cos(x)`` computed element-wise."""
        return x.cos()

class FourierBlock(nn.Module):
    """Sum of a sine branch and a cosine branch over the same input.

    Each branch applies its own bias-free linear map before the trigonometric
    activation, so the block computes ``sin(W_s x) + cos(W_c x)`` with two
    independent weight matrices ``W_s`` and ``W_c``.

    Args:
        channels_in: Size of the last dimension of the input.
        channels_out: Size of the last dimension of the output.
    """

    def __init__(self, channels_in, channels_out):
        super().__init__()
        # The sine branch is constructed before the cosine branch; each is a
        # two-element Sequential (linear map, then activation).
        self.layers_sin = nn.Sequential(
            nn.Linear(channels_in, channels_out, bias=False),
            SinActivation(),
        )
        self.layers_cos = nn.Sequential(
            nn.Linear(channels_in, channels_out, bias=False),
            CosActivation(),
        )

    def forward(self, x):
        """Evaluate both branches on ``x`` and add their outputs."""
        return self.layers_sin(x) + self.layers_cos(x)

In [29]:
class MLP(nn.Module):
    """Multi-layer perceptron: an input block, hidden blocks, and a linear head.

    Args:
        input_features: Number of input features.
        hidden_layers: Non-empty sequence of hidden widths. The first entry is
            the output width of the input block; each consecutive pair of
            entries defines one further hidden block.
        output_features: Number of output features of the final linear layer.
        block_cls: Callable ``(channels_in, channels_out) -> nn.Module`` used
            to build the input block and every hidden block. ``None`` (the
            default) selects ``LinearBlock``; pass e.g. ``FourierBlock`` to
            switch the architecture without editing this class.

    Raises:
        IndexError: If ``hidden_layers`` is empty.
    """

    def __init__(self, input_features, hidden_layers, output_features, block_cls=None):
        super().__init__()

        # Resolve the default here rather than in the signature so the class
        # can be defined before LinearBlock exists in the namespace.
        if block_cls is None:
            block_cls = LinearBlock

        self.fc_in = block_cls(input_features, hidden_layers[0])

        # One block per consecutive pair of widths; this is an empty
        # Sequential (identity) when only a single hidden width is given.
        self.layers = nn.Sequential(*(
            block_cls(width_in, width_out)
            for width_in, width_out in zip(hidden_layers[:-1], hidden_layers[1:])
        ))

        self.fc_out = nn.Linear(hidden_layers[-1], output_features, bias=True)

    def forward(self, x):
        """Run ``x`` through the input block, the hidden blocks, and the head."""
        x = self.fc_in(x)
        x = self.layers(x)
        return self.fc_out(x)

In [16]:
def func(x):
    """Target function to fit: ``f(x) = x**2``, applied element-wise.

    Alternative targets to try (update ``d_func_dx`` to match):
        x**2 - torch.exp(-2*x**2)
        torch.sin(x) + torch.cos(3*x) + torch.sin(5*x)
    """
    exponent = 2
    return x ** exponent

def d_func_dx(x):
    """Analytic derivative of ``func``: ``f'(x) = 2x``, applied element-wise.

    Derivatives of the alternative targets listed for ``func``:
        2*x + 4*x*torch.exp(-2*x**2)
        torch.cos(x) - 3*torch.sin(3*x) + 5*torch.cos(5*x)
    """
    return x * 2

In [30]:
# Seed torch's RNGs before the model is built so weight initialisation (and
# therefore the fitted result) is repeatable across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# 200 evenly spaced sample points on [-pi, pi]; unsqueeze(1) makes each sample
# a row with a single feature, matching features_in = 1 below.
x_train = torch.linspace(-np.pi, np.pi, 200).unsqueeze(1)
y_train = func(x_train)

features_in = 1
features_out = 1
hidden = [100, 100]

model = MLP(features_in, hidden, features_out)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

epochs = 1000
# Run label built from the hidden widths, e.g. "MLP_100_100_relu".
model_name = f"MLP_{'_'.join(map(str, hidden))}_relu"
# train() is the project-local helper from utils; judging by the recorded cell
# output it shows a progress bar and saves a GIF named after model_name.
images = train(
    model, x_train, y_train, epochs, optimizer, loss_fn,
    plot_interval=10, model_name=model_name,
)

Training Progress: 100%|███████████████| 1000/1000 [00:06<00:00, 162.88it/s, Train Loss: 0.00003181]


GIF saved at gif/MLP_100_100_relu.gif


In [32]:
model.eval()
n_params = sum(p.numel() for p in model.parameters())
print(f'Model parameter count: {n_params}')

# A leaf tensor with requires_grad=True, so backward() stores d(output)/dx in
# x_point.grad. Note that backward() also accumulates into the .grad of the
# model's parameters.
x_point = torch.tensor([0.4], requires_grad=True)
output = model(x_point)
output.backward()

x_value = x_point.item()

# Model results; labelled separately from the analytic values below so the
# two derivative lines can be told apart in the output.
print(f"Prediction at x={x_value:.4f}: {output.item():.4f}")
print(f"Predicted derivative at x={x_value:.4f}: {x_point.grad.item():.4f}")

# Analytic reference values from func / d_func_dx.
print(f"Exact value at x={x_value:.4f}: {func(x_point).item():.4f}")
print(f"Exact derivative at x={x_value:.4f}: {d_func_dx(x_point).item():.4f}")

Model parameter count: 10401
Prediction at x=0.4000: 0.1479
Derivative at x=0.4000: 0.6327
Exact value at x=0.4000: 0.1600
Derivative at x=0.4000: 0.8000
