# ___

# [ Machine Learning in Geosciences ]

**Department of Applied Geoinformatics and Cartography, Charles University** 

*Lukas Brodsky lukas.brodsky@natur.cuni.cz*


# DEMO1: Nonlinear regression with high capacity ANN model



This notebook demonstrates how to deal with a high-capacity ANN model when only a small number of noisy samples is available. 

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.optim as optim

# Fix the NumPy and PyTorch random seeds so that repeated runs give the same numbers
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

<torch._C.Generator at 0x1df096dddf0>

In [None]:
# Sample 30 points of the quadratic y = 2x^2 + x on [-3, 3] and add Gaussian noise (std 3)
X = np.linspace(-3, 3, 30).reshape(-1, 1)
y = 2 * X**2 + X + np.random.normal(0, 3, X.shape)

# Push a handful of samples far off the curve to act as strong outliers
outlier_indices = [2, 5, 7, 8, 9]
outlier_values = [7, -6, 15, -11, 15]
for idx, offset in zip(outlier_indices, outlier_values):
    y[idx] += offset

In [None]:
# Scatter plot of the raw (noisy) samples
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(X, y, color='black', label="Data", zorder=3)
ax.legend()
ax.set_xlabel("X")
ax.set_ylabel("y")
plt.show()

In [None]:
# Turn the NumPy arrays into float32 tensors; the target is kept as a column vector.
# (Optional input standardisation, currently not applied: (X - X.mean()) / X.std())
X_train, y_train = (torch.tensor(arr, dtype=torch.float32) for arr in (X, y))
y_train = y_train.reshape(-1, 1)

In [None]:
# Define ANN model
class ANN(nn.Module):
    """Fully connected feed-forward network with ReLU hidden layers and one output unit.

    input_size: number of input features.
    hidden_layers: sequence with the width of each hidden layer, in order.
        An empty sequence gives a single linear layer from input to output.
    """

    # The default is a tuple rather than a list so it cannot be mutated
    # and shared between instances.
    def __init__(self, input_size=1, hidden_layers=(512, 256, 128, 64)):
        super().__init__()

        # Create hidden layers dynamically: Linear -> ReLU for every requested width
        layers = []
        prev_size = input_size
        for size in hidden_layers:
            layers.append(nn.Linear(prev_size, size))
            layers.append(nn.ReLU())
            prev_size = size

        layers.append(nn.Linear(prev_size, 1))  # Output layer (no activation)
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return its output."""
        return self.model(x)

In [None]:
# Function to train the model on the plain MSE loss (no weight penalty)
def train_model(model, X, y, epochs=2000, lr=0.005, batch_size=5):
    """Fit ``model`` in place with Adam on mini-batches of (X, y); returns nothing.

    epochs: number of passes over the data.
    lr: Adam learning rate.
    batch_size: mini-batch size; the loader is created with shuffle=True.
    """
    mse = nn.MSELoss()
    adam = optim.Adam(model.parameters(), lr=lr)

    batches = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(X, y),
        batch_size=batch_size,
        shuffle=True,
    )

    for _ in range(epochs):
        for inputs, targets in batches:
            adam.zero_grad()
            # Forward pass, loss, backpropagation, parameter update
            mse(model(inputs), targets).backward()
            adam.step()

### High capacity model

In [None]:
# High-capacity network: five hidden layers with 32 units each
hidden_layers = [32 for _ in range(5)]
model_hc = ANN(hidden_layers=hidden_layers)

In [None]:
# Fit the high-capacity model on the training tensors with train_model's default settings
train_model(model_hc, X_train, y_train)

In [None]:
# Evaluate the high-capacity model on a dense grid slightly wider than the training range
X_pred = torch.linspace(-3.5, 3.5, 100).reshape(-1, 1)
with torch.no_grad():  # inference only, gradients are not needed
    hc_output = model_hc(X_pred)
y_pred_hc = hc_output.numpy()

In [None]:
# Plot the high-capacity model's prediction curve on top of the data
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='black', label="Data", zorder=3)
plt.plot(X_pred, y_pred_hc, label="ANN model", linestyle='dotted', color='red', linewidth=2)

plt.legend()
plt.title("High capacity ANN model prediction")
plt.xlabel("X")
plt.ylabel("y")
plt.show()

### Low capacity model

In [None]:
# Low-capacity network: two hidden layers with a single unit each
hidden_layers = [1, 1]
model_lc = ANN(hidden_layers=hidden_layers)

In [None]:
# Fit the low-capacity model on the same training tensors, again with the default settings
train_model(model_lc, X_train, y_train)

In [None]:
# Evaluate the low-capacity model on the same dense grid
X_pred = torch.linspace(-3.5, 3.5, 100).reshape(-1, 1)
with torch.no_grad():  # inference only, gradients are not needed
    lc_output = model_lc(X_pred)
y_pred_lc = lc_output.numpy()

In [None]:
# Plot the low-capacity model's prediction curve on top of the data
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='black', label="Data", zorder=3)
plt.plot(X_pred, y_pred_lc, label="ANN model", linestyle='dotted', color='red', linewidth=2)

plt.legend()
# This figure shows model_lc, so the title names the low-capacity model
plt.title("Low capacity ANN model prediction")
plt.xlabel("X")
plt.ylabel("y")
plt.show()

### Compare the two models' parameters

In [None]:
# Function to compute mean, standard deviation, minimum and maximum of model parameters
def compute_param_stats(model):
    """Return ``(mean, std, min, max)`` as floats over all parameters of ``model``.

    Every tensor yielded by ``model.parameters()`` is flattened and pooled
    into one vector before the statistics are computed.
    """
    # detach(): only the values are needed, not the autograd graph.
    # reshape(-1) rather than view(-1): also works for non-contiguous parameters.
    all_params = torch.cat([param.detach().reshape(-1) for param in model.parameters()])
    return (
        all_params.mean().item(),
        all_params.std().item(),
        all_params.min().item(),
        all_params.max().item(),
    )

In [None]:
# Summary statistics of the parameters of both trained models
stats_hc = compute_param_stats(model_hc)
stats_lc = compute_param_stats(model_lc)
mean_hc, std_hc, min_hc, max_hc = stats_hc
mean_lc, std_lc, min_lc, max_lc = stats_lc

In [None]:
# Print the parameter statistics of both models, including the mean computed above
print(f"Model with high capacity: Min = {min_hc:.5f}, Max = {max_hc:.5f}, Mean = {mean_hc:.5f}, Std = {std_hc:.5f}")
print(f"Model with low capacity: Min = {min_lc:.5f}, Max = {max_lc:.5f}, Mean = {mean_lc:.5f}, Std = {std_lc:.5f}")

In [None]:
# Function to extract weights for visualization
def get_model_weights(model):
    """Return all parameters of ``model`` as one flat NumPy array.

    The tensors from ``model.parameters()`` are flattened and concatenated in
    iteration order, so the result holds every parameter the model exposes
    (for ``nn.Linear`` layers that means weights and biases).
    """
    # reshape(-1) rather than view(-1): also works for non-contiguous parameters
    flat = torch.cat([param.detach().reshape(-1) for param in model.parameters()])
    return flat.numpy()

In [None]:
# Flatten the parameters of each trained model into a 1-D NumPy array for plotting
weights_hc = get_model_weights(model_hc)
weights_lc = get_model_weights(model_lc)

In [None]:
# Box plot comparing the parameter distributions of the two models
plt.figure(figsize=(8, 6))
sns.boxplot(data=[weights_hc, weights_lc],
            palette=["red", "blue"])
plt.xticks([0, 1], ["High capacity", "Low capacity"])
plt.ylabel("Weight Values")
plt.title("Box Plot of Model Weights")
# Render explicitly, as the other figures in this notebook do
plt.show()

### How to "tame" the high capacity model through the weights? 

Use the sum of the absolute values of the weights:

$$
 \sum_{j=1}^{n} |w_j|
$$

where:

𝑤  are the model weights,
𝑛 is the number of weights.

In [None]:
# Sum of the absolute values of all parameters of the high-capacity model
abs_sums = [p.abs().sum() for p in model_hc.parameters()]
sum(abs_sums).item()

Or use the sum of the squared weights:

$$
 \sum_{j=1}^{n} w_j^2
$$

where:

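𝑤  are the model weights,
𝑛 is the number of weights.

Note: the cell below calls `param.norm(2)` on every parameter tensor and adds the results, i.e. it sums the (unsquared) L2 norms $\sqrt{\sum_j w_j^2}$ of the individual tensors. This is closely related to, but not identical with, the sum of squares above; `param.pow(2).sum()` would give the exact sum of squares.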


In [None]:
# Sum of the L2 norms of the individual parameter tensors of the high-capacity model
tensor_norms = [p.norm(2) for p in model_hc.parameters()]
sum(tensor_norms).item()

### Assumption 
We wish to reduce the variation of the model weights. 
Let's use the above indicator as a penalty in the `Loss` function. 

In [None]:
# Loss function and Adam optimizer for the high-capacity model
criterion = nn.MSELoss()
learning_rate = 0.005
optimizer = optim.Adam(model_hc.parameters(), lr=learning_rate)

In [None]:
# Value of the norm penalty for the current weights of the high-capacity model.
# During training it has to be recomputed in every iteration, because the weights change.
penalty = sum(p.norm(2) for p in model_hc.parameters()).item()
print(penalty)

In [None]:
# Multiplier applied to the parameter-norm penalty before it is added to the MSE loss
penalty_weight = 0.4

In [None]:
# Function to train the model with a parameter-norm penalty added to the MSE loss
def train_model(model, X, y, epochs=2000, lr=0.005, batch_size=5, penalty_scale=None):
    """Fit ``model`` in place with Adam on MSE plus a parameter-norm penalty.

    The penalty is the sum of the L2 norms of all parameter tensors and is
    recomputed for every mini-batch from the current weights.

    epochs: number of passes over the data.
    lr: Adam learning rate.
    batch_size: mini-batch size; the loader is created with shuffle=True.
    penalty_scale: multiplier of the penalty. ``None`` (the default) uses the
        notebook-level ``penalty_weight`` variable, so existing calls behave
        as before; pass a number to make the call independent of that global.
    """
    # The global is only looked up when no explicit value is passed
    scale = penalty_weight if penalty_scale is None else penalty_scale

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    dataset = torch.utils.data.TensorDataset(X, y)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        for batch_X, batch_y in dataloader:
            optimizer.zero_grad()
            y_pred = model(batch_X)
            penalty = sum(param.norm(2) for param in model.parameters())
            # Data-fit term plus scaled penalty, combined out of place
            loss = criterion(y_pred, batch_y) + scale * penalty

            # Backpropagation
            loss.backward()
            optimizer.step()

In [None]:
# Fresh network with five hidden layers of 32 units, to be trained with the penalty
hidden_layers = [32 for _ in range(5)]
model_hc_penalty = ANN(hidden_layers=hidden_layers)

In [None]:
# Train the new model; train_model is called for its side effect on the model only
train_model(model_hc_penalty, X_train, y_train)

In [None]:
# Evaluate the penalty-trained model on the same dense grid
X_pred = torch.linspace(-3.5, 3.5, 100).reshape(-1, 1)
with torch.no_grad():  # inference only, gradients are not needed
    penalty_output = model_hc_penalty(X_pred)
y_pred_hc_penalty = penalty_output.numpy()

In [None]:
# Compare the unpenalised and the penalty-trained high-capacity predictions
plt.figure(figsize=(10, 6))
plt.scatter(X, y, color='black', label="Data", zorder=3)
plt.plot(X_pred, y_pred_hc, label="High capacity ANN model", linestyle='dotted', color='red', linewidth=2)
plt.plot(X_pred, y_pred_hc_penalty, label="ANN model with extra loss penalty", color='blue', linewidth=2)

plt.legend()
plt.title("High capacity ANN model prediction")
plt.xlabel("X")
plt.ylabel("y")
plt.show()


In [None]:
# Flatten the parameters of the penalty-trained model into a 1-D NumPy array for plotting
weights_hc_penalty = get_model_weights(model_hc_penalty)

In [None]:
# Box plot comparing the parameters of the high-capacity model with and without the penalty
plt.figure(figsize=(8, 6))
sns.boxplot(data=[weights_hc, weights_hc_penalty],
            palette=["red", "blue"])
plt.xticks([0, 1], ["High capacity model", "High capacity model with penalty"])
plt.ylabel("Weight Values")
plt.title("Box Plot of Model Weights")
# Render explicitly, as the other figures in this notebook do
plt.show()