# FiveDReg Python Package

This notebook demonstrates all examples from the official documentation.
Workflow as a full package.

**Ground Truth Function:**

$$y = 2.0 \cdot x_1 + (-1.5) \cdot x_2^2 + 3.0 \cdot \sin(x_3) + 0.5 \cdot x_4 \cdot x_5$$

---

## Quick Start Example

In [1]:
from fivedreg import (
    FiveDRegressor,
    create_synthetic_dataset,
    save_model,
    load_model
)

# Create data
X_train, y_train = create_synthetic_dataset(n_samples=1000, seed=42)
X_test, y_test = create_synthetic_dataset(n_samples=200, seed=43)

# Train model
model = FiveDRegressor(hidden_layers=(64, 32, 16), max_epochs=50)
model.fit(X_train, y_train)

# Save and load
save_model(model, "quickstart_model.pt")
loaded_model = load_model("quickstart_model.pt")

# Predict
predictions = loaded_model.predict(X_test)
print(f"Predictions: {predictions[:5]}")

Predictions: [-1.9702132  2.0419214 -4.287335  -6.829669   3.5995965]


---

## Example 1: Create and Save Multiple Datasets

In [2]:
from fivedreg import create_synthetic_dataset, save_dataset, list_datasets

# Create train/val/test splits
datasets = {
    'train': create_synthetic_dataset(n_samples=3000, seed=42),
    'val': create_synthetic_dataset(n_samples=600, seed=43),
    'test': create_synthetic_dataset(n_samples=500, seed=44)
}

# Save all datasets
for name, (X, y) in datasets.items():
    save_dataset(X, y, f"{name}_data.pkl")
    print(f"Saved {name} dataset: {X.shape[0]} samples")

# List all datasets
print("\nAvailable datasets:")
for dataset in list_datasets():
    print(f"  - {dataset}")

Saved train dataset: 3000 samples
Saved val dataset: 600 samples
Saved test dataset: 500 samples

Available datasets:
  - train_data.pkl
  - val_data.pkl
  - test_data.pkl


## Example 2: Load and Merge Datasets

In [3]:
import numpy as np
from fivedreg import load_dataset, save_dataset

# Load multiple datasets
X1, y1 = load_dataset("train_data.pkl")
X2, y2 = load_dataset("val_data.pkl")

# Merge datasets
X_merged = np.vstack([X1, X2])
y_merged = np.concatenate([y1, y2])

# Save merged dataset
save_dataset(X_merged, y_merged, "merged_data.pkl")
print(f"Merged dataset: {X_merged.shape[0]} samples")

Merged dataset: 3600 samples


## Example 3: Add Custom Noise

In [4]:
import numpy as np
from fivedreg import create_synthetic_dataset, save_dataset

# Create clean dataset
X, y_clean = create_synthetic_dataset(n_samples=1000, seed=42)

# Add different noise levels manually
noise_levels = [0.1, 0.5, 1.0]

for sigma in noise_levels:
    # Add Gaussian noise manually
    rng = np.random.default_rng(0)
    noise = rng.normal(0, sigma, size=y_clean.shape)
    y_noisy = y_clean + noise
    
    # Save noisy dataset
    filename = f"noisy_data_{sigma:.1f}.pkl"
    save_dataset(X, y_noisy, filename)
    print(f"Created dataset with noise level={sigma}")

Created dataset with noise level=0.1
Created dataset with noise level=0.5
Created dataset with noise level=1.0


---

## Example 4: Basic Training with Progress Monitoring

In [5]:
from fivedreg import FiveDRegressor, create_synthetic_dataset

# Create datasets
X_train, y_train = create_synthetic_dataset(n_samples=2000, seed=42)
X_val, y_val = create_synthetic_dataset(n_samples=400, seed=43)

# Train with verbose output
model = FiveDRegressor(
    hidden_layers=(64, 32, 16),
    max_epochs=100,
    batch_size=32,
    learning_rate=1e-3,
    verbose=True  # Shows progress during training
)

model.fit(X_train, y_train, X_val, y_val)

# Access training history
print(f"\nFinal training loss: {model.history['train_loss'][-1]:.6f}")
print(f"Final validation loss: {model.history['val_loss'][-1]:.6f}")

Epoch    1: train_mse=12.984546 val_mse=7.072946 train_r2=0.0085 val_r2=0.3723
Epoch   20: train_mse=0.184240 val_mse=0.201838 train_r2=0.9859 val_r2=0.9821
Epoch   40: train_mse=0.072423 val_mse=0.079293 train_r2=0.9945 val_r2=0.9930
Epoch   60: train_mse=0.031439 val_mse=0.043103 train_r2=0.9976 val_r2=0.9962
Epoch   80: train_mse=0.027356 val_mse=0.028424 train_r2=0.9979 val_r2=0.9975
Epoch  100: train_mse=0.015373 val_mse=0.021336 train_r2=0.9988 val_r2=0.9981

Final training loss: 0.015373
Final validation loss: 0.021336


## Example 5: Training with Patience-Based Early Stopping

In [6]:
from fivedreg import FiveDRegressor, create_synthetic_dataset

X_train, y_train = create_synthetic_dataset(n_samples=2000, seed=42)
X_val, y_val = create_synthetic_dataset(n_samples=400, seed=43)

# Train with patience-based early stopping
# Early stopping is triggered automatically when validation data is provided
# and no improvement occurs for 'patience' epochs
model = FiveDRegressor(
    hidden_layers=(128, 64, 32),
    max_epochs=500,  # High max_epochs
    patience=30,  # Stop if no improvement for 30 epochs
    min_delta=1e-6,  # Minimum change to qualify as improvement
    verbose=True
)

# Providing validation data enables early stopping behavior
model.fit(X_train, y_train, X_val, y_val)

# Check if early stopping occurred
epochs_trained = len(model.history['epoch'])
print(f"Training stopped at epoch {epochs_trained}")

Epoch    1: train_mse=14.736540 val_mse=11.505248 train_r2=-0.1253 val_r2=-0.0211
Epoch   20: train_mse=0.521205 val_mse=0.482787 train_r2=0.9602 val_r2=0.9572
Epoch   40: train_mse=0.135748 val_mse=0.168969 train_r2=0.9896 val_r2=0.9850
Epoch   60: train_mse=0.051124 val_mse=0.067393 train_r2=0.9961 val_r2=0.9940
Epoch   80: train_mse=0.030811 val_mse=0.043782 train_r2=0.9976 val_r2=0.9961
Epoch  100: train_mse=0.020390 val_mse=0.032992 train_r2=0.9984 val_r2=0.9971
Epoch  120: train_mse=0.014892 val_mse=0.030595 train_r2=0.9989 val_r2=0.9973
Epoch  140: train_mse=0.012812 val_mse=0.025517 train_r2=0.9990 val_r2=0.9977
Epoch  160: train_mse=0.009720 val_mse=0.024149 train_r2=0.9993 val_r2=0.9979
Epoch  180: train_mse=0.008486 val_mse=0.022015 train_r2=0.9994 val_r2=0.9980
Epoch  200: train_mse=0.006704 val_mse=0.020565 train_r2=0.9995 val_r2=0.9982
Epoch  220: train_mse=0.006399 val_mse=0.020123 train_r2=0.9995 val_r2=0.9982
Epoch  240: train_mse=0.005763 val_mse=0.020197 train_r2=0.9

## Example 6: Hyperparameter Comparison

In [7]:
from fivedreg import FiveDRegressor, create_synthetic_dataset
from sklearn.metrics import r2_score
import numpy as np

# Create datasets
X_train, y_train = create_synthetic_dataset(n_samples=2000, seed=42)
X_val, y_val = create_synthetic_dataset(n_samples=400, seed=43)
X_test, y_test = create_synthetic_dataset(n_samples=500, seed=44)

# Test different architectures
architectures = [
    (32, 16),
    (64, 32, 16),
    (128, 64, 32, 16)
]

results = {}

for arch in architectures:
    print(f"\nTesting architecture: {arch}")
    
    model = FiveDRegressor(
        hidden_layers=arch,
        max_epochs=100,
        verbose=False
    )
    model.fit(X_train, y_train, X_val, y_val)
    
    # Evaluate
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    
    results[arch] = r2
    print(f"  R² score: {r2:.6f}")

# Find best architecture
best_arch = max(results, key=results.get)
print(f"\nBest architecture: {best_arch} (R² = {results[best_arch]:.6f})")


Testing architecture: (32, 16)
  R² score: 0.966261

Testing architecture: (64, 32, 16)
  R² score: 0.989771

Testing architecture: (128, 64, 32, 16)
  R² score: 0.996946

Best architecture: (128, 64, 32, 16) (R² = 0.996946)


## Example 7: Training with Regularization

In [8]:
from fivedreg import FiveDRegressor, create_synthetic_dataset

X_train, y_train = create_synthetic_dataset(n_samples=2000, seed=42)
X_val, y_val = create_synthetic_dataset(n_samples=400, seed=43)

# Train with weight decay (L2 regularization)
model = FiveDRegressor(
    hidden_layers=(128, 64, 32),
    max_epochs=150,
    weight_decay=1e-4,  # L2 regularization
    verbose=True
)

model.fit(X_train, y_train, X_val, y_val)

Epoch    1: train_mse=14.736452 val_mse=11.505078 train_r2=-0.1253 val_r2=-0.0211
Epoch   20: train_mse=0.526133 val_mse=0.482770 train_r2=0.9598 val_r2=0.9572
Epoch   40: train_mse=0.151865 val_mse=0.178982 train_r2=0.9884 val_r2=0.9841
Epoch   60: train_mse=0.059693 val_mse=0.083397 train_r2=0.9954 val_r2=0.9926
Epoch   80: train_mse=0.036484 val_mse=0.055408 train_r2=0.9972 val_r2=0.9951
Epoch  100: train_mse=0.022975 val_mse=0.038103 train_r2=0.9982 val_r2=0.9966
Epoch  120: train_mse=0.017541 val_mse=0.033812 train_r2=0.9987 val_r2=0.9970
Epoch  140: train_mse=0.014743 val_mse=0.026533 train_r2=0.9989 val_r2=0.9976


FiveDRegressor(
  (network): Sequential(
    (0): Linear(in_features=5, out_features=128, bias=True)
    (1): ReLU()
    (2): Linear(in_features=128, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=32, bias=True)
    (5): ReLU()
    (6): Linear(in_features=32, out_features=1, bias=True)
  )
  (train_mse_metric): MeanSquaredError()
  (val_mse_metric): MeanSquaredError()
  (train_r2_metric): R2Score()
  (val_r2_metric): R2Score()
  (train_mae_metric): MeanAbsoluteError()
  (val_mae_metric): MeanAbsoluteError()
)

---

## Example 8: Batch Predictions

In [9]:
from fivedreg import FiveDRegressor, create_synthetic_dataset, load_model
import numpy as np

# Load trained model
model = load_model("quickstart_model.pt")

# Create large test set
X_test, y_test = create_synthetic_dataset(n_samples=10000, seed=99)

# Make predictions (automatically batched)
y_pred = model.predict(X_test)

print(f"Made {len(y_pred)} predictions")
print(f"Predictions range: [{y_pred.min():.2f}, {y_pred.max():.2f}]")

Made 10000 predictions
Predictions range: [-18.55, 7.25]


## Example 9: Single Sample Prediction

In [10]:
import numpy as np
from fivedreg import load_model

# Load model
model = load_model("quickstart_model.pt")

# Single sample (must be 2D)
x_single = np.array([[1.0, 2.0, 3.0, 4.0, 5.0]], dtype=np.float32)
y_pred = model.predict(x_single)

print(f"Input: {x_single[0]}")
print(f"Prediction: {y_pred[0]:.4f}")

Input: [1. 2. 3. 4. 5.]
Prediction: 6.0079


## Example 10: Custom Input Prediction

In [11]:
import numpy as np
from fivedreg import load_model, ground_truth_function

# Load model
model = load_model("quickstart_model.pt")

# Create custom inputs
custom_inputs = np.array([
    [1.0, 0.0, 0.0, 0.0, 0.0],  # Only x1 = 1
    [0.0, 1.0, 0.0, 0.0, 0.0],  # Only x2 = 1
    [0.0, 0.0, 1.0, 0.0, 0.0],  # Only x3 = 1
    [0.0, 0.0, 0.0, 1.0, 1.0],  # Only x4, x5 = 1
], dtype=np.float32)

# Make predictions
predictions = model.predict(custom_inputs)
ground_truth = ground_truth_function(custom_inputs)

# Compare
for i, (pred, truth) in enumerate(zip(predictions, ground_truth)):
    error = abs(pred - truth)
    print(f"Sample {i+1}: pred={pred:.4f}, truth={truth:.4f}, error={error:.4f}")

Sample 1: pred=2.6421, truth=2.0000, error=0.6421
Sample 2: pred=-2.3894, truth=-1.5000, error=0.8894
Sample 3: pred=2.3321, truth=2.5244, error=0.1923
Sample 4: pred=1.2126, truth=0.5000, error=0.7126


---

## Example 11: Comprehensive Evaluation

In [12]:
from fivedreg import load_model, create_synthetic_dataset
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np

# Load model and create test data
model = load_model("quickstart_model.pt")
X_test, y_test = create_synthetic_dataset(n_samples=1000, seed=99)

# Make predictions
y_pred = model.predict(X_test)

# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Calculate percentage errors
eps = 1e-12
mape = np.mean(np.abs((y_test - y_pred) / (np.abs(y_test) + eps))) * 100
max_error = np.max(np.abs(y_test - y_pred))

print("Model Evaluation Metrics:")
print(f"  R² Score:      {r2:.6f}")
print(f"  MSE:           {mse:.6f}")
print(f"  RMSE:          {rmse:.6f}")
print(f"  MAE:           {mae:.6f}")
print(f"  MAPE:          {mape:.2f}%")
print(f"  Max Error:     {max_error:.6f}")

Model Evaluation Metrics:
  R² Score:      0.939575
  MSE:           0.799277
  RMSE:          0.894023
  MAE:           0.612074
  MAPE:          113.74%
  Max Error:     6.974006
