In [None]:
#  Import Required Libraries

import torch                          # PyTorch library for building and training neural networks
import torch.nn as nn                 # Neural network layers and components (e.g., Linear, ReLU)
import numpy as np                   # For numerical operations (arrays, random numbers, etc.)
from sklearn.datasets import fetch_covtype          # Load the Covertype dataset (forest cover types)
from sklearn.model_selection import train_test_split  # To split data into training and testing sets
from sklearn.preprocessing import StandardScaler      # To normalize feature values
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score  # Evaluation metrics
import pandas as pd                  # For creating dataframes and saving results as CSV
import matplotlib.pyplot as plt      # For plotting and visualization
import os                            # Used with psutil to track system resources
import psutil                        # To measure memory usage of the current process
import time                          # To track training time

#  Function to Print Current RAM Usage
def print_memory_usage(label=""):
    """Print the resident set size (RSS) of the current Python process in MB.

    Args:
        label: Free-form tag shown in square brackets in front of the
            reading, so successive measurements can be told apart.
    """
    # RSS of this very process, as reported by psutil, in bytes
    rss_bytes = psutil.Process(os.getpid()).memory_info().rss
    # bytes -> KiB -> MiB
    rss_mb = rss_bytes / 1024 / 1024
    print(f"[{label}] RAM usage: {rss_mb:.2f} MB")


In [None]:
# Seed NumPy and PyTorch so repeated runs draw the same random numbers
np.random.seed(0)
torch.manual_seed(0)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report which device was selected
print(f"Using device: {device}")


Using device: cpu


In [None]:
# Load and Preprocess Covertype Dataset

# Load the Covertype dataset (predicts forest cover types based on cartographic variables)
data = fetch_covtype()

# Extract features (X) and labels (y)
X, y = data.data, data.target

# Split the dataset into training and test sets (80% training, 20% testing).
# random_state is fixed so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the features. The scaler is fitted on the training split only and
# then reused for the test split, so no test statistics leak into preprocessing.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the features to float32 tensors directly on the selected device.
# torch.as_tensor with explicit dtype/device replaces the legacy
# torch.FloatTensor(...).to(device) constructors.
X_train = torch.as_tensor(X_train, dtype=torch.float32, device=device)
X_test = torch.as_tensor(X_test, dtype=torch.float32, device=device)

# Convert the labels to int64 tensors and shift them from 1–7 to 0–6
# (nn.CrossEntropyLoss expects 0-based class indices).
y_train = torch.as_tensor(y_train - 1, dtype=torch.long, device=device)
y_test = torch.as_tensor(y_test - 1, dtype=torch.long, device=device)


In [None]:
#  Neural Network Class Definition

class NeuralNetwork(nn.Module):
    """Fully connected feed-forward network with configurable hidden layers.

    Every hidden block is ``Linear -> activation -> Dropout``; a final
    ``Linear`` layer maps to ``output_size`` raw scores (no softmax is applied).

    Args:
        input_size: Number of input features.
        hidden_sizes: Iterable with the width of each hidden layer, in order.
            An empty iterable yields a single linear layer.
        output_size: Number of outputs of the final layer.
        hidden_activation: ``'tanh'`` or ``'relu'``. Selects both the hidden
            activation and the matching weight initialization.
        dropout_rate: Dropout probability applied after each hidden activation.

    Raises:
        ValueError: If ``hidden_activation`` is neither ``'tanh'`` nor ``'relu'``.
    """

    def __init__(self, input_size, hidden_sizes, output_size, hidden_activation='tanh', dropout_rate=0.2):
        super().__init__()

        # Reject unknown names explicitly. Previously any value other than
        # 'tanh' silently produced ReLU layers while any value other than
        # 'relu' got Xavier init, so a typo yielded a mismatched network.
        if hidden_activation not in ('tanh', 'relu'):
            raise ValueError(
                f"hidden_activation must be 'tanh' or 'relu', got {hidden_activation!r}"
            )
        use_relu = hidden_activation == 'relu'

        layers = []               # Layers in forward order
        prev_size = input_size    # Input width of the next Linear layer

        # Add one Linear -> activation -> Dropout block per hidden layer
        for hidden_size in hidden_sizes:
            layers.append(nn.Linear(prev_size, hidden_size))
            layers.append(nn.ReLU() if use_relu else nn.Tanh())
            layers.append(nn.Dropout(dropout_rate))
            prev_size = hidden_size

        # Final output layer producing the raw class scores
        layers.append(nn.Linear(prev_size, output_size))

        # Wrap all layers in a sequential container for a one-call forward pass
        self.network = nn.Sequential(*layers)

        # Re-initialize every linear layer (done after all layers are built, so
        # the order of random draws is the same as before for a given seed)
        for layer in self.network:
            if isinstance(layer, nn.Linear):
                if use_relu:
                    nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
                else:
                    nn.init.xavier_normal_(layer.weight)
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return the network output for input batch ``x``."""
        return self.network(x)


In [None]:
#  Training Function with Evaluation and Early Stopping

def train_model(model, X_train, y_train, X_test, y_test, epochs, batch_size, lr, patience=10):
    """Train ``model`` with Adam and cross-entropy loss, with early stopping.

    After every epoch the model is evaluated on a fixed random subset (at most
    10,000 samples each) of the training and test data. Training stops once the
    test-subset loss has not improved for ``patience`` consecutive epochs.

    Args:
        model: The ``nn.Module`` to train; it is updated in place.
        X_train: Training features, indexed by sample along dim 0.
        y_train: Training targets as 0-based class indices.
        X_test: Test features, indexed by sample along dim 0.
        y_test: Test targets as 0-based class indices.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size (positive integer). The final batch of an
            epoch may be smaller.
        lr: Learning rate for Adam.
        patience: Number of non-improving epochs tolerated before stopping.

    Returns:
        dict with keys ``'train_acc'``, ``'test_acc'``, ``'train_loss'`` and
        ``'test_loss'``, each a list with one value per completed epoch.

    Raises:
        ValueError: If ``X_train`` or ``X_test`` is empty, or ``batch_size``
            is not positive.
    """
    n_samples = len(X_train)
    if n_samples == 0 or len(X_test) == 0:
        raise ValueError("X_train and X_test must each contain at least one sample")
    if batch_size <= 0:
        raise ValueError("batch_size must be a positive integer")

    # Loss function for multi-class classification
    criterion = nn.CrossEntropyLoss()

    # Adam optimizer over all model parameters
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    # Per-epoch metrics
    results = {'train_acc': [], 'test_acc': [], 'train_loss': [], 'test_loss': []}

    # Early stopping state
    best_test_loss = float('inf')
    patience_counter = 0

    # Fixed evaluation subsets (faster than evaluating on everything each epoch).
    # Indices follow the device of the data they index rather than a global
    # `device`, and the subsets are gathered once because they never change.
    eval_train_size = 10000
    eval_test_size = 10000
    train_subset_idx = torch.randperm(n_samples)[:eval_train_size].to(X_train.device)
    test_subset_idx = torch.randperm(len(X_test))[:eval_test_size].to(X_test.device)
    X_train_eval, y_train_eval = X_train[train_subset_idx], y_train[train_subset_idx]
    X_test_eval, y_test_eval = X_test[test_subset_idx], y_test[test_subset_idx]

    for epoch in range(epochs):
        model.train()
        epoch_loss = 0.0  # Sum of per-sample losses over the epoch

        # Fresh shuffle of the training indices every epoch
        indices = torch.randperm(n_samples).to(X_train.device)

        # split() also yields the trailing partial batch, so every sample is
        # used each epoch and a dataset smaller than batch_size still trains
        # (floor division used to drop the tail and could give zero batches).
        for batch_idx in indices.split(batch_size):
            X_batch, y_batch = X_train[batch_idx], y_train[batch_idx]

            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the short last batch is not over-counted
            epoch_loss += loss.item() * len(batch_idx)

        # Evaluation on the fixed subsets (no gradients needed)
        model.eval()
        with torch.no_grad():
            train_outputs = model(X_train_eval)
            train_loss = criterion(train_outputs, y_train_eval).item()
            train_pred = torch.argmax(train_outputs, dim=1)
            # Fraction of correct predictions, computed without leaving torch
            train_acc = (train_pred == y_train_eval).sum().item() / len(y_train_eval)

            test_outputs = model(X_test_eval)
            test_loss = criterion(test_outputs, y_test_eval).item()
            test_pred = torch.argmax(test_outputs, dim=1)
            test_acc = (test_pred == y_test_eval).sum().item() / len(y_test_eval)

        results['train_acc'].append(train_acc)
        results['test_acc'].append(test_acc)
        results['train_loss'].append(train_loss)
        results['test_loss'].append(test_loss)

        # Print progress every 100 epochs (mean training loss per sample)
        if epoch % 100 == 0:
            print(f"Epoch: {epoch}, Avg Loss: {epoch_loss / n_samples:.4f}")

        # Early stopping based on the test-subset loss.
        # NOTE(review): this uses test data for model selection; a separate
        # validation split would avoid leaking test information.
        if test_loss < best_test_loss:
            best_test_loss = test_loss
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping at epoch {epoch}")
                break

    return results


In [None]:
# Utility Functions and Experiment Configurations

# Function to count the total number of trainable parameters in the model
def count_parameters(model):
    """Return the number of scalar values in the model's trainable parameters.

    Only parameters with ``requires_grad`` set are counted.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total

#  Estimate how much memory the model uses (in bytes), assuming 4 bytes per parameter (float32)
def estimate_ram(parameters):
    """Return the estimated size in bytes of ``parameters`` values at 4 bytes each (float32)."""
    bytes_per_parameter = 4
    return parameters * bytes_per_parameter


#  List of configurations for experimenting with different network structures and hyperparameters
configs = [
    # Config 1: two hidden layers of 64 units, Tanh
    dict(hidden_sizes=[64, 64], activation='tanh', lr=0.001, epochs=500),

    # Config 2: two hidden layers of 64 units, ReLU, smaller learning rate
    dict(hidden_sizes=[64, 64], activation='relu', lr=0.0001, epochs=500),

    # Config 3: four hidden layers of 128 units, Tanh
    dict(hidden_sizes=[128, 128, 128, 128], activation='tanh', lr=0.001, epochs=500),

    # Config 4: four hidden layers of 128 units, ReLU, smaller learning rate
    dict(hidden_sizes=[128, 128, 128, 128], activation='relu', lr=0.0001, epochs=500),

    # Config 5: a single hidden layer of 64 units, ReLU
    dict(hidden_sizes=[64], activation='relu', lr=0.0001, epochs=500),
]



In [None]:
#  Run Experiments for Different Neural Network Configurations

# Network dimensions: one input per feature column, one output per distinct label
input_size = X_train.shape[1]
output_size = len(np.unique(y))

# Mini-batch size shared by every configuration
batch_size = 512

# One summary row (dict) per configuration
results_summary = []

# Ground-truth labels as NumPy arrays for the sklearn metrics (converted once)
y_train_np = y_train.cpu().numpy()
y_test_np = y_test.cpu().numpy()

for idx, config in enumerate(configs, 1):
    print(f"\nTesting Configuration {idx}: {config['hidden_sizes']} neurons, {config['activation']} activation")

    # Fresh model for this configuration, placed on the selected device
    model = NeuralNetwork(input_size, config['hidden_sizes'], output_size, config['activation']).to(device)

    print_memory_usage(f"Before Training Config {idx}")

    # Time the training run (wall clock, reported in minutes)
    start_time = time.time()
    results = train_model(
        model, X_train, y_train, X_test, y_test,
        config['epochs'], batch_size, config['lr'],
    )
    elapsed = (time.time() - start_time) / 60
    print(f"Config {idx} took {elapsed:.2f} minutes.")

    print_memory_usage(f"After Training Config {idx}")

    # Predicted class = index of the largest output, on the full train and test sets
    model.eval()
    with torch.no_grad():
        train_pred = model(X_train).argmax(dim=1).cpu().numpy()
        test_pred = model(X_test).argmax(dim=1).cpu().numpy()

    # (truth, prediction) pairs in train-then-test order, shared by all metrics
    splits = ((y_train_np, train_pred), (y_test_np, test_pred))

    train_acc, test_acc = [accuracy_score(truth, pred) for truth, pred in splits]
    train_precision, test_precision = [
        precision_score(truth, pred, average='weighted') for truth, pred in splits
    ]
    train_recall, test_recall = [
        recall_score(truth, pred, average='weighted') for truth, pred in splits
    ]
    train_f1, test_f1 = [
        f1_score(truth, pred, average='weighted') for truth, pred in splits
    ]

    # Model size: trainable parameter count and its float32 footprint estimate
    params = count_parameters(model)
    ram = estimate_ram(params)

    print(f"Training Accuracy: {train_acc:.4f}")
    print(f"Test Accuracy: {test_acc:.4f}")
    print(f"Total Parameters: {params}, Estimated RAM: {ram} bytes")

    # Record everything for the summary table / plot
    results_summary.append({
        'Config': f'Config {idx}',
        'Hidden Sizes': config['hidden_sizes'],
        'Activation': config['activation'],
        'Train Accuracy': train_acc,
        'Test Accuracy': test_acc,
        'Train Precision': train_precision,
        'Test Precision': test_precision,
        'Train Recall': train_recall,
        'Test Recall': test_recall,
        'Train F1': train_f1,
        'Test F1': test_f1,
        'Parameters': params,
        'RAM (bytes)': ram,
        'Time (min)': elapsed
    })



Testing Configuration 1: [64, 64] neurons, tanh activation
[Before Training Config 1] RAM usage: 691.41 MB
Epoch: 0, Avg Loss: 0.7457
Epoch: 100, Avg Loss: 0.4642
Early stopping at epoch 169
Config 1 took 9.77 minutes.
[After Training Config 1] RAM usage: 482.88 MB
Training Accuracy: 0.8544
Test Accuracy: 0.8524
Total Parameters: 8135, Estimated RAM: 32540 bytes

Testing Configuration 2: [64, 64] neurons, relu activation
[Before Training Config 2] RAM usage: 731.28 MB
Epoch: 0, Avg Loss: 1.3224
Epoch: 100, Avg Loss: 0.4911
Epoch: 200, Avg Loss: 0.4612
Epoch: 300, Avg Loss: 0.4477
Early stopping at epoch 302
Config 2 took 23.75 minutes.
[After Training Config 2] RAM usage: 162.56 MB
Training Accuracy: 0.8477
Test Accuracy: 0.8450
Total Parameters: 8135, Estimated RAM: 32540 bytes

Testing Configuration 3: [128, 128, 128, 128] neurons, tanh activation
[Before Training Config 3] RAM usage: 474.44 MB
Epoch: 0, Avg Loss: 0.6902
Epoch: 100, Avg Loss: 0.3550
Early stopping at epoch 144
Confi

In [None]:
# Save and Plot Results

# Convert the results list into a pandas DataFrame
df_results = pd.DataFrame(results_summary)

# Save the DataFrame to a CSV file for later use or reporting
df_results.to_csv('pytorch_covtype_results.csv', index=False)
print("\nResults saved to 'pytorch_covtype_results.csv'")

# Plot Train vs Test Accuracy for each configuration
fig, ax = plt.subplots(figsize=(10, 6))

# One x position per *recorded* result. Using len(configs) here would raise a
# shape-mismatch error whenever the experiment loop stopped before finishing
# every configuration, because the bar heights come from results_summary.
x = np.arange(len(results_summary))

# Side-by-side bars: training accuracy on the left, test accuracy on the right
ax.bar(x - 0.2, [r['Train Accuracy'] for r in results_summary], 0.4, label='Train Accuracy')
ax.bar(x + 0.2, [r['Test Accuracy'] for r in results_summary], 0.4, label='Test Accuracy')

# Axis labels and title
ax.set_xlabel('Configuration')
ax.set_ylabel('Accuracy')
ax.set_title('PyTorch Train vs Test Accuracy on Covertype Dataset')

# Tick labels show the configuration number, layer sizes and activation
ax.set_xticks(x)
ax.set_xticklabels(
    [f'Config {i+1}\n{r["Hidden Sizes"]}\n{r["Activation"]}' for i, r in enumerate(results_summary)],
    rotation=45
)

# Legend to distinguish train and test bars
ax.legend()

# Adjust layout to prevent label clipping
fig.tight_layout()

# Save the plot as an image (before show, so the saved figure is not blank)
fig.savefig('pytorch_covtype_accuracy_plot.png')

# Show the plot
plt.show()

