In [1]:
import numpy as np
import pandas as pd
from sklearn.datasets import fetch_covtype
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import precision_score, recall_score, f1_score

# Fetch the forest cover type dataset and hold out 20% of it for testing.
data = fetch_covtype()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize the features; the scaler is fitted on the training split only
# and then re-used unchanged on the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# One-hot encode the labels. The encoder is fed a single-column 2-D array,
# hence the reshape of the 1-D label vectors.
encoder = OneHotEncoder(sparse_output=False)
y_train = encoder.fit_transform(y_train.reshape(-1, 1))
y_test = encoder.transform(y_test.reshape(-1, 1))

def relu(x):
    """Return the element-wise maximum of zero and ``x`` (ReLU activation)."""
    rectified = np.maximum(0, x)
    return rectified
def relu_derivative(x):
    """Return 1.0 where ``x`` is strictly positive and 0.0 elsewhere."""
    positive_mask = x > 0
    return positive_mask.astype(float)
def tanh(x):
    """Return the element-wise hyperbolic tangent of ``x``."""
    squashed = np.tanh(x)
    return squashed
def tanh_derivative(x):
    """Return the derivative of tanh at ``x``, i.e. ``1 - tanh(x)**2``."""
    t = np.tanh(x)
    return 1 - np.square(t)
def softmax(x):
    """Return the row-wise softmax of a 2-D array ``x``.

    Each row's maximum is subtracted before exponentiating so that large
    inputs do not overflow; this shift does not change the result.
    """
    row_max = np.max(x, axis=1, keepdims=True)
    exponentials = np.exp(x - row_max)
    row_totals = np.sum(exponentials, axis=1, keepdims=True)
    return exponentials / row_totals
def cross_entropy(y_true, y_pred):
    """Return the cross-entropy between ``y_true`` and ``y_pred``.

    The summed loss is divided by ``y_true.shape[0]``. A small constant
    (1e-9) is added to ``y_pred`` before the logarithm so that a predicted
    probability of exactly zero does not produce ``-inf``.
    """
    n_samples = y_true.shape[0]
    log_probabilities = np.log(y_pred + 1e-9)
    summed = np.sum(y_true * log_probabilities)
    return -summed / n_samples
def cross_entropy_derivative(y_true, y_pred):
    """Return the prediction error ``y_pred - y_true``.

    Note the argument order: the targets come first, the predictions second.
    """
    error = y_pred - y_true
    return error

def initialize_parameters(layer_sizes, seed=None):
    """Create the weight matrices and bias rows for a fully connected network.

    Args:
        layer_sizes: Sequence of layer widths, input layer first and output
            layer last. Consecutive pairs define one weight matrix each.
        seed: Optional seed (or ``numpy.random.Generator``) used to draw the
            weights reproducibly. When ``None`` (the default) the weights are
            drawn from NumPy's global random state, exactly as before, so
            ``np.random.seed`` still controls the outcome.

    Returns:
        A ``(weights, biases)`` tuple of lists. ``weights[i]`` has shape
        ``(layer_sizes[i], layer_sizes[i + 1])`` and holds standard-normal
        draws scaled by ``sqrt(2 / layer_sizes[i])`` (the scaling used by He
        initialization); ``biases[i]`` is a zero array of shape
        ``(1, layer_sizes[i + 1])``. Both lists are empty when fewer than two
        layer sizes are given.
    """
    if seed is None:
        # Legacy path: keep using the module-level global RNG.
        draw = np.random.randn
    else:
        rng = np.random.default_rng(seed)

        def draw(rows, cols):
            return rng.standard_normal((rows, cols))

    weights, biases = [], []
    for fan_in, fan_out in zip(layer_sizes, layer_sizes[1:]):
        weights.append(draw(fan_in, fan_out) * np.sqrt(2.0 / fan_in))
        biases.append(np.zeros((1, fan_out)))
    return weights, biases

def forward_pass(X, weights, biases, activation_hidden):
    """Propagate ``X`` through the network and record every intermediate.

    Args:
        X: Input batch; it is multiplied by ``weights[0]`` with ``np.dot``.
        weights: List of weight matrices, one per layer.
        biases: List of bias arrays, one per layer.
        activation_hidden: Name of the hidden-layer activation, either
            ``"relu"`` or ``"tanh"``. It is only consulted when the network
            has at least one hidden layer.

    Returns:
        A ``(activations, zs)`` tuple of lists. ``activations[0]`` is ``X``,
        followed by the output of every hidden layer and finally the softmax
        output of the last layer. ``zs`` holds the pre-activation value of
        every layer, in order.

    Raises:
        ValueError: If there are hidden layers and ``activation_hidden`` is
            not one of the supported names. Previously this surfaced as an
            ``UnboundLocalError`` from inside the loop.
    """
    n_hidden = len(weights) - 1
    # Validate before doing any work so a typo fails fast with a clear message.
    if n_hidden > 0 and activation_hidden not in ("relu", "tanh"):
        raise ValueError(
            f"Unsupported hidden activation {activation_hidden!r}; "
            "expected 'relu' or 'tanh'"
        )

    activations, zs = [X], []
    for w, b in zip(weights[:-1], biases):
        z = np.dot(activations[-1], w) + b
        zs.append(z)
        activations.append(relu(z) if activation_hidden == "relu" else tanh(z))

    # The output layer always uses softmax, independent of the hidden activation.
    z = np.dot(activations[-1], weights[-1]) + biases[-1]
    zs.append(z)
    activations.append(softmax(z))
    return activations, zs

def backward_pass(activations, zs, y_true, weights, activation_hidden):
    """Back-propagate the output error and return per-layer gradients.

    Args:
        activations: Layer outputs as returned by ``forward_pass``
            (``activations[0]`` is the input batch, ``activations[-1]`` the
            network output).
        zs: Pre-activation values of every layer, as returned by
            ``forward_pass``.
        y_true: Target array; ``y_true.shape[0]`` is used as the divisor that
            averages the gradients over the batch.
        weights: List of weight matrices, one per layer.
        activation_hidden: Name of the hidden-layer activation, either
            ``"relu"`` or ``"tanh"``. It is only consulted when the network
            has at least one hidden layer.

    Returns:
        A ``(grad_weights, grad_biases)`` tuple of lists with one entry per
        layer, in the same order as ``weights``.

    Raises:
        ValueError: If there are hidden layers and ``activation_hidden`` is
            not one of the supported names. Previously the error term was
            silently left un-propagated in that case.
    """
    n_layers = len(weights)
    # Validate first: an unknown name must not silently skip the delta update.
    if n_layers > 1 and activation_hidden not in ("relu", "tanh"):
        raise ValueError(
            f"Unsupported hidden activation {activation_hidden!r}; "
            "expected 'relu' or 'tanh'"
        )
    derivative = relu_derivative if activation_hidden == "relu" else tanh_derivative

    n_samples = y_true.shape[0]
    grad_weights, grad_biases = [None] * n_layers, [None] * n_layers

    # Output layer: the error term is prediction minus target.
    delta = cross_entropy_derivative(y_true, activations[-1])
    grad_weights[-1] = np.dot(activations[-2].T, delta) / n_samples
    grad_biases[-1] = np.sum(delta, axis=0, keepdims=True) / n_samples

    # Hidden layers, walked from the last one back to the first.
    for layer in reversed(range(n_layers - 1)):
        delta = np.dot(delta, weights[layer + 1].T) * derivative(zs[layer])
        grad_weights[layer] = np.dot(activations[layer].T, delta) / n_samples
        grad_biases[layer] = np.sum(delta, axis=0, keepdims=True) / n_samples
    return grad_weights, grad_biases

def update_parameters(weights, biases, grad_weights, grad_biases, lr):
    """Apply one gradient-descent step to every layer.

    Each entry of ``weights`` and ``biases`` is decreased by ``lr`` times the
    matching gradient. The lists are modified in place and also returned.
    """
    for layer, _ in enumerate(weights):
        weight_step = lr * grad_weights[layer]
        weights[layer] -= weight_step
        bias_step = lr * grad_biases[layer]
        biases[layer] -= bias_step
    return weights, biases

def predict(X, weights, biases, activation_hidden):
    """Return, for each row of ``X``, the index of the highest network output.

    Runs ``forward_pass`` and takes the arg-max along axis 1 of the final
    layer's output.
    """
    layer_outputs = forward_pass(X, weights, biases, activation_hidden)[0]
    final_output = layer_outputs[-1]
    return np.argmax(final_output, axis=1)

def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` equals ``y_pred``."""
    matches = y_true == y_pred
    return np.mean(matches)
def count_learnable_params(layers):
    """Return the number of weights plus biases implied by ``layers``.

    For every pair of consecutive layer widths ``(a, b)`` the count grows by
    ``a * b`` weights and ``b`` biases.
    """
    total = 0
    for fan_in, fan_out in zip(layers, layers[1:]):
        total += fan_in * fan_out + fan_out
    return total
def estimate_memory_usage(weights, biases):
    """Return the combined size of all weight and bias arrays in MiB.

    Sums the ``nbytes`` of each paired weight/bias array and divides by
    ``1024 ** 2``.
    """
    total_bytes = 0
    for weight, bias in zip(weights, biases):
        total_bytes += weight.nbytes + bias.nbytes
    bytes_per_mib = 1024 ** 2
    return total_bytes / bytes_per_mib

# Experiment grid: two network depths, each tried with tanh and with ReLU.
# Every run shares the same epoch count and batch size; only the hidden
# layout, the activation and the learning rate vary.
configs = [
    {
        "name": name,
        "layers": [X_train.shape[1], *hidden, y_train.shape[1]],
        "activation": activation,
        "lr": lr,
        "epochs": 500,
        "batch_size": 128,
    }
    for name, hidden, activation, lr in (
        ("V1", (64, 64), "tanh", 0.005),
        ("V2", (64, 64), "relu", 0.005),
        ("V3", (128, 128, 128, 128), "tanh", 0.005),
        ("V4", (128, 128, 128, 128), "relu", 0.001),
    )
]

# One row of settings and test metrics is collected per configuration.
results = []

for config in configs:
    activation = config["activation"]
    batch_size = config["batch_size"]
    learning_rate = config["lr"]

    # Each configuration starts from freshly initialized parameters.
    weights, biases = initialize_parameters(config["layers"])

    # Mini-batch gradient descent; batches are consecutive slices of the
    # training set, visited in the same order every epoch.
    for epoch in range(config["epochs"]):
        for start in range(0, X_train.shape[0], batch_size):
            stop = start + batch_size
            X_batch, y_batch = X_train[start:stop], y_train[start:stop]
            activations_list, zs = forward_pass(X_batch, weights, biases, activation)
            grad_weights, grad_biases = backward_pass(
                activations_list, zs, y_batch, weights, activation
            )
            weights, biases = update_parameters(
                weights, biases, grad_weights, grad_biases, learning_rate
            )

    # Evaluate on the held-out split; one-hot targets are turned back into
    # class indices so they are comparable with the predictions.
    y_pred_test = predict(X_test, weights, biases, activation)
    y_true_test = np.argmax(y_test, axis=1)
    metric_options = {"average": "weighted", "zero_division": 0}
    acc = accuracy(y_true_test, y_pred_test)
    prec = precision_score(y_true_test, y_pred_test, **metric_options)
    rec = recall_score(y_true_test, y_pred_test, **metric_options)
    f1 = f1_score(y_true_test, y_pred_test, **metric_options)

    n_params = count_learnable_params(config["layers"])
    mem_mb = estimate_memory_usage(weights, biases)

    row = (
        config["name"],
        config["layers"],
        config["activation"],
        config["lr"],
        config["epochs"],
        config["batch_size"],
        acc,
        prec,
        rec,
        f1,
        n_params,
        mem_mb,
    )
    results.append(row)

# Column headers, in the same order as the tuples stored in ``results``.
result_columns = [
    "Config",
    "Layers",
    "Activation",
    "Learning Rate",
    "Epochs",
    "Batch Size",
    "Accuracy",
    "Precision",
    "Recall",
    "F1 Score",
    "Learnable Parameters",
    "Estimated RAM (MB)",
]
df = pd.DataFrame(results, columns=result_columns)

# Set every relevant pandas display limit to None before printing the table.
for display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.width",
    "display.max_colwidth",
):
    pd.set_option(display_option, None)

print(df)


  Config                       Layers Activation  Learning Rate  Epochs  \
0     V1              [54, 64, 64, 7]       tanh          0.005     500   
1     V2              [54, 64, 64, 7]       relu          0.005     500   
2     V3  [54, 128, 128, 128, 128, 7]       tanh          0.005     500   
3     V4  [54, 128, 128, 128, 128, 7]       relu          0.001     500   

   Batch Size  Accuracy  Precision    Recall  F1 Score  Learnable Parameters  \
0         128  0.919692   0.919263  0.919692  0.919328                  8135   
1         128  0.894762   0.896862  0.894762  0.893100                  8135   
2         128  0.963486   0.963478  0.963486  0.963473                 57479   
3         128  0.926749   0.926656  0.926749  0.926018                 57479   

   Estimated RAM (MB)  
0            0.062065  
1            0.062065  
2            0.438530  
3            0.438530  
