In [1]:
from ucimlrepo import fetch_ucirepo 
import torch 
import torch.nn as nn
import numpy as np
from torch.nn.functional import one_hot
from sklearn.model_selection import KFold, train_test_split
import pandas as pd


In [2]:
# fetch dataset 
bank_marketing = fetch_ucirepo(id=222) 
  
# data (as pandas dataframes) 
X = bank_marketing.data.features 
y = bank_marketing.data.targets 
  
# metadata 
print(bank_marketing.metadata) 
  
# variable information 
print(bank_marketing.variables) 

{'uci_id': 222, 'name': 'Bank Marketing', 'repository_url': 'https://archive.ics.uci.edu/dataset/222/bank+marketing', 'data_url': 'https://archive.ics.uci.edu/static/public/222/data.csv', 'abstract': 'The data is related with direct marketing campaigns (phone calls) of a Portuguese banking institution. The classification goal is to predict if the client will subscribe a term deposit (variable y).', 'area': 'Business', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 45211, 'num_features': 16, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Age', 'Occupation', 'Marital Status', 'Education Level'], 'target_col': ['y'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 2014, 'last_updated': 'Fri Aug 18 2023', 'dataset_doi': '10.24432/C5K306', 'creators': ['S. Moro', 'P. Rita', 'P. Cortez'], 'intro_paper': {'ID': 277, 'type': 'NATIVE', 'title': 'A data-driven approach to predict the s

In [59]:
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.model_selection import train_test_split, KFold

# --- Data Preprocessing ---

# Assume X and y are your original DataFrames/Series.
# Factorize each categorical column in X to ensure numeric values.
X_factorized = X.copy()
for col in X.columns:
    X_factorized[col], _ = pd.factorize(X[col])

# Factorize and one-hot encode the target variable.
y_numeric, uniques = pd.factorize(y.values.ravel())
y_tensor = torch.tensor(y_numeric, dtype=torch.long)
y_onehot = F.one_hot(y_tensor, num_classes=2).numpy()

# Create train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X_factorized, y_onehot, test_size=0.2, random_state=0
)

# --- Neural Network Class Definition ---

class NN:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, lr=0.001):
        self.lr = lr
        # Initialize weights with small random values and biases with zeros.
        self.W1 = np.random.randn(input_size, hidden_size1) * lr
        self.b1 = np.zeros((1, hidden_size1))
        
        self.W2 = np.random.randn(hidden_size1, hidden_size2) * lr
        self.b2 = np.zeros((1, hidden_size2))
        
        self.W3 = np.random.randn(hidden_size2, output_size) * lr
        self.b3 = np.zeros((1, output_size))
        
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def softmax(self, x):
        exps = np.exp(x - np.max(x, axis=1, keepdims=True))
        return exps / np.sum(exps, axis=1, keepdims=True)
    
    def forward(self, X):
        """Implement a 3 layer neural network forward propagation"""
        X = np.asarray(X, dtype=np.float64)
        self.X = X
        self.z1 = np.dot(self.X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        
        self.z3 = np.dot(self.a2, self.W3) + self.b3
        output = self.softmax(self.z3)
        return output 
    
    def compute_loss(self, y, output, weight=None):
        """
        Compute Cross Entropy Loss, where y is one-hot encoded and output is the softmax output.
        """
        m = y.shape[0]
        if weight is not None:
            weight = weight.reshape(1, -1)
            loss = -1/m * np.sum(weight * (y * np.log(output)))
        else:
            loss = -1/m * np.sum(y * np.log(output))
        return loss
            
    def backward(self, X, y, output):
        """Backward propagation"""
        m = y.shape[0]
        # Output layer gradients
        dZ3 = output - y
        dW3 = np.dot(self.a2.T, dZ3) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m
        
        # Propagate to second hidden layer
        dA2 = np.dot(dZ3, self.W3.T)
        dZ2 = dA2 * (self.a2 * (1 - self.a2))
        dW2 = np.dot(self.a1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        
        # Propagate to first hidden layer
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.a1 * (1 - self.a1))
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m
        
        # Update weights and biases using gradient descent
        self.W3 -= self.lr * dW3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr * db1

    def train(self, X, y, epochs, batch_size=64):
        """Batch Gradient Descent training"""
        m = X.shape[0]
        for epoch in range(epochs):
            indices = np.random.choice(m, batch_size, replace=False)
            # Use .iloc for DataFrame and direct indexing for numpy arrays.
            X_batch = X.iloc[indices]
            y_batch = y[indices]
        
            output = self.forward(X_batch)
            loss = self.compute_loss(y_batch, output)
            self.backward(X_batch, y_batch, output)
            
            if epoch % 100 == 0:
                print(f'Loss at epoch {epoch}: {loss}')
                
    def predict(self, X):
        output = self.forward(X)
        return output
    
    def accuracy(self, X, y):
        pred = np.argmax(self.predict(X), axis=1)
        true_labels = np.argmax(y, axis=1)
        return np.mean(pred == true_labels)
    
    def precision(self, X, y):
        preds = np.argmax(self.predict(X), axis=1)
        true_labels = np.argmax(y, axis=1)
        tp = np.sum((preds == 1) & (true_labels == 1))
        fp = np.sum((preds == 1) & (true_labels == 0))
        return tp / (tp + fp + 1e-8)
        
    def recall(self, X, y):
        preds = np.argmax(self.predict(X), axis=1)
        true_labels = np.argmax(y, axis=1)
        tp = np.sum((preds == 1) & (true_labels == 1))
        fn = np.sum((preds == 0) & (true_labels == 1))
        return tp / (tp + fn + 1e-8)

# --- Cross-Validation Training ---

# Ensure reproducibility
np.random.seed(0)

input_size = 16
output_size = 2
hidden_size1 = 32
hidden_size2 = 16

kf = KFold(n_splits=5, shuffle=True, random_state=0)
fold = 1

# Lists to collect metrics for each fold
acc_scores = []
prec_scores = []
recall_scores = []

# Reset indices for X_train to ensure proper integer indexing
X_train = X_train.reset_index(drop=True)
# y_train is already a NumPy array

for train_index, val_index in kf.split(X_train):
    # Use .iloc for DataFrame indexing and numpy indexing for y_train
    X_tr, X_val = X_train.iloc[train_index], X_train.iloc[val_index]
    y_tr, y_val = y_train[train_index], y_train[val_index]
    
    nn = NN(input_size, hidden_size1, hidden_size2, output_size)
    print(f"Training on fold {fold}: {X_tr.shape[0]} samples")
    
    nn.train(X_tr, y_tr, epochs=5000, batch_size=2000)
    
    # Evaluate on the validation set from the current fold
    acc = nn.accuracy(X_val, y_val)
    prec = nn.precision(X_val, y_val)
    rec = nn.recall(X_val, y_val)
    
    print(f"Fold {fold} - Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}")
    
    acc_scores.append(acc)
    prec_scores.append(prec)
    recall_scores.append(rec)
    
    fold += 1

print("\nAverage Accuracy:", np.mean(acc_scores))
print("Average Precision:", np.mean(prec_scores))
print("Average Recall:", np.mean(recall_scores))
    

Training on fold 1: 28934 samples
Loss at epoch 0: 0.6909256103608725
Loss at epoch 100: 0.5789004065184495
Loss at epoch 200: 0.5022362136145423
Loss at epoch 300: 0.45694508279455787
Loss at epoch 400: 0.4301940286524067
Loss at epoch 500: 0.4205409704616166
Loss at epoch 600: 0.40977273973983486
Loss at epoch 700: 0.38209349574710516
Loss at epoch 800: 0.39877908493148584
Loss at epoch 900: 0.37343478629610866
Loss at epoch 1000: 0.3681330627106137
Loss at epoch 1100: 0.3833486964029281
Loss at epoch 1200: 0.35639345521702176
Loss at epoch 1300: 0.35723940594709
Loss at epoch 1400: 0.35769105341565616
Loss at epoch 1500: 0.3594367685638496
Loss at epoch 1600: 0.3467904077284841
Loss at epoch 1700: 0.33776097026948354
Loss at epoch 1800: 0.3659823199280362
Loss at epoch 1900: 0.3440632794931606
Loss at epoch 2000: 0.354117730140921
Loss at epoch 2100: 0.3635752910519849
Loss at epoch 2200: 0.3546973557458748
Loss at epoch 2300: 0.35350817298897835
Loss at epoch 2400: 0.35146143790178