In [7]:
from ucimlrepo import fetch_ucirepo 
import torch 
import torch.nn as nn
import numpy as np
from torch.nn.functional import one_hot
from sklearn.model_selection import KFold
import pandas as pd

# fetch dataset 
bank_marketing = fetch_ucirepo(id=222) 
  
# data (as pandas dataframes) 
X = bank_marketing.data.features 
y = bank_marketing.data.targets 
  
# metadata 
print(bank_marketing.metadata) 
  
# variable information 
print(bank_marketing.variables) 

{'uci_id': 222, 'name': 'Bank Marketing', 'repository_url': 'https://archive.ics.uci.edu/dataset/222/bank+marketing', 'data_url': 'https://archive.ics.uci.edu/static/public/222/data.csv', 'abstract': 'The data is related with direct marketing campaigns (phone calls) of a Portuguese banking institution. The classification goal is to predict if the client will subscribe a term deposit (variable y).', 'area': 'Business', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 45211, 'num_features': 16, 'feature_types': ['Categorical', 'Integer'], 'demographics': ['Age', 'Occupation', 'Marital Status', 'Education Level'], 'target_col': ['y'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 2014, 'last_updated': 'Fri Aug 18 2023', 'dataset_doi': '10.24432/C5K306', 'creators': ['S. Moro', 'P. Rita', 'P. Cortez'], 'intro_paper': {'ID': 277, 'type': 'NATIVE', 'title': 'A data-driven approach to predict the s

In [8]:
class NN:
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, lr=0.001):
        # Initialize weights with small random values and biases with zeros
        self.lr= lr
        
        self.W1 = np.random.randn(input_size, hidden_size1) * lr
        self.b1 = np.zeros((1, hidden_size1))
        
        self.W2 = np.random.randn(hidden_size1, hidden_size2) * lr
        self.b2 = np.zeros((1, hidden_size2))
        
        self.W3 = np.random.randn(hidden_size2, output_size) * lr
        self.b3 = np.zeros((1, output_size))
        
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def forward(self, X):
        """Implement a 3 layer neural network forward propagation"""
        X = np.asarray(X,dtype=np.float64)
        self.X = X
        self.z1 = np.dot(self.X, self.W1) + self.b1
        self.a1 = self.sigmoid(self.z1)
        
        self.z2 = np.dot(self.a1, self.W2) + self.b2
        self.a2 = self.sigmoid(self.z2)
        
        self.z3 = np.dot(self.a2, self.W3) + self.b3
        self.a3 = self.sigmoid(self.z3)
        
        output = nn.Softmax(self.a3)
        return output 
    
    def compute_loss(self, y, output, weight=None):
        """
        Compute Cross Entropy Loss, y is one-hot encoded, output is the softmax output.
        
        Parameters:
        y (np.ndarray): One-hot encoded true labels of shape (m, C)
        output (np.ndarray): Predicted probabilities (softmax output) of shape (m, C)
        weight (np.ndarray, optional): Array of shape (C,) containing a weight for each class.
                                        If provided, each class's loss contribution is scaled by this weight.
        
        Returns:
        float: The weighted cross entropy loss.
        """
        m = y.shape[0]
        if weight is not None:
            # Reshape weight to (1, C) for broadcasting over the batch dimension.
            weight = weight.reshape(1, -1)
            loss = -1/m * np.sum( weight * (y * np.log(output) + (1-y) * np.log(1-output)) )
        else:
            loss = -1/m * np.sum( y * np.log(output) + (1-y) * np.log(1-output) )
        return loss
            
    def backward(self, X, y, output):
        """backward propagation"""
        m = y.shape[0]
        # Output layer gradients
        dZ3 = output - y  # shape: (m, output_size)
        dW3 = np.dot(self.A2.T, dZ3) / m
        db3 = np.sum(dZ3, axis=0, keepdims=True) / m
        
        # Propagate to second hidden layer
        dA2 = np.dot(dZ3, self.W3.T)
        dZ2 = dA2 * (self.A2 * (1 - self.A2))  # derivative of sigmoid
        dW2 = np.dot(self.A1.T, dZ2) / m
        db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        
        # Propagate to first hidden layer
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * (self.A1 * (1 - self.A1))
        dW1 = np.dot(X.T, dZ1) / m
        db1 = np.sum(dZ1, axis=0, keepdims=True) / m
        
        # Update weights and biases using gradient descent
        self.W3 -= self.lr * dW3
        self.b3 -= self.lr * db3
        self.W2 -= self.lr * dW2
        self.b2 -= self.lr * db2
        self.W1 -= self.lr * dW1
        self.b1 -= self.lr* db1

    def train(self, X, y, epochs, batch_size=64):
        """Batch Gradient Descent training"""
        m = X.shape[0]
        for epoch in range(epochs):
            indices = np.random.choice(m, batch_size, replace=False)
            X_batch = X.iloc[indices]
            y_batch = y.iloc[indices] if hasattr(y, "iloc") else y[indices]
        
            output = self.forward(X_batch)
            loss = self.compute_loss(y_batch, output)
            self.backward(X_batch, y_batch, output)
            
            if epoch % 100 == 0:
                print(f'Loss at epoch {epoch}: {loss}')
                
    def predict(self, X):
        self.X = X
        output = self.forward()
        return output
    
    def accuracy(self, X, y):
        preds = self.predict(X)
        return np.mean(preds = y)
    
    def precision(self, X, y):
        preds = self.predict(X)
        tp = np.sum(np.logical_and(preds == 1, y == 1))
        fp = np.sum(np.logical_and(preds == 1, y == 0))
        return tp / (tp + fp + 1e-8)
        
        
    def recall(self, X, y):
        preds = self.predict(X)
        tp = np.sum(np.logical_and(preds == 1, y == 1))
        fn = np.sum(np.logical_and(preds == 0, y == 1))
        return tp / (tp + fn + 1e-8)
    

In [9]:
np.random.seed(0)
input_size = 16
output_size = 2
hidden_size1 = 32
hidden_size2 = 16

# convert y to one-hot encoding
y_np = y.values.ravel()

# Option 1: Use pd.factorize to convert to numeric labels
y_numeric, uniques = pd.factorize(y_np)
y_numeric = y_numeric.astype(np.int64)
y_onehot = one_hot(torch.tensor(y_numeric), 2).numpy()

# class weight calculation
unique, counts = np.unique(y, return_counts=True)
class_counts = dict(zip(unique, counts))
class_weight = {}
for c in unique:
    class_weight[c] = len(y) / (2 * class_counts[c])

acc_scores = []
prec_scores = []
recall_scores = []

# 5 cross validation 
kf = KFold(n_splits=5, shuffle = True, random_state=0)
fold = 1
for train_index, test_index in kf.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y_onehot[train_index], y_onehot[test_index]
    
    
    nn = NN(input_size, hidden_size1, hidden_size2, output_size)
    print(f"Training on fold {fold}: {X_train.shape[0]} samples")
    nn.train(X_train, y_train, epochs=5000, batch_size=2000)
    
    acc = nn.accuracy(X_test, y_test)
    prec = nn.precision(X_test, y_test)
    rec = nn.recall(X_test, y_test)
    
    print(f"Fold {fold} -Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}")
    fold += 1

print("\nAverage Accuracy:", np.mean(acc_scores))
print("Average Precision:", np.mean(prec_scores))
print("Average Recall:", np.mean(recall_scores))

Training on fold 1: 36168 samples


ValueError: could not convert string to float: 'self-employed'