In [13]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Function to clean data
def clean_data(data):
    #data = data[data["Balance"] != 0]
    data = data.drop(["RowNumber", "CustomerId", "Surname"], axis=1)
    return data

# Function to perform one-hot encoding
def Categorical_To_Numerical(data, col):
    uniques = data[col].unique()
    unique_dict = {value: idx for idx, value in enumerate(uniques)}
    data[col] = data[col].map(unique_dict)
    return data

# Function to scale features
def scale_features(data, feature_columns):
    scaler = MinMaxScaler()
    data[feature_columns] = scaler.fit_transform(data[feature_columns])
    return data

# Initialize parameters
def initialize_params():
    np.random.seed(42)
    params = {
        'W(hn1)': np.random.randn(noofipnodes, noofhiddennodes) * 0.01,
        'B(hn1)': np.zeros((1, noofhiddennodes)),
        'W(op)': np.random.randn(noofhiddennodes, nofopnodes) * 0.01,
        'B(op)': np.zeros((1, nofopnodes)),
    }
    return params

# Activation functions
def relu(z):
    return np.maximum(0, z)

def relu_derivative(z):
    return np.where(z > 0, 1, 0)

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

# Forward propagation
def forward_prop(X, params):
    Z1 = np.dot(X, params["W(hn1)"]) + params['B(hn1)']
    A1 = relu(Z1)
    Z2 = np.dot(A1, params["W(op)"]) + params['B(op)']
    A2 = sigmoid(Z2)
    cache = (Z1, A1, Z2, A2)
    return A2, cache

# Compute loss
def compute_loss(A2, y):
    m = y.shape[0]
    epsilon = 1e-10  # Small constant to avoid log(0)
    A2 = np.clip(A2, epsilon, 1 - epsilon)  # Clipping the values
    loss = -1/m * np.sum(y * np.log(A2) + (1 - y) * np.log(1 - A2))
    return loss

# Backward propagation
def backward_prop(X, y, cache, params):
    m = y.shape[0]  # Corrected this line
    (Z1, A1, Z2, A2) = cache 

    # Gradient with respect to output layer weights and biases
    dZ2 = A2 - y.reshape(-1, 1)
    dW_op = np.dot(A1.T, dZ2) / m
    dB_op = np.sum(dZ2, axis=0, keepdims=True) / m

    # Gradient with respect to hidden layer weights and biases
    dA1 = np.dot(dZ2, params['W(op)'].T)
    dZ1 = dA1 * relu_derivative(Z1)
    dW_hn1 = np.dot(X.T, dZ1) / m
    dB_hn1 = np.sum(dZ1, axis=0, keepdims=True) / m

    grads = {"HLW": dW_hn1, "HLB": dB_hn1, "OLW": dW_op, "OLB": dB_op}
    return grads

# Update parameters
def update_parameters(params, grads, learning_rate):
    params['W(hn1)'] -= learning_rate * grads["HLW"]
    params['B(hn1)'] -= learning_rate * grads["HLB"]
    params['W(op)'] -= learning_rate * grads["OLW"]
    params['B(op)'] -= learning_rate * grads["OLB"]
    return params

# Training function
def train(X_train, y_train, learning_rate=0.01, epochs=1000):
    params = initialize_params()
    for i in range(epochs):
        A2, cache = forward_prop(X_train, params)
        loss = compute_loss(y_train, A2)
        grads = backward_prop(X_train, y_train, cache, params)
        params = update_parameters(params, grads, learning_rate)

        if i % 100 == 0:
            print(f"Epoch {i}, Loss: {loss}")

    return params

# Prediction function
def predict(X, params):
    A2, _ = forward_prop(X, params)
    predictions = (A2 > 0.5).astype(int)
    return predictions

# Load data
data = pd.read_csv("datasets\Churn_Modelling.csv")

# Clean data

cleaned_data=Categorical_To_Numerical(data,"Gender")
cleaned_data=Categorical_To_Numerical(data,"Geography")
cleaned_data = clean_data(data)

continuous_features = ["CreditScore", "Age", "Tenure", "Balance", "NumOfProducts", "EstimatedSalary"]
normalized_data = scale_features(cleaned_data, continuous_features)

normalized_data.to_csv("Prepared Churn Model Data.csv", index=False)

# Split the data into features and target variable
X = normalized_data.drop("Exited", axis=1).values
y = normalized_data["Exited"].values.reshape(-1,1)


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# NN architecture
noofipnodes = X_train.shape[1]
noofhiddennodes = 2
nofopnodes = 1

params = train(X_train, y_train, learning_rate=0.1, epochs=1000)
y_pred_train = predict(X_train, params)
y_pred_test = predict(X_test, params)

train_accuracy = np.mean(y_pred_train == y_train.reshape(-1, 1))
test_accuracy = np.mean(y_pred_test == y_test.reshape(-1, 1))

print(f"Train Accuracy: {train_accuracy * 100}%")
print(f"Test Accuracy: {test_accuracy * 100}%")

  data = pd.read_csv("datasets\Churn_Modelling.csv")


Epoch 0, Loss: 11.51290950247485
Epoch 100, Loss: 7.985148488530836
Epoch 200, Loss: 7.600884249464613
Epoch 300, Loss: 7.534952804790826
Epoch 400, Loss: 7.523555144223419
Epoch 500, Loss: 7.524869962464565
Epoch 600, Loss: 7.5339944331266215
Epoch 700, Loss: 7.544801606140152
Epoch 800, Loss: 7.515470386276193
Epoch 900, Loss: 7.422286084569029
Train Accuracy: 79.45%
Test Accuracy: 80.35%
