In [194]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/stroke-prediction-dataset/healthcare-dataset-stroke-data.csv


In [195]:
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

learning_rate = 0.01                           # learning rate
epochs = 1000                                 # max epochs
n_hidden1 = 16                                # hidden layer 1 neurons
n_hidden2 = 16                                 # hidden layer 2 neurons
n_output = 1                                  # Output Layer
init_scale = 0.01                             # weight initialization scale

activation_choice = "relu"                 # "sigmoid", "relu", "tanh"
Gradient_decent = "stochastic"                # "batch", "stochastic", "minibatch"
batch_size = 32                               # only used if Gradient_decent = "minibatch"


In [196]:
df = pd.read_csv("/kaggle/input/stroke-prediction-dataset/healthcare-dataset-stroke-data.csv")
df.drop("id", axis=1, inplace=True)
df["bmi"] = df["bmi"].fillna(df["bmi"].mean())

le = LabelEncoder()
for col in ["gender", "ever_married", "work_type", "Residence_type", "smoking_status"]:
    df[col] = le.fit_transform(df[col])

X = df.drop("stroke", axis=1).values
y = df["stroke"].values

scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [197]:
#get the 2 hidden layers and output layer value from the adjustable parameter at the start 
n_input = X_train.shape[1]
n = [n_input, n_hidden1, n_hidden2, n_output]

In [198]:
# Activation Functions
def sigmoid(z): return 1 / (1 + np.exp(-z))
def sigmoid_derivative(a): return a * (1 - a)

def relu(z): return np.maximum(0, z)
def relu_derivative(a): return (a > 0).astype(float)

def tanh(z): return np.tanh(z)
def tanh_derivative(a): return 1 - np.square(a)

activation_functions = {
    "sigmoid": (sigmoid, sigmoid_derivative),
    "relu": (relu, relu_derivative),
    "tanh": (tanh, tanh_derivative)
}

# Pick an activation function for hidden layers
act, act_deriv = activation_functions[activation_choice]

# Always sigmoid at output (binary classification)
output_act, output_deriv = sigmoid, sigmoid_derivative


In [199]:
# Initialize weights
np.random.seed(42)
W1 = np.random.randn(n[1], n[0]) * init_scale
W2 = np.random.randn(n[2], n[1]) * init_scale
W3 = np.random.randn(n[3], n[2]) * init_scale
b1 = np.zeros((n[1], 1))
b2 = np.zeros((n[2], 1))
b3 = np.zeros((n[3], 1))

In [200]:
# Loss function (binary cross-entropy)
def cost(y_hat, y):
    eps = 1e-8
    losses = - (y * np.log(y_hat + eps) + (1 - y) * np.log(1 - y_hat + eps))
    return np.mean(losses)

In [201]:
# Forward Pass
def feed_forward(A0):
    Z1 = W1 @ A0 + b1
    A1 = act(Z1)

    Z2 = W2 @ A1 + b2
    A2 = act(Z2)

    Z3 = W3 @ A2 + b3
    A3 = output_act(Z3)

    cache = {"A0": A0, "A1": A1, "A2": A2, "A3": A3}
    return A3, cache

In [202]:
# Backward Pass
def backprop(y_hat, Y, cache):
    global W1, W2, W3, b1, b2, b3
    m = Y.shape[1]

    A0, A1, A2, A3 = cache["A0"], cache["A1"], cache["A2"], cache["A3"]

    # Output layer
    dZ3 = y_hat - Y
    dW3 = (1/m) * dZ3 @ A2.T
    db3 = (1/m) * np.sum(dZ3, axis=1, keepdims=True)

    # Hidden layer 2
    dA2 = W3.T @ dZ3
    dZ2 = dA2 * act_deriv(A2)
    dW2 = (1/m) * dZ2 @ A1.T
    db2 = (1/m) * np.sum(dZ2, axis=1, keepdims=True)

    # Hidden layer 1
    dA1 = W2.T @ dZ2
    dZ1 = dA1 * act_deriv(A1)
    dW1 = (1/m) * dZ1 @ A0.T
    db1 = (1/m) * np.sum(dZ1, axis=1, keepdims=True)

    return dW1, db1, dW2, db2, dW3, db3

In [203]:
# Training Function
def train(X_train, y_train, epochs=1000, alpha=0.1):
    global W1, W2, W3, b1, b2, b3
    costs = []
    A0_full = X_train.T
    Y_full = y_train.reshape(1, -1)

    for e in range(epochs):
        if Gradient_decent == "batch":
            y_hat, cache = feed_forward(A0_full)
            dW1, db1_, dW2, db2_, dW3, db3_ = backprop(y_hat, Y_full, cache)

            W1 -= alpha * dW1; b1 -= alpha * db1_
            W2 -= alpha * dW2; b2 -= alpha * db2_
            W3 -= alpha * dW3; b3 -= alpha * db3_

        elif Gradient_decent == "stochastic":
            m = A0_full.shape[1]
            for i in range(m):
                x_i = A0_full[:, i].reshape(-1, 1)
                y_i = Y_full[:, i].reshape(1, 1)
                y_hat, cache = feed_forward(x_i)
                dW1, db1_, dW2, db2_, dW3, db3_ = backprop(y_hat, y_i, cache)

                W1 -= alpha * dW1; b1 -= alpha * db1_
                W2 -= alpha * dW2; b2 -= alpha * db2_
                W3 -= alpha * dW3; b3 -= alpha * db3_

        elif Gradient_decent == "minibatch":
            m = A0_full.shape[1]
            permutation = np.random.permutation(m)
            A0_shuffled, Y_shuffled = A0_full[:, permutation], Y_full[:, permutation]

            for i in range(0, m, batch_size):
                X_batch = A0_shuffled[:, i:i+batch_size]
                Y_batch = Y_shuffled[:, i:i+batch_size]
                y_hat, cache = feed_forward(X_batch)
                dW1, db1_, dW2, db2_, dW3, db3_ = backprop(y_hat, Y_batch, cache)

                W1 -= alpha * dW1; b1 -= alpha * db1_
                W2 -= alpha * dW2; b2 -= alpha * db2_
                W3 -= alpha * dW3; b3 -= alpha * db3_

        if e % 100 == 0:
            y_hat, _ = feed_forward(A0_full)
            c = cost(y_hat, Y_full)
            print(f"Epoch {e}: Cost = {c:.4f}")
            costs.append(c)

    return costs

In [204]:
# Prediction
def predict(X):
    A0 = X.T
    y_hat, _ = feed_forward(A0)
    return (y_hat > 0.5).astype(int)

def accuracy(y_hat, y_true):
    return np.mean(y_hat.reshape(-1) == y_true) * 100

# =========================
# Run Training
# =========================
costs = train(X_train, y_train, epochs=epochs, alpha=learning_rate)

# Evaluate
y_pred = predict(X_test)
acc = accuracy(y_pred, y_test)
print("Test Accuracy:", acc, "%")


Epoch 0: Cost = 0.1858
Epoch 100: Cost = 0.1487
Epoch 200: Cost = 0.1468
Epoch 300: Cost = 0.1421
Epoch 400: Cost = 0.1369
Epoch 500: Cost = 0.1335
Epoch 600: Cost = 0.1302
Epoch 700: Cost = 0.1269
Epoch 800: Cost = 0.1225
Epoch 900: Cost = 0.1180
Test Accuracy: 92.75929549902152 %
