In [27]:
# Import libraries: pandas/NumPy for data handling, scikit-learn for splitting and scaling, PyTorch for the model
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from torch.utils.data import Dataset, DataLoader

In [28]:
# Read the heart-disease table from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="heart.csv")

In [29]:
# Separate the feature matrix (every column except "target") from the
# "target" label column, both as NumPy arrays.
x = df.drop(columns="target").to_numpy()
y = df["target"].to_numpy()
y

array([0, 0, 0, ..., 0, 1, 0], shape=(1025,))

In [30]:
# Hold out 20% of the rows for testing. The split is stratified on the label
# and uses a fixed seed so it is repeatable.
# NOTE(review): if heart.csv contains duplicated rows, a random split can put
# copies of the same record in both splits and inflate test accuracy --
# worth checking with df.duplicated().sum().
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply that same fitted
# transform to both splits so no test-set statistics reach the scaler.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [31]:
#pytorch class for dataset
class HeartDataset(Dataset):
    """Map-style dataset pairing each feature row with its label.

    Both inputs are converted to float32 tensors once, at construction, so
    indexing is a plain tensor lookup.

    Args:
        features: Array-like holding one row of features per sample.
        labels: Array-like holding one label per sample.

    Raises:
        ValueError: If ``features`` and ``labels`` contain a different
            number of samples.
    """

    def __init__(self, features, labels):
        self.X = torch.tensor(features, dtype=torch.float32)
        self.y = torch.tensor(labels, dtype=torch.float32)
        # __len__ is driven by X alone, so a mismatched y would otherwise only
        # show up later as an IndexError or as labels that are never read.
        if len(self.X) != len(self.y):
            raise ValueError(
                f"features and labels must have the same length, "
                f"got {len(self.X)} and {len(self.y)}"
            )

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at ``idx``."""
        return self.X[idx], self.y[idx]

In [32]:
# Wrap each scaled split in a HeartDataset so it can be indexed per sample.
train_dataset = HeartDataset(features=x_train, labels=y_train)
test_dataset = HeartDataset(features=x_test, labels=y_test)

# Mini-batches of 32: the training rows are shuffled, the test rows keep
# their original order.
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

In [33]:
class HeartDiseaseModel(nn.Module):
    """Fully connected classifier that emits one raw logit per sample.

    Four ``Linear -> ReLU`` stages of width ``hidden_dim`` feed a final
    ``Linear`` layer with a single output. No sigmoid is applied here.

    Args:
        input_dim: Number of features in each input row.
        hidden_dim: Width of every hidden layer.
    """

    def __init__(self, input_dim, hidden_dim=32):
        super().__init__()
        stages = []
        in_features = input_dim
        # Layers are created in forward order so parameter names
        # (net.0, net.2, ...) and initialisation order stay fixed.
        for _ in range(4):
            stages.append(nn.Linear(in_features, hidden_dim))
            stages.append(nn.ReLU())
            in_features = hidden_dim
        stages.append(nn.Linear(in_features, 1))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.net(x)

In [34]:
# Number of feature columns in the scaled training matrix; passed to the
# model as its input width.
input_dim = np.shape(x_train)[1]

In [35]:
# Seed PyTorch's global RNG before any weights are created so the random
# initialisation is repeatable on Restart & Run All. A DataLoader built
# without its own generator also takes its shuffling seed from this RNG, so
# batch order becomes repeatable too, provided the cells run in order.
torch.manual_seed(42)

model = HeartDiseaseModel(input_dim=input_dim, hidden_dim=32)
# BCEWithLogitsLoss applies the sigmoid itself, which is why the model's
# last layer outputs raw logits.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [36]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=20):
    """Train ``model`` in place and print the average loss per sample.

    The loss is printed every fifth epoch and also after the final epoch, so
    runs whose ``num_epochs`` is not a multiple of five still report a result.

    Args:
        model: Module whose output can be flattened to one value per sample.
        train_loader: Iterable yielding ``(features, labels)`` batches.
        criterion: Loss called as ``criterion(logits, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        None. The model is updated in place and left in training mode.

    Raises:
        ValueError: If an epoch yields no samples (for example an empty
            dataset, or ``drop_last=True`` with fewer rows than one batch).
    """
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        samples_seen = 0
        for X_batch, y_batch in train_loader:
            batch_size = X_batch.size(0)
            logits = model(X_batch).view(-1)
            loss = criterion(logits, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Weight by batch size so a smaller final batch does not skew the
            # average (assumes criterion returns a per-batch mean -- confirm
            # if a non-default reduction is ever used).
            running_loss += loss.item() * batch_size
            samples_seen += batch_size

        if samples_seen == 0:
            raise ValueError("train_loader yielded no samples; nothing to train on")

        # Divide by the samples actually processed, not len(dataset): the two
        # differ when the loader drops the last batch or uses a sampler.
        epoch_loss = running_loss / samples_seen
        is_final_epoch = (epoch + 1) == num_epochs
        if (epoch + 1) % 5 == 0 or is_final_epoch:
            print(f"Epoch [{epoch+1}/{num_epochs}] - Loss: {epoch_loss:.4f}")


In [37]:
# Fit the network for 20 passes over the training loader.
train_model(
    model=model,
    train_loader=train_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=20,
)

Epoch [5/20] - Loss: 0.3054
Epoch [10/20] - Loss: 0.2055
Epoch [15/20] - Loss: 0.1258
Epoch [20/20] - Loss: 0.0739


In [38]:
def evaluate_model(model, test_loader):
    """Measure classification accuracy of ``model`` over ``test_loader``.

    Logits are passed through a sigmoid and thresholded at 0.5 to obtain 0/1
    predictions, which are compared element-wise with the labels.

    Args:
        model: Module whose output can be flattened to one logit per sample.
        test_loader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        Accuracy as a float in ``[0, 1]``. The same value is also printed as
        a percentage.

    Raises:
        ValueError: If ``test_loader`` yields no samples.

    Note:
        The model is switched to eval mode and left that way.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # inference only; skip gradient bookkeeping
        for X_batch, y_batch in test_loader:
            logits = model(X_batch).view(-1)
            probs = torch.sigmoid(logits)
            preds = (probs >= 0.5).float()
            total += y_batch.size(0)
            correct += (preds == y_batch).sum().item()

    if total == 0:
        raise ValueError("test_loader yielded no samples; accuracy is undefined")

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    # Returned as well as printed so callers can compare or log the value.
    return accuracy
# Score the trained network on the held-out loader.
evaluate_model(model=model, test_loader=test_loader)

Test Accuracy: 97.56%


In [39]:
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

In [40]:
# Baseline: Gaussian Naive Bayes, tuned with a 5-fold cross-validated search
# over var_smoothing and scored on accuracy. n_jobs=-1 runs the folds in
# parallel on every available core.
gnb = GaussianNB()
grid_search = GridSearchCV(
    estimator=gnb,
    param_grid={"var_smoothing": [1e-12, 1e-10, 1e-9, 1e-8, 1e-6, 1e-4]},
    scoring="accuracy",
    cv=5,
    n_jobs=-1,
)

In [41]:
# Run the search on the scaled training split, then keep both the winning
# parameter setting and the estimator that carries it.
grid_search.fit(X=x_train, y=y_train)
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

In [42]:
# Predict labels for the training split first, then the test split.
train_preds, test_preds = (
    best_model.predict(split) for split in (x_train, x_test)
)

In [43]:
# Fraction of correct predictions on each split, reported as a percentage.
train_accuracy = accuracy_score(y_true=y_train, y_pred=train_preds)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_preds)
print(
    f"Train Accuracy: {train_accuracy*100:.2f}%",
    f"Test  Accuracy: {test_accuracy*100:.2f}%",
    sep="\n",
)

Train Accuracy: 83.05%
Test  Accuracy: 82.93%
