In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score






In [2]:
# Load the data
file_path = '../csvs_files/cleaning_data2.csv'
data = pd.read_csv(file_path)



In [3]:
# Separate features (X) and target variable (y)
X = data.drop(columns=['stroke'])  
y = data['stroke']  

# Identify numeric and categorical columns
numeric_columns = ['age', 'avg_glucose_level', 'bmi']
categorical_columns = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']



In [4]:
# Preprocess numeric data (scaling)
scaler = StandardScaler()
X[numeric_columns] = scaler.fit_transform(X[numeric_columns])

# Preprocess categorical data (one-hot encoding)
encoder = OneHotEncoder(sparse=False, handle_unknown='ignore')
X_encoded = pd.get_dummies(X[categorical_columns])
X.drop(columns=categorical_columns, inplace=True)
X = pd.concat([X, X_encoded], axis=1)



In [5]:
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the neural network model
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dense(1, activation='sigmoid')
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")




Epoch 1/10


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 93.93%


In [9]:
# Save log_data to CSV

log_data = pd.DataFrame({
    'Model': ['Neural Network'],
    'Test Accuracy': [test_accuracy]
})

log_data.to_csv('../csvs_files/model1_performance.csv', index=False)

In [10]:
#  the second model
# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=164)
model_2 = Sequential([
    Dense(150, activation='relu', input_shape=(X.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile and train the second model
model_2.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_2.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)
# Evaluate the model on test data
test_loss, test_accuracy = model_2.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 94.91%


In [11]:
# Save log_data to CSV
log_data = pd.DataFrame({
    'Model': ['Neural Network'],
    'Test Accuracy': [test_accuracy]
})

log_data.to_csv('../csvs_files/mode2_performance.csv', index=False)