In [2]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report
from sklearn.cluster import KMeans

# Load the dataset
file_path = '10k_diabetes (1).csv'  # Replace with your file path
data = pd.read_csv(file_path)

# Selecting relevant columns and encoding categorical features
columns_to_use = ['race', 'gender', 'age', 'admission_type_id', 'discharge_disposition_id',
                  'admission_source_id', 'time_in_hospital', 'num_lab_procedures',
                  'num_medications', 'number_outpatient', 'number_emergency',
                  'number_inpatient', 'number_diagnoses']

X = data[columns_to_use].copy()
y = data['readmitted']

# Convert categorical columns to numeric using Label Encoding
for col in X.select_dtypes(include=['object']).columns:
    X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize numerical columns
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Function to evaluate models
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, "predict_proba") else y_pred
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
    print(f"ROC-AUC: {roc_auc_score(y_test, y_pred_proba)}")
    print(classification_report(y_test, y_pred))

# Step 1: Predict Patient Readmission Risk
# Logistic Regression
print("Logistic Regression:")
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)
evaluate_model(log_reg, X_test, y_test)

# Decision Tree
print("\nDecision Tree:")
decision_tree = DecisionTreeClassifier(max_depth=5, random_state=42)
decision_tree.fit(X_train, y_train)
evaluate_model(decision_tree, X_test, y_test)

# Random Forest
print("\nRandom Forest:")
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
random_forest.fit(X_train, y_train)
evaluate_model(random_forest, X_test, y_test)

# Gradient Boosting
print("\nGradient Boosting:")
gradient_boosting = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
gradient_boosting.fit(X_train, y_train)
evaluate_model(gradient_boosting, X_test, y_test)

# Step 2: Clustering for Intervention Recommendation
# Using KMeans clustering to identify patient segments
num_clusters = 3
kmeans = KMeans(n_clusters=num_clusters, random_state=42)
X_clustered = kmeans.fit_predict(X)

# Assign recommended interventions based on clusters
interventions = {
    0: "High-risk intervention: Schedule frequent follow-ups and monitor closely.",
    1: "Moderate-risk intervention: Educate patient on self-management techniques.",
    2: "Low-risk intervention: Provide standard discharge instructions."
}

# Add recommendations to the dataset
X['Cluster'] = X_clustered
X['Intervention_Recommendation'] = X['Cluster'].map(interventions)

# Display sample recommendations
sample_recommendations = X[['Cluster', 'Intervention_Recommendation']].head()
print("\nSample Patient Recommendations:")
print(sample_recommendations)


Logistic Regression:
Accuracy: 0.668
ROC-AUC: 0.677122331807826
              precision    recall  f1-score   support

       False       0.67      0.90      0.77      1221
        True       0.66      0.31      0.42       779

    accuracy                           0.67      2000
   macro avg       0.66      0.60      0.59      2000
weighted avg       0.67      0.67      0.63      2000


Decision Tree:
Accuracy: 0.6625
ROC-AUC: 0.6832364515291345
              precision    recall  f1-score   support

       False       0.67      0.88      0.76      1221
        True       0.63      0.33      0.43       779

    accuracy                           0.66      2000
   macro avg       0.65      0.60      0.60      2000
weighted avg       0.65      0.66      0.63      2000


Random Forest:
Accuracy: 0.666
ROC-AUC: 0.6834167578711867
              precision    recall  f1-score   support

       False       0.69      0.82      0.75      1221
        True       0.60      0.43      0.50       77

In [3]:
# Import necessary libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

# Define the neural network model architecture
nn_model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),  # First hidden layer
    Dense(32, activation='relu'),                             # Second hidden layer
    Dense(1, activation='sigmoid')                            # Output layer for binary classification
])

# Compile the model
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = nn_model.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1, validation_split=0.2)

# Predict on the test data
nn_predictions = (nn_model.predict(X_test) > 0.5).astype("int32")

# Evaluate the model
print("\nNeural Network Accuracy:", accuracy_score(y_test, nn_predictions))
print("ROC-AUC Score:", roc_auc_score(y_test, nn_model.predict(X_test)))
print("Classification Report:\n", classification_report(y_test, nn_predictions))


Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.6147 - loss: 0.6510 - val_accuracy: 0.6237 - val_loss: 0.6477
Epoch 2/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6475 - loss: 0.6328 - val_accuracy: 0.6237 - val_loss: 0.6447
Epoch 3/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6555 - loss: 0.6292 - val_accuracy: 0.6319 - val_loss: 0.6430
Epoch 4/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6578 - loss: 0.6160 - val_accuracy: 0.6300 - val_loss: 0.6427
Epoch 5/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6632 - loss: 0.6170 - val_accuracy: 0.6300 - val_loss: 0.6451
Epoch 6/10
[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.6581 - loss: 0.6191 - val_accuracy: 0.6356 - val_loss: 0.6454
Epoch 7/10
[1m200/200[0m [32m━━━━━━━

In [12]:
# Import necessary libraries
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import accuracy_score, roc_auc_score, classification_report

# Load the dataset
file_path = '/content/10k_diabetes (1).csv'  # Update this path with the correct file path
data = pd.read_csv(file_path)

# Selecting relevant columns for input features
columns_to_use = ['race', 'gender', 'age', 'admission_type_id', 'discharge_disposition_id',
                  'admission_source_id', 'time_in_hospital', 'num_lab_procedures',
                  'num_medications', 'number_outpatient', 'number_emergency',
                  'number_inpatient', 'number_diagnoses']

X = data[columns_to_use].copy()
y = data['readmitted']

# Encode categorical variables
for col in X.select_dtypes(include=['object']).columns:
    X[col] = LabelEncoder().fit_transform(X[col].astype(str))

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the numerical columns for neural network
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Ensure y_train is in binary integer format (0 and 1)
y_train = y_train.astype(int)  # Corrected: Convert to integers

# Calculate class weights to handle imbalance
unique_classes = np.unique(y_train)
class_weights = compute_class_weight(class_weight='balanced', classes=unique_classes, y=y_train)

# Create class_weights_dict with integer keys
class_weights_dict = {int(cls): weight for cls, weight in zip(unique_classes, class_weights)}

print("Class Weights:", class_weights_dict)  # Optional: To check computed weights

# Define the neural network model with Input layer and additional dropout layers
nn_model = Sequential([
    Input(shape=(X_train.shape[1],)),  # Input layer with the shape of the feature set
    Dense(128, activation='relu'),     # First hidden layer with 128 neurons
    Dropout(0.3),                      # Dropout layer to prevent overfitting
    Dense(64, activation='relu'),      # Second hidden layer with 64 neurons
    Dropout(0.3),                      # Dropout layer
    Dense(32, activation='relu'),      # Third hidden layer with 32 neurons
    Dense(1, activation='sigmoid')     # Output layer for binary classification
])

# Compile the model with a reduced learning rate
nn_model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
                 loss='binary_crossentropy', metrics=['accuracy'])

# Define EarlyStopping to avoid overfitting
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model with class weights and early stopping
history = nn_model.fit(X_train, y_train, epochs=100, batch_size=64, verbose=1,
                       validation_split=0.2, class_weight=class_weights_dict,
                       callbacks=[early_stopping])

# Make predictions on the test set
nn_predictions = (nn_model.predict(X_test) > 0.5).astype("int32")

# Evaluate the model
print("\nEnhanced Neural Network Accuracy:", accuracy_score(y_test, nn_predictions))
print("ROC-AUC Score:", roc_auc_score(y_test, nn_model.predict(X_test)))
print("Classification Report:\n", classification_report(y_test, nn_predictions))

Class Weights: {0: 0.8309098462816784, 1: 1.2554927809165097}


KeyError: 0