In [4]:
# Import necessary libraries
import pandas as pd
import numpy as np
import statsmodels.api as sm
from statsmodels.formula.api import ols
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import GradientBoostingClassifier,RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, roc_auc_score, classification_report,mean_squared_error, precision_score, recall_score
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from imblearn.ensemble import BalancedBaggingClassifier
#from tensorflow.keras.models import Sequential
#from tensorflow.keras.layers import Dense, Conv1D, Flatten, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from statsmodels.discrete.discrete_model import MNLogit
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import uniform, randint

In [5]:
# put the dataset which comes from short_dataset_feature_engineering.ipynb 
file_path = '/data/caysar9/results/final_short.csv'
df = pd.read_csv(file_path)

In [20]:
# Define features and target
features = [
    'trigger_lack_physical_activity', 'trigger_physical_activity', 'trigger_poor_sleep',
    'trigger_stress', 'sleep_duration_hours', 'sleep_duration_past_7_days', 'age',
    'migraine_attacks_past7days', 'mean_migraine_duration_past7days', 'total_physical_activity',
    'reported_anxiety', 'reported_depression'
]
target = 'migraine_days_per_month'

X = df[features]
y = df[target]

# Handle missing values in the target variable
if y.isnull().sum() > 0:
    print(f"Missing values in target: {y.isnull().sum()}. Filling with median.")
    y = y.fillna(y.median())  # Fill NaN values with the median of the column

# Ensure all values in the target are within the defined range
print(f"Target value range before binning: {y.min()} to {y.max()}")
if y.min() < 0 or y.max() > 7:
    print("Adjusting out-of-range values.")
    y = y.clip(lower=0, upper=7)  # Clip values to fit within [0, 7]

# Define bins for categorizing the frequencies (Low: 1-2, Medium: 3-4, High: 5-7)
bins = [0, 2, 4, 7]
labels = [1, 2, 3]  # Integers for Low, Medium, High

# Bin the target variable
y_binned = pd.cut(y, bins=bins, labels=labels, include_lowest=True).astype(int)

# Frequency counts before splitting
print("Frequency counts for each class before sampling:")
print(y_binned.value_counts(sort=False))

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y_binned, test_size=0.2, random_state=42, stratify=y_binned)

# Frequency counts before applying SMOTE
print("\nFrequency counts before SMOTE:")
print("Training set:")
print(y_train.value_counts(sort=False))
print("Test set:")
print(y_test.value_counts(sort=False))

# Apply SMOTE to the training data
smote = SMOTE(random_state=42)
X_train_balanced, y_train_balanced = smote.fit_resample(X_train, y_train)

# Frequency counts after applying SMOTE
print("\nFrequency counts after SMOTE:")
print(y_train_balanced.value_counts(sort=False))



Target value range before binning: 1 to 17
Adjusting out-of-range values.
Frequency counts for each class before sampling:
migraine_days_per_month
2    4152
1    6889
3    3361
Name: count, dtype: int64

Frequency counts before SMOTE:
Training set:
migraine_days_per_month
2    3321
3    2689
1    5511
Name: count, dtype: int64
Test set:
migraine_days_per_month
1    1378
3     672
2     831
Name: count, dtype: int64

Frequency counts after SMOTE:
migraine_days_per_month
2    5511
3    5511
1    5511
Name: count, dtype: int64


In [21]:
# Define hyperparameter distributions
param_distributions = {
    'C': uniform(0.01, 10),  # Regularization strength
    'max_iter': randint(100, 1000),  # Number of iterations
    'solver': ['lbfgs', 'newton-cg', 'sag'],  # Solvers that support multinomial
    'multi_class': ['multinomial']  # Multinomial setting for multi-class classification
}

# Initialize the Logistic Regression model
lr_model = LogisticRegression(random_state=42)

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=lr_model,
    param_distributions=param_distributions,
    n_iter=20,  # Number of random combinations to try
    scoring='roc_auc_ovo',  # AUC (One-vs-One for multi-class)
    cv=3,  # 3-fold cross-validation
    verbose=1,  # Show progress
    n_jobs=-1,  # Use all available cores
    random_state=42  # For reproducibility
)

# Fit RandomizedSearchCV to the training data
random_search.fit(X_train_balanced, y_train_balanced)

# Get the best model and hyperparameters
best_model = random_search.best_estimator_
print("\nBest Hyperparameters:", random_search.best_params_)

# Train the best model on the balanced training set
best_model.fit(X_train_balanced, y_train_balanced)

# Make predictions and evaluate
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)  # Probabilities for each class

# Calculate AUC
auc_score = roc_auc_score(y_test, y_pred_proba, multi_class='ovo')
print(f"\nAUC Score (One-vs-One): {auc_score:.4f}")

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=['Low', 'Medium', 'High']))


Fitting 3 folds for each of 20 candidates, totalling 60 fits


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt


Best Hyperparameters: {'C': 0.08066305219717405, 'max_iter': 956, 'multi_class': 'multinomial', 'solver': 'lbfgs'}





AUC Score (One-vs-One): 0.7764

Confusion Matrix:
[[1029  349    0]
 [ 258  348  225]
 [ 105  147  420]]

Classification Report:
              precision    recall  f1-score   support

         Low       0.74      0.75      0.74      1378
      Medium       0.41      0.42      0.42       831
        High       0.65      0.62      0.64       672

    accuracy                           0.62      2881
   macro avg       0.60      0.60      0.60      2881
weighted avg       0.62      0.62      0.62      2881



In [22]:
# Add a constant to the feature set for the intercept term
X_train_balanced_const = sm.add_constant(X_train_balanced)


# Fit a multinomial logistic regression model using statsmodels
logit_model = sm.MNLogit(y_train_balanced, X_train_balanced_const)
logit_results = logit_model.fit()

# Get the summary, including coefficients and p-values
print(logit_results.summary())

Optimization terminated successfully.
         Current function value: 0.779471
         Iterations 8
                             MNLogit Regression Results                            
Dep. Variable:     migraine_days_per_month   No. Observations:                16533
Model:                             MNLogit   Df Residuals:                    16507
Method:                                MLE   Df Model:                           24
Date:                     Sat, 07 Dec 2024   Pseudo R-squ.:                  0.2905
Time:                             19:50:53   Log-Likelihood:                -12887.
converged:                            True   LL-Null:                       -18163.
Covariance Type:                 nonrobust   LLR p-value:                     0.000
       migraine_days_per_month=2       coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------------------------
const                   

In [23]:
# Define hyperparameter distributions
param_distributions = {
    'criterion': ['gini', 'entropy', 'log_loss'],  # Splitting criteria
    'max_depth': randint(3, 50),  # Maximum depth of the tree
    'min_samples_split': randint(2, 20),  # Minimum number of samples required to split a node
    'min_samples_leaf': randint(1, 10),  # Minimum number of samples in a leaf node
    'max_features': [None, 'sqrt', 'log2']  # Number of features considered for the best split
}

# Initialize the Decision Tree model
dt_model = DecisionTreeClassifier(random_state=42)

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=dt_model,
    param_distributions=param_distributions,
    n_iter=20,  # Number of random combinations to try
    scoring='roc_auc_ovo',  # Evaluate based on AUC for multi-class problems
    cv=3,  # 3-fold cross-validation
    verbose=1,  # Show progress
    n_jobs=-1,  # Use all available cores
    random_state=42  # For reproducibility
)

# Fit RandomizedSearchCV to the training data
random_search.fit(X_train_balanced, y_train_balanced)

# Get the best model and hyperparameters
best_model = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

# Train the best model on the balanced training set
best_model.fit(X_train_balanced, y_train_balanced)

# Make predictions and evaluate
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)  # Probabilities for each class

# Calculate AUC
auc_score = roc_auc_score(y_test, y_pred_proba, multi_class='ovo')
print(f"\nAUC Score (One-vs-One): {auc_score:.4f}")

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Low", "Medium", "High"]))

Fitting 3 folds for each of 20 candidates, totalling 60 fits


Best Hyperparameters: {'criterion': 'log_loss', 'max_depth': 5, 'max_features': None, 'min_samples_leaf': 3, 'min_samples_split': 8}

AUC Score (One-vs-One): 0.7904

Confusion Matrix:
[[1275  103    0]
 [ 440  311   80]
 [ 178  140  354]]

Classification Report:
              precision    recall  f1-score   support

         Low       0.67      0.93      0.78      1378
      Medium       0.56      0.37      0.45       831
        High       0.82      0.53      0.64       672

    accuracy                           0.67      2881
   macro avg       0.68      0.61      0.62      2881
weighted avg       0.67      0.67      0.65      2881



In [24]:
# Define hyperparameter distributions
param_distributions = {
    'n_estimators': randint(50, 300),  # Number of trees in the forest
    'max_depth': randint(5, 50),  # Maximum depth of each tree
    'min_samples_split': randint(2, 20),  # Minimum samples to split a node
    'min_samples_leaf': randint(1, 10),  # Minimum samples in a leaf node
    'max_features': ['sqrt', 'log2', None],  # Number of features to consider for the best split
    'bootstrap': [True, False]  # Whether bootstrap samples are used
}

# Initialize the Random Forest model
rf_model = RandomForestClassifier(random_state=42)

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_distributions,
    n_iter=20,  # Number of random combinations to try
    scoring='roc_auc_ovo',  # Evaluate based on AUC for multi-class classification
    cv=3,  # 3-fold cross-validation
    verbose=1,  # Show progress
    n_jobs=-1,  # Use all available cores
    random_state=42  # For reproducibility
)

# Fit RandomizedSearchCV to the training data
random_search.fit(X_train_balanced, y_train_balanced)

# Get the best model and hyperparameters
best_model = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

# Train the best model on the balanced training set
best_model.fit(X_train_balanced, y_train_balanced)

# Make predictions on the test set
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)  # Probabilities for each class

# Calculate AUC
auc_score = roc_auc_score(y_test, y_pred_proba, multi_class='ovo')
print(f"\nAUC Score (One-vs-One): {auc_score:.4f}")

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Low", "Medium", "High"]))


Fitting 3 folds for each of 20 candidates, totalling 60 fits


Best Hyperparameters: {'bootstrap': False, 'max_depth': 34, 'max_features': 'log2', 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 253}

AUC Score (One-vs-One): 0.7856

Confusion Matrix:
[[1166  188   24]
 [ 356  397   78]
 [ 150  179  343]]

Classification Report:
              precision    recall  f1-score   support

         Low       0.70      0.85      0.76      1378
      Medium       0.52      0.48      0.50       831
        High       0.77      0.51      0.61       672

    accuracy                           0.66      2881
   macro avg       0.66      0.61      0.63      2881
weighted avg       0.66      0.66      0.65      2881



In [25]:
# Get feature importance
feature_importance = best_model.feature_importances_
feature_names = X_train_balanced.columns
importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance}).sort_values(by='Importance', ascending=False)
print("\nFeature Importance:\n", importance_df)



Feature Importance:
                              Feature  Importance
7         migraine_attacks_past7days    0.418731
8   mean_migraine_duration_past7days    0.137442
5         sleep_duration_past_7_days    0.116500
4               sleep_duration_hours    0.110415
6                                age    0.102991
9            total_physical_activity    0.076249
2                 trigger_poor_sleep    0.009209
3                     trigger_stress    0.008720
10                  reported_anxiety    0.006924
11               reported_depression    0.006657
0     trigger_lack_physical_activity    0.004732
1          trigger_physical_activity    0.001429


In [26]:
# Define hyperparameter distributions
param_distributions = {
    'n_estimators': randint(50, 300),  # Number of boosting stages
    'learning_rate': uniform(0.01, 0.3),  # Learning rate for boosting
    'max_depth': randint(3, 20),  # Maximum depth of each tree
    'min_samples_split': randint(2, 20),  # Minimum number of samples required to split an internal node
    'min_samples_leaf': randint(1, 10),  # Minimum number of samples required in a leaf node
    'subsample': uniform(0.7, 0.3),  # Fraction of samples used for fitting each base learner
    'max_features': ['sqrt', 'log2', None]  # Number of features to consider for splits
}

# Initialize the Gradient Boosting model
gb_model = GradientBoostingClassifier(random_state=42)

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=gb_model,
    param_distributions=param_distributions,
    n_iter=20,  # Number of random combinations to try
    scoring='roc_auc_ovo',  # Evaluate based on AUC for multi-class classification
    cv=3,  # 3-fold cross-validation
    verbose=1,  # Show progress
    n_jobs=-1,  # Use all available cores
    random_state=42  # For reproducibility
)

# Fit RandomizedSearchCV to the training data
random_search.fit(X_train_balanced, y_train_balanced)

# Get the best model and hyperparameters
best_model = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

# Train the best model on the balanced training set
best_model.fit(X_train_balanced, y_train_balanced)

# Make predictions and evaluate
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)  # Probabilities for each class

# Calculate AUC
auc_score = roc_auc_score(y_test, y_pred_proba, multi_class='ovo')
print(f"\nAUC Score (One-vs-One): {auc_score:.4f}")

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Low", "Medium", "High"]))


Fitting 3 folds for each of 20 candidates, totalling 60 fits
Best Hyperparameters: {'learning_rate': 0.30087538832936755, 'max_depth': 14, 'max_features': 'log2', 'min_samples_leaf': 4, 'min_samples_split': 15, 'n_estimators': 144, 'subsample': 0.976562270506935}

AUC Score (One-vs-One): 0.7808

Confusion Matrix:
[[1142  198   38]
 [ 345  391   95]
 [ 146  166  360]]

Classification Report:
              precision    recall  f1-score   support

         Low       0.70      0.83      0.76      1378
      Medium       0.52      0.47      0.49       831
        High       0.73      0.54      0.62       672

    accuracy                           0.66      2881
   macro avg       0.65      0.61      0.62      2881
weighted avg       0.65      0.66      0.65      2881



In [27]:
# Get feature importance
feature_importance = best_model.feature_importances_
feature_names = X_train_balanced.columns
importance_df = pd.DataFrame({'Feature': feature_names, 'Importance': feature_importance}).sort_values(by='Importance', ascending=False)
print("\nFeature Importance:\n", importance_df)



Feature Importance:
                              Feature  Importance
7         migraine_attacks_past7days    0.432284
8   mean_migraine_duration_past7days    0.128902
5         sleep_duration_past_7_days    0.119150
4               sleep_duration_hours    0.108179
6                                age    0.095664
9            total_physical_activity    0.075555
3                     trigger_stress    0.009697
2                 trigger_poor_sleep    0.009686
10                  reported_anxiety    0.007680
11               reported_depression    0.007133
0     trigger_lack_physical_activity    0.004468
1          trigger_physical_activity    0.001603


In [28]:
# Define hyperparameter distributions
param_distributions = {
    'n_neighbors': randint(1, 50),  # Number of neighbors
    'weights': ['uniform', 'distance'],  # Weighting strategies
    'metric': ['euclidean', 'manhattan', 'minkowski'],  # Distance metrics
    'p': randint(1, 3)  # distance parameter 
}

# Initialize the K-Nearest Neighbors model
knn_model = KNeighborsClassifier()

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=knn_model,
    param_distributions=param_distributions,
    n_iter=20,  # Number of random combinations to try
    scoring='roc_auc_ovo',  # Evaluate based on AUC for multi-class classification
    cv=3,  # 3-fold cross-validation
    verbose=1,  # Show progress
    n_jobs=-1,  # Use all available cores
    random_state=42  # For reproducibility
)

# Fit RandomizedSearchCV to the training data
random_search.fit(X_train_balanced, y_train_balanced)

# Get the best model and hyperparameters
best_model = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

# Train the best model on the balanced training set
best_model.fit(X_train_balanced, y_train_balanced)

# Make predictions and evaluate
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)  # Probabilities for each class

# Calculate AUC
auc_score = roc_auc_score(y_test, y_pred_proba, multi_class='ovo')
print(f"\nAUC Score (One-vs-One): {auc_score:.4f}")

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=["Low", "Medium", "High"]))


Fitting 3 folds for each of 20 candidates, totalling 60 fits


Best Hyperparameters: {'metric': 'manhattan', 'n_neighbors': 6, 'p': 2, 'weights': 'distance'}

AUC Score (One-vs-One): 0.7291

Confusion Matrix:
[[880 341 157]
 [302 354 175]
 [103 176 393]]

Classification Report:
              precision    recall  f1-score   support

         Low       0.68      0.64      0.66      1378
      Medium       0.41      0.43      0.42       831
        High       0.54      0.58      0.56       672

    accuracy                           0.56      2881
   macro avg       0.54      0.55      0.55      2881
weighted avg       0.57      0.56      0.57      2881



In [29]:
'''from sklearn.metrics import roc_auc_score, confusion_matrix, classification_report
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from scikeras.wrappers import KerasClassifier
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd

# Define features and target
features = [
    'trigger_lack_physical_activity', 'trigger_physical_activity', 'trigger_poor_sleep',
    'trigger_stress', 'sleep_duration_hours', 'sleep_duration_past_7_days', 'age',
    'migraine_attacks_past7days', 'mean_migraine_duration_past7days', 'total_physical_activity',
    'reported_anxiety', 'reported_depression'
]
target = 'migraine_days_per_month'

X = df[features]
y = df[target]

# Handle missing values in the target variable
if y.isnull().sum() > 0:
    print(f"Missing values in target: {y.isnull().sum()}. Filling with median.")
    y = y.fillna(y.median())

# Ensure all values in the target are within the defined range
print(f"Target value range before binning: {y.min()} to {y.max()}")
if y.min() < 0 or y.max() > 7:
    print("Adjusting out-of-range values.")
    y = y.clip(lower=0, upper=7)  # Clip values to fit within [0, 7]
    
# Define bins for categorizing the frequencies (Low: 1-2, Medium: 3-4, High: 5-7)
bins = [0, 2, 4, 7]
labels = [0, 1, 2]  # Integers for Low, Medium, High
y_binned = pd.cut(y, bins=bins, labels=labels, include_lowest=True).astype(int)

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binned, test_size=0.2, random_state=42, stratify=y_binned)

# Apply SMOTE to the training set
smote = SMOTE(random_state=42)
X_train_smote, y_train_smote = smote.fit_resample(X_train, y_train)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_smote)
X_test_scaled = scaler.transform(X_test)

# Define a function to create the neural network model
def create_model(learning_rate=0.001, neurons=[64, 32, 16]):
    model = Sequential()
    model.add(Dense(neurons[0], activation='relu', input_shape=(X_train_scaled.shape[1],)))
    model.add(Dense(neurons[1], activation='relu'))
    model.add(Dense(neurons[2], activation='relu'))
    model.add(Dense(3, activation='softmax'))  # 3 classes: Low, Medium, High
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Wrap the model for RandomizedSearchCV
keras_clf = KerasClassifier(model=create_model, verbose=0)

# Define hyperparameter distributions
param_distributions = {
    "model__learning_rate": [0.001, 0.01, 0.1],
    "model__neurons": [[64, 32, 16], [128, 64, 32], [32, 16, 8]],
    "batch_size": [16, 32, 64],
    "epochs": [20, 50, 100]
}

# Set up RandomizedSearchCV
random_search = RandomizedSearchCV(
    estimator=keras_clf,
    param_distributions=param_distributions,
    n_iter=10,  # Number of random combinations to try
    scoring='roc_auc_ovo',  # Optimize AUC for multi-class classification
    cv=3,  # 3-fold cross-validation
    verbose=1,
    n_jobs=-1,  # Use all available cores
    random_state=42
)

# Fit RandomizedSearchCV to the training data
random_search.fit(X_train_scaled, y_train_smote)

# Get the best model and parameters
best_model = random_search.best_estimator_
print("Best Hyperparameters:", random_search.best_params_)

# Evaluate the model
y_pred = best_model.predict(X_test_scaled)
y_pred_proba = best_model.predict_proba(X_test_scaled)

# Calculate AUC
auc_score = roc_auc_score(pd.get_dummies(y_test), y_pred_proba, multi_class='ovo')
print(f"\nAUC Score (One-vs-One): {auc_score:.4f}")

# Confusion Matrix and Classification Report
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

target_names = ["Low", "Medium", "High"]
print("\nClassification Report:")
print(classification_report(y_test, y_pred, target_names=target_names))
'''

' \nfrom sklearn.metrics import roc_auc_score, confusion_matrix, classification_report\nfrom sklearn.model_selection import RandomizedSearchCV, train_test_split\nfrom tensorflow.keras.models import Sequential\nfrom tensorflow.keras.layers import Dense\nfrom tensorflow.keras.optimizers import Adam\nfrom scikeras.wrappers import KerasClassifier\nfrom imblearn.over_sampling import SMOTE\nfrom sklearn.preprocessing import StandardScaler\nimport numpy as np\n\n# Define features and target\nfeatures = [\'trigger_lack_physical_activity\',\'trigger_physical_activity\',\'trigger_poor_sleep\',\'trigger_stress\',\'sleep_duration_hours\',\'sleep_duration_past_7_days\',\'age\',\'migraine_attacks_past7days\',\'mean_migraine_duration_past7days\',\'total_physical_activity\',\'reported_anxiety\', \'reported_depression\']\n\ntarget = \'attack_duration_hours\'\n\nX = df[features]\ny = df[target]\n\n# Define bins for migraine duration (Low: 4-9, Medium: 10-22, High: 23-72 hours)\nbins = [4, 9, 22, 72]\ny_