In [10]:
import json
import pandas as pd
import numpy as np
import shap
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Bidirectional, LSTM, GRU, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, RobustScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.utils.class_weight import compute_class_weight
import os
from tqdm import tqdm

### Loading and Preprocessing Data

In [11]:
file_path = "../data/monthly_data/monthly_data.json" 
with open(file_path, "r") as file:
    data = json.load(file)
df = pd.DataFrame(data)

# preprocessing
df["month"] = pd.to_datetime(df["month"])
df = df[df["status"].isin(["graduated", "retired"])]

numeric_features = [
    "avg_response_time", "avg_first_response_time", 
    "accepted_prs", "avg_time_to_acceptance", "rejected_prs",
    "avg_time_to_rejection", "unresolved_prs", "avg_thread_length",
    "new_prs", "new_comments", "active_devs"
]

# Check if total_active_devs is null in any project - may signify no active developers for PROJECT
# has_nan = df["total_active_devs"].isna().any()
# print(has_nan)

df["total_active_devs"] = df["total_active_devs"].replace(0, np.nan) # this is for sanity only, there are no empty total_active_devs (has been checked)
for feature in numeric_features:
    df[feature] = df[feature] / df["total_active_devs"]

# Fill missing values with 0
df[numeric_features] = df[numeric_features].fillna(0)
df["status"] = df["status"].map({"graduated": 1, "retired": 0})
# Drop total active devs
df = df.drop(columns=["total_active_devs"])
df = df.sort_values(by=["repo", "month"])
df.head(10)

Unnamed: 0,listid,repo,status,month,avg_response_time,avg_first_response_time,active_devs,accepted_prs,avg_time_to_acceptance,rejected_prs,avg_time_to_rejection,unresolved_prs,avg_thread_length,new_prs,new_comments
0,51,ACE,1,2014-09-01,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0
1,51,ACE,1,2014-10-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
2,51,ACE,1,2014-11-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
3,51,ACE,1,2014-12-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
4,51,ACE,1,2015-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
5,51,ACE,1,2015-02-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0
6,51,ACE,1,2015-03-01,0.0,0.0,0.0,0.0,0.0,0.2,3376272.8,0.0,0.0,0.0,0.0
7,51,ACE,1,2016-02-01,0.0,0.0,0.2,0.0,0.0,2.2,33260.2,0.0,0.0,2.2,0.0
8,51,ACE,1,2018-07-01,0.0,0.0,0.2,0.0,0.0,0.0,0.0,0.2,0.0,0.2,0.0
9,51,ACE,1,2018-08-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0


### Scaling the Features

In [12]:
scaler = MinMaxScaler()
df[numeric_features] = scaler.fit_transform(df[numeric_features])

### Ensuring Correct Train-Test Split (Keep Class Distribution same)

In [13]:
# Group projects by first status (Graduated/Retired)
project_labels = df.groupby("repo")["status"].first()

# Split projects while maintaining class balance
train_projects, test_projects = train_test_split(
    project_labels.index,  # Project names
    test_size=0.15, 
    random_state=42, 
    stratify=project_labels  # Maintain class balance
)

# Split data based on project assignments
train_data = df[df['repo'].isin(train_projects)]
test_data = df[df['repo'].isin(test_projects)]

print(f"Train data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")

print(f"Length of data df: {df.shape[0]}")
print(f"Length of train data: {train_data.shape[0]}")
print(f"Length of test data: {test_data.shape[0]}")

if train_data.shape[0] + test_data.shape[0] == df.shape[0]:
    print("train and test data's first dimension is represents project data month-wise, hence the numbers add up to total df length")


Train data shape: (14886, 15)
Test data shape: (2800, 15)
Length of data df: 17686
Length of train data: 14886
Length of test data: 2800
train and test data's first dimension is represents project data month-wise, hence the numbers add up to total df length


In [14]:
# Check for common projects between train and test data
common_projects = set(train_projects).intersection(set(test_projects))

if common_projects:
    print(f"Warning: Common projects in both train and test sets: {common_projects}")
else:
    print("No project leakage detected.")

# Verify the number of unique projects in train and test sets
print(f"Number of unique projects in training set: {len(train_projects)}")
print(f"Number of unique projects in test set: {len(test_projects)}")

# Verify that the total number of unique projects is the same as the total dataset
total_unique_projects = len(df['repo'].unique())
assert len(train_projects) + len(test_projects) == total_unique_projects, \
    "Mismatch in total number of unique projects between train and test sets."

No project leakage detected.
Number of unique projects in training set: 167
Number of unique projects in test set: 30


### Apply Zero Padding + Create Monthly Sequence for Model

In [15]:
def create_project_sequences(data, max_timesteps):
    X, y = [], []
    
    grouped_data = data.groupby("repo")  # Process each project separately
    
    for _, group in grouped_data:
        group = group.sort_values(by="month")  # Ensure chronological order
        features = group[numeric_features].values  # Extract numeric features
        label = group["status"].values[-1]  # Take the final status (Graduated/Retired)

        X.append(features)  # Full sequence of project
        y.append(label)  # Assign label for project

    # Pad sequences to the global max_timesteps
    X_padded = np.array([np.pad(seq, ((0, max_timesteps - len(seq)), (0, 0)), mode="constant") for seq in X])

    # print(f"Shape of X without padding: {np.array(X).shape}") # this raise inhomogeneous shape error, which means each project has different number of months
    return np.array(X_padded), np.array(y)

# Step 1: Find the global maximum time steps across train & test
max_timesteps = max(train_data.groupby("repo").size().max(), test_data.groupby("repo").size().max())

# Step 2: Generate sequences with consistent padding
X_train, y_train = create_project_sequences(train_data, max_timesteps)
X_test, y_test = create_project_sequences(test_data, max_timesteps)

X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, 
    test_size=0.15,  # 15% for validation
    random_state=42, 
    stratify=y_train  # Preserve class distribution (Graduated/Retired)
)

# Print shapes
print(f"X_train shape: {X_train.shape}")  # (num_projects_train, max_timesteps, num_features)
print(f"y_train shape: {y_train.shape}")  # (num_projects_train,)
print(f"X_val shape: {X_val.shape}")
print(f"y_val shape: {y_val.shape}")
print(f"X_test shape: {X_test.shape}")  # (num_projects_test, max_timesteps, num_features)
print(f"y_test shape: {y_test.shape}")  # (num_projects_test,)



X_train shape: (141, 173, 11)
y_train shape: (141,)
X_val shape: (26, 173, 11)
y_val shape: (26,)
X_test shape: (30, 173, 11)
y_test shape: (30,)


### Compute Class Weights

In [16]:

# Compute class weights for balanced training
class_weights = compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
class_weight_dict = {0: class_weights[0], 1: class_weights[1]}

# Print computed class weights
print(f"Computed Class Weights: {class_weight_dict}")

Computed Class Weights: {0: np.float64(4.147058823529412), 1: np.float64(0.5685483870967742)}


### Check Class Distributions to confirm Stratified Sampling worked

In [17]:
# Convert y_train and y_val to pandas Series for value_counts
y_train_series = pd.Series(y_train)
y_val_series = pd.Series(y_val)
y_test_series = pd.Series(y_test)

# Check class distribution in the training set
print("Class distribution in training set:")
print(y_train_series.value_counts(normalize=True))  # Normalized to show proportions

# Check class distribution in the validation set
print("\nClass distribution in validation set:")
print(y_val_series.value_counts(normalize=True))  # Normalized to show proportions

# Check class distribution in the test set  
print("\nClass distribution in test set:")
print(y_test_series.value_counts(normalize=True))  # Normal


Class distribution in training set:
1    0.879433
0    0.120567
Name: proportion, dtype: float64

Class distribution in validation set:
1    0.884615
0    0.115385
Name: proportion, dtype: float64

Class distribution in test set:
1    0.866667
0    0.133333
Name: proportion, dtype: float64


### Check all classes present in each set

In [18]:
# Confirm all classes are present in each test
print(f"Unique classes in y_train: {np.unique(y_train)}")
print(f"Unique classes in y_val: {np.unique(y_val)}")
print(f"Unique classes in y_test: {np.unique(y_test)}")

Unique classes in y_train: [0 1]
Unique classes in y_val: [0 1]
Unique classes in y_test: [0 1]


### Define and Evaluate LSTM Model (4 times)

In [24]:
# Set the number of runs
n_runs = 4

# Wrap the loop in tqdm for progress tracking
for run in tqdm(range(n_runs), desc="Running models", unit="model"):

    print(f"\n--- Running model {run+1}/{n_runs} ---\n")

    # Define the LSTM model
    model = Sequential([
        Input(shape=(max_timesteps, len(numeric_features))),
        LSTM(512, return_sequences=True),
        Dropout(0.2),
        LSTM(256, return_sequences=True),
        Dropout(0.2),
        LSTM(256),
        Dropout(0.2),
        Dense(25, activation="relu"),
        Dense(1, activation="sigmoid")
    ])

    # Define an optimizer with a specific learning rate
    optimizer = Adam(learning_rate=0.000005)  # Default is 0.001

    # Compile the model
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

    # Define early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',  # Metric to monitor
        patience=5,          # Number of epochs to wait for improvement
        restore_best_weights=True  # Restore the best weights after stopping
    )

    # Train the model with early stopping and class weights
    model.fit(
        X_train, y_train,
        epochs=30,
        batch_size=3,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],  # Add early stopping callback
        class_weight=class_weight_dict  # Apply class weights
    )

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Accuracy for the graduated and retired project 
    # Make predictions
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob.astype("float32") > 0.5).astype(int)  # Convert probabilities to binary (0 or 1)

    # Compute overall accuracy
    overall_accuracy = accuracy_score(y_test, y_pred)
    print(f"Overall Accuracy: {overall_accuracy:.4f}")

    # Compute classification report (precision, recall, F1-score, and support)
    report = classification_report(y_test, y_pred, target_names=["Retired", "Graduated"], output_dict=True)
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Retired", "Graduated"]))

    # Extract overall precision, recall, and F1-score
    overall_precision = report["weighted avg"]["precision"]
    overall_recall = report["weighted avg"]["recall"]
    overall_f1 = report["weighted avg"]["f1-score"]

    # Print all metrics
    print("\nOverall Model Performance:")
    print(f"Accuracy:  {overall_accuracy:.4f}")
    print(f"Precision: {overall_precision:.4f}")
    print(f"Recall:    {overall_recall:.4f}")
    print(f"F1-Score:  {overall_f1:.4f}")

    # Compute confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:\n", conf_matrix)

Running models:   0%|          | 0/4 [00:00<?, ?model/s]


--- Running model 1/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 425ms/step - accuracy: 0.1832 - loss: 0.8239 - val_accuracy: 0.1923 - val_loss: 0.6932
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 398ms/step - accuracy: 0.2515 - loss: 0.7951 - val_accuracy: 0.2308 - val_loss: 0.6933
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 423ms/step - accuracy: 0.5439 - loss: 0.6170 - val_accuracy: 0.3077 - val_loss: 0.6931
Epoch 4/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 427ms/step - accuracy: 0.6083 - loss: 0.5936 - val_accuracy: 0.3846 - val_loss: 0.6930
Epoch 5/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 421ms/step - accuracy: 0.3882 - loss: 0.7354 - val_accuracy: 0.3077 - val_loss: 0.6930
Epoch 6/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 422ms/step - accuracy: 0.3111 - loss: 0.8893 - val_accuracy: 0.3077 - val_loss: 0

Running models:  25%|██▌       | 1/4 [09:59<29:58, 599.47s/model]

Overall Accuracy: 0.9000

Classification Report:
               precision    recall  f1-score   support

     Retired       0.60      0.75      0.67         4
   Graduated       0.96      0.92      0.94        26

    accuracy                           0.90        30
   macro avg       0.78      0.84      0.80        30
weighted avg       0.91      0.90      0.90        30


Overall Model Performance:
Accuracy:  0.9000
Precision: 0.9120
Recall:    0.9000
F1-Score:  0.9046

Confusion Matrix:
 [[ 3  1]
 [ 2 24]]

--- Running model 2/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 356ms/step - accuracy: 0.5040 - loss: 0.6774 - val_accuracy: 0.2692 - val_loss: 0.6931
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 376ms/step - accuracy: 0.3820 - loss: 0.6795 - val_accuracy: 0.8846 - val_loss: 0.6930
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 404ms/step - accuracy: 0.2810 - loss: 0.8120 - val_acc

Running models:  50%|█████     | 2/4 [18:32<18:17, 548.83s/model]

Overall Accuracy: 0.8667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.50      1.00      0.67         4
   Graduated       1.00      0.85      0.92        26

    accuracy                           0.87        30
   macro avg       0.75      0.92      0.79        30
weighted avg       0.93      0.87      0.88        30


Overall Model Performance:
Accuracy:  0.8667
Precision: 0.9333
Recall:    0.8667
F1-Score:  0.8833

Confusion Matrix:
 [[ 4  0]
 [ 4 22]]

--- Running model 3/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 374ms/step - accuracy: 0.3398 - loss: 0.7217 - val_accuracy: 0.8846 - val_loss: 0.6930
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 375ms/step - accuracy: 0.8884 - loss: 0.6279 - val_accuracy: 0.2308 - val_loss: 0.6932
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 374ms/step - accuracy: 0.5188 - loss: 0.6483 - val_acc

Running models:  75%|███████▌  | 3/4 [27:14<08:56, 536.56s/model]

Overall Accuracy: 0.8667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.50      1.00      0.67         4
   Graduated       1.00      0.85      0.92        26

    accuracy                           0.87        30
   macro avg       0.75      0.92      0.79        30
weighted avg       0.93      0.87      0.88        30


Overall Model Performance:
Accuracy:  0.8667
Precision: 0.9333
Recall:    0.8667
F1-Score:  0.8833

Confusion Matrix:
 [[ 4  0]
 [ 4 22]]

--- Running model 4/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 357ms/step - accuracy: 0.7768 - loss: 0.6964 - val_accuracy: 0.8846 - val_loss: 0.6930
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 360ms/step - accuracy: 0.9120 - loss: 0.6124 - val_accuracy: 0.8846 - val_loss: 0.6929
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 371ms/step - accuracy: 0.8834 - loss: 0.7019 - val_acc

Running models: 100%|██████████| 4/4 [35:42<00:00, 535.58s/model]

Overall Accuracy: 0.9000

Classification Report:
               precision    recall  f1-score   support

     Retired       0.60      0.75      0.67         4
   Graduated       0.96      0.92      0.94        26

    accuracy                           0.90        30
   macro avg       0.78      0.84      0.80        30
weighted avg       0.91      0.90      0.90        30


Overall Model Performance:
Accuracy:  0.9000
Precision: 0.9120
Recall:    0.9000
F1-Score:  0.9046

Confusion Matrix:
 [[ 3  1]
 [ 2 24]]





### Define and Evaluate BiLSTM (4 times)

In [25]:
# Set the number of runs
n_runs = 4

# Wrap the loop in tqdm for progress tracking
for run in tqdm(range(n_runs), desc="Running models", unit="model"):

    print(f"\n--- Running model {run+1}/{n_runs} ---\n")

    # define biLSTM model
    model = Sequential([
        Input(shape=(max_timesteps, len(numeric_features))),
        Bidirectional(LSTM(512, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(256, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(LSTM(256)),
        Dropout(0.2),
        Dense(25, activation="relu"),
        Dense(1, activation="sigmoid")
    ])

    # Define an optimizer with a specific learning rate
    optimizer = Adam(learning_rate=0.000005)  # Default is 0.001

    # Compile the model
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

    # Define early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',  # Metric to monitor
        patience=5,          # Number of epochs to wait for improvement
        restore_best_weights=True  # Restore the best weights after stopping
    )

    # Train the model with early stopping and class weights
    model.fit(
        X_train, y_train,
        epochs=30,
        batch_size=3,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],  # Add early stopping callback
        class_weight=class_weight_dict  # Apply class weights
    )

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Accuracy for the graduated and retired project 
    # Make predictions
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob.astype("float32") > 0.5).astype(int)  # Convert probabilities to binary (0 or 1)

    # Compute overall accuracy
    overall_accuracy = accuracy_score(y_test, y_pred)
    print(f"Overall Accuracy: {overall_accuracy:.4f}")

    # Compute classification report (precision, recall, F1-score, and support)
    report = classification_report(y_test, y_pred, target_names=["Retired", "Graduated"], output_dict=True)
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Retired", "Graduated"]))

    # Extract overall precision, recall, and F1-score
    overall_precision = report["weighted avg"]["precision"]
    overall_recall = report["weighted avg"]["recall"]
    overall_f1 = report["weighted avg"]["f1-score"]

    # Print all metrics
    print("\nOverall Model Performance:")
    print(f"Accuracy:  {overall_accuracy:.4f}")
    print(f"Precision: {overall_precision:.4f}")
    print(f"Recall:    {overall_recall:.4f}")
    print(f"F1-Score:  {overall_f1:.4f}")

    # Compute confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:\n", conf_matrix)

Running models:   0%|          | 0/4 [00:00<?, ?model/s]


--- Running model 1/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 629ms/step - accuracy: 0.4586 - loss: 0.6361 - val_accuracy: 0.6923 - val_loss: 0.6929
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 654ms/step - accuracy: 0.5912 - loss: 0.6512 - val_accuracy: 0.5000 - val_loss: 0.6932
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 829ms/step - accuracy: 0.5119 - loss: 0.6266 - val_accuracy: 0.6538 - val_loss: 0.6926
Epoch 4/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 824ms/step - accuracy: 0.5995 - loss: 0.6450 - val_accuracy: 0.5385 - val_loss: 0.6930
Epoch 5/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 780ms/step - accuracy: 0.4929 - loss: 0.8395 - val_accuracy: 0.5385 - val_loss: 0.6929
Epoch 6/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 779ms/step - accuracy: 0.4794 - loss: 0.6437 - val_accuracy: 0.5769 - val_loss: 0

Running models:  25%|██▌       | 1/4 [19:42<59:08, 1182.98s/model]

Overall Accuracy: 0.9333

Classification Report:
               precision    recall  f1-score   support

     Retired       0.67      1.00      0.80         4
   Graduated       1.00      0.92      0.96        26

    accuracy                           0.93        30
   macro avg       0.83      0.96      0.88        30
weighted avg       0.96      0.93      0.94        30


Overall Model Performance:
Accuracy:  0.9333
Precision: 0.9556
Recall:    0.9333
F1-Score:  0.9387

Confusion Matrix:
 [[ 4  0]
 [ 2 24]]

--- Running model 2/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 631ms/step - accuracy: 0.4977 - loss: 0.6135 - val_accuracy: 0.5385 - val_loss: 0.6933
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 624ms/step - accuracy: 0.4484 - loss: 0.6818 - val_accuracy: 0.5385 - val_loss: 0.6933
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m37s[0m 785ms/step - accuracy: 0.4718 - loss: 0.6761 - val_acc

Running models:  50%|█████     | 2/4 [23:54<21:09, 634.94s/model] 

Overall Accuracy: 0.5667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.24      1.00      0.38         4
   Graduated       1.00      0.50      0.67        26

    accuracy                           0.57        30
   macro avg       0.62      0.75      0.52        30
weighted avg       0.90      0.57      0.63        30


Overall Model Performance:
Accuracy:  0.5667
Precision: 0.8980
Recall:    0.5667
F1-Score:  0.6286

Confusion Matrix:
 [[ 4  0]
 [13 13]]

--- Running model 3/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 601ms/step - accuracy: 0.2102 - loss: 0.7825 - val_accuracy: 0.3077 - val_loss: 0.6936
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 615ms/step - accuracy: 0.3768 - loss: 0.6271 - val_accuracy: 0.4231 - val_loss: 0.6936
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 815ms/step - accuracy: 0.4197 - loss: 0.6538 - val_acc

Running models:  75%|███████▌  | 3/4 [28:51<08:00, 480.71s/model]

Overall Accuracy: 0.4667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.20      1.00      0.33         4
   Graduated       1.00      0.38      0.56        26

    accuracy                           0.47        30
   macro avg       0.60      0.69      0.44        30
weighted avg       0.89      0.47      0.53        30


Overall Model Performance:
Accuracy:  0.4667
Precision: 0.8933
Recall:    0.4667
F1-Score:  0.5259

Confusion Matrix:
 [[ 4  0]
 [16 10]]

--- Running model 4/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 617ms/step - accuracy: 0.9167 - loss: 0.6017 - val_accuracy: 0.8846 - val_loss: 0.6921
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 635ms/step - accuracy: 0.8955 - loss: 0.6790 - val_accuracy: 0.8846 - val_loss: 0.6918
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 837ms/step - accuracy: 0.8328 - loss: 0.7415 - val_acc

Running models: 100%|██████████| 4/4 [48:11<00:00, 722.81s/model]

Overall Accuracy: 0.9000

Classification Report:
               precision    recall  f1-score   support

     Retired       0.57      1.00      0.73         4
   Graduated       1.00      0.88      0.94        26

    accuracy                           0.90        30
   macro avg       0.79      0.94      0.83        30
weighted avg       0.94      0.90      0.91        30


Overall Model Performance:
Accuracy:  0.9000
Precision: 0.9429
Recall:    0.9000
F1-Score:  0.9106

Confusion Matrix:
 [[ 4  0]
 [ 3 23]]





### Define and Evaluate GRU (4 times)

In [26]:
# Set the number of runs
n_runs = 4

# Wrap the loop in tqdm for progress tracking
for run in tqdm(range(n_runs), desc="Running models", unit="model"):

    print(f"\n--- Running model {run+1}/{n_runs} ---\n")

   
# Define the GRU model
    model = Sequential([
        Input(shape=(max_timesteps, len(numeric_features))),
        GRU(512, return_sequences=True),
        Dropout(0.2),
        GRU(256, return_sequences=True),
        Dropout(0.2),
        GRU(256),
        Dropout(0.2),
        Dense(25, activation="relu"),
        Dense(1, activation="sigmoid")
    ])

    # Define an optimizer with a specific learning rate
    optimizer = Adam(learning_rate=0.000005)  # Default is 0.001

    # Compile the model
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

    # Define early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',  # Metric to monitor
        patience=5,          # Number of epochs to wait for improvement
        restore_best_weights=True  # Restore the best weights after stopping
    )

    # Train the model with early stopping and class weights
    model.fit(
        X_train, y_train,
        epochs=30,
        batch_size=3,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],  # Add early stopping callback
        class_weight=class_weight_dict  # Apply class weights
    )

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Accuracy for the graduated and retired project 
    # Make predictions
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob.astype("float32") > 0.5).astype(int)  # Convert probabilities to binary (0 or 1)

    # Compute overall accuracy
    overall_accuracy = accuracy_score(y_test, y_pred)
    print(f"Overall Accuracy: {overall_accuracy:.4f}")

    # Compute classification report (precision, recall, F1-score, and support)
    report = classification_report(y_test, y_pred, target_names=["Retired", "Graduated"], output_dict=True)
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Retired", "Graduated"]))

    # Extract overall precision, recall, and F1-score
    overall_precision = report["weighted avg"]["precision"]
    overall_recall = report["weighted avg"]["recall"]
    overall_f1 = report["weighted avg"]["f1-score"]

    # Print all metrics
    print("\nOverall Model Performance:")
    print(f"Accuracy:  {overall_accuracy:.4f}")
    print(f"Precision: {overall_precision:.4f}")
    print(f"Recall:    {overall_recall:.4f}")
    print(f"F1-Score:  {overall_f1:.4f}")

    # Compute confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:\n", conf_matrix)

Running models:   0%|          | 0/4 [00:00<?, ?model/s]


--- Running model 1/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 301ms/step - accuracy: 0.7899 - loss: 0.6850 - val_accuracy: 0.8846 - val_loss: 0.6929
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 278ms/step - accuracy: 0.9005 - loss: 0.6410 - val_accuracy: 0.8846 - val_loss: 0.6923
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 280ms/step - accuracy: 0.9267 - loss: 0.5757 - val_accuracy: 0.8846 - val_loss: 0.6924
Epoch 4/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 277ms/step - accuracy: 0.8375 - loss: 0.7789 - val_accuracy: 0.8846 - val_loss: 0.6928
Epoch 5/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 274ms/step - accuracy: 0.8680 - loss: 0.7217 - val_accuracy: 0.8846 - val_loss: 0.6928
Epoch 6/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 276ms/step - accuracy: 0.9178 - loss: 0.5978 - val_accuracy: 0.8846 - val_loss: 0

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Running models:  25%|██▌       | 1/4 [01:33<04:41, 93.78s/model]

Overall Accuracy: 0.8667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.00      0.00      0.00         4
   Graduated       0.87      1.00      0.93        26

    accuracy                           0.87        30
   macro avg       0.43      0.50      0.46        30
weighted avg       0.75      0.87      0.80        30


Overall Model Performance:
Accuracy:  0.8667
Precision: 0.7511
Recall:    0.8667
F1-Score:  0.8048

Confusion Matrix:
 [[ 0  4]
 [ 0 26]]

--- Running model 2/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 295ms/step - accuracy: 0.5380 - loss: 0.7773 - val_accuracy: 0.8846 - val_loss: 0.6931
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 256ms/step - accuracy: 0.8725 - loss: 0.7103 - val_accuracy: 0.8846 - val_loss: 0.6928
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 293ms/step - accuracy: 0.8705 - loss: 0.7154 - val_acc

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Running models:  50%|█████     | 2/4 [03:35<03:40, 110.41s/model]

Overall Accuracy: 0.8667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.00      0.00      0.00         4
   Graduated       0.87      1.00      0.93        26

    accuracy                           0.87        30
   macro avg       0.43      0.50      0.46        30
weighted avg       0.75      0.87      0.80        30


Overall Model Performance:
Accuracy:  0.8667
Precision: 0.7511
Recall:    0.8667
F1-Score:  0.8048

Confusion Matrix:
 [[ 0  4]
 [ 0 26]]

--- Running model 3/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 294ms/step - accuracy: 0.8069 - loss: 0.6181 - val_accuracy: 0.8846 - val_loss: 0.6924
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 299ms/step - accuracy: 0.9025 - loss: 0.6358 - val_accuracy: 0.8846 - val_loss: 0.6925
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 293ms/step - accuracy: 0.9017 - loss: 0.6378 - val_acc

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Running models:  75%|███████▌  | 3/4 [04:59<01:38, 98.27s/model] 

Overall Accuracy: 0.8667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.00      0.00      0.00         4
   Graduated       0.87      1.00      0.93        26

    accuracy                           0.87        30
   macro avg       0.43      0.50      0.46        30
weighted avg       0.75      0.87      0.80        30


Overall Model Performance:
Accuracy:  0.8667
Precision: 0.7511
Recall:    0.8667
F1-Score:  0.8048

Confusion Matrix:
 [[ 0  4]
 [ 0 26]]

--- Running model 4/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 318ms/step - accuracy: 0.6671 - loss: 0.6740 - val_accuracy: 0.8846 - val_loss: 0.6931
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 331ms/step - accuracy: 0.7168 - loss: 0.6446 - val_accuracy: 0.1154 - val_loss: 0.6932
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 312ms/step - accuracy: 0.5776 - loss: 0.6672 - val_acc

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
Running models: 100%|██████████| 4/4 [08:25<00:00, 126.27s/model]

Overall Accuracy: 0.8667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.00      0.00      0.00         4
   Graduated       0.87      1.00      0.93        26

    accuracy                           0.87        30
   macro avg       0.43      0.50      0.46        30
weighted avg       0.75      0.87      0.80        30


Overall Model Performance:
Accuracy:  0.8667
Precision: 0.7511
Recall:    0.8667
F1-Score:  0.8048

Confusion Matrix:
 [[ 0  4]
 [ 0 26]]





### Define and Evaluate BiGRU (4 times)

In [27]:
# Set the number of runs
n_runs = 4

# Wrap the loop in tqdm for progress tracking
for run in tqdm(range(n_runs), desc="Running models", unit="model"):

    print(f"\n--- Running model {run+1}/{n_runs} ---\n")

   
# Define the GRU model
    model = Sequential([
        Input(shape=(max_timesteps, len(numeric_features))),
        Bidirectional(GRU(512, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(GRU(256, return_sequences=True)),
        Dropout(0.2),
        Bidirectional(GRU(256)),
        Dropout(0.2),
        Dense(25, activation="relu"),
        Dense(1, activation="sigmoid")
])

    # Define an optimizer with a specific learning rate
    optimizer = Adam(learning_rate=0.000005)  # Default is 0.001

    # Compile the model
    model.compile(optimizer=optimizer, loss="binary_crossentropy", metrics=["accuracy"])

    # Define early stopping
    early_stopping = EarlyStopping(
        monitor='val_loss',  # Metric to monitor
        patience=5,          # Number of epochs to wait for improvement
        restore_best_weights=True  # Restore the best weights after stopping
    )

    # Train the model with early stopping and class weights
    model.fit(
        X_train, y_train,
        epochs=30,
        batch_size=3,
        validation_data=(X_val, y_val),
        callbacks=[early_stopping],  # Add early stopping callback
        class_weight=class_weight_dict  # Apply class weights
    )

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Accuracy: {accuracy:.4f}")

    # Accuracy for the graduated and retired project 
    # Make predictions
    y_pred_prob = model.predict(X_test)
    y_pred = (y_pred_prob.astype("float32") > 0.5).astype(int)  # Convert probabilities to binary (0 or 1)

    # Compute overall accuracy
    overall_accuracy = accuracy_score(y_test, y_pred)
    print(f"Overall Accuracy: {overall_accuracy:.4f}")

    # Compute classification report (precision, recall, F1-score, and support)
    report = classification_report(y_test, y_pred, target_names=["Retired", "Graduated"], output_dict=True)
    print("\nClassification Report:\n", classification_report(y_test, y_pred, target_names=["Retired", "Graduated"]))

    # Extract overall precision, recall, and F1-score
    overall_precision = report["weighted avg"]["precision"]
    overall_recall = report["weighted avg"]["recall"]
    overall_f1 = report["weighted avg"]["f1-score"]

    # Print all metrics
    print("\nOverall Model Performance:")
    print(f"Accuracy:  {overall_accuracy:.4f}")
    print(f"Precision: {overall_precision:.4f}")
    print(f"Recall:    {overall_recall:.4f}")
    print(f"F1-Score:  {overall_f1:.4f}")

    # Compute confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)
    print("\nConfusion Matrix:\n", conf_matrix)

Running models:   0%|          | 0/4 [00:00<?, ?model/s]


--- Running model 1/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 522ms/step - accuracy: 0.7140 - loss: 0.6527 - val_accuracy: 0.9615 - val_loss: 0.6923
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 512ms/step - accuracy: 0.7118 - loss: 0.8075 - val_accuracy: 0.8077 - val_loss: 0.6923
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 809ms/step - accuracy: 0.7115 - loss: 0.7615 - val_accuracy: 0.8077 - val_loss: 0.6913
Epoch 4/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 806ms/step - accuracy: 0.7990 - loss: 0.6045 - val_accuracy: 0.8077 - val_loss: 0.6905
Epoch 5/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 775ms/step - accuracy: 0.6776 - loss: 0.7632 - val_accuracy: 0.8462 - val_loss: 0.6904
Epoch 6/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 809ms/step - accuracy: 0.7072 - loss: 0.6597 - val_accuracy: 0.8462 - val_loss: 0

Running models:  25%|██▌       | 1/4 [18:38<55:56, 1118.73s/model]

Overall Accuracy: 0.8000

Classification Report:
               precision    recall  f1-score   support

     Retired       0.40      1.00      0.57         4
   Graduated       1.00      0.77      0.87        26

    accuracy                           0.80        30
   macro avg       0.70      0.88      0.72        30
weighted avg       0.92      0.80      0.83        30


Overall Model Performance:
Accuracy:  0.8000
Precision: 0.9200
Recall:    0.8000
F1-Score:  0.8298

Confusion Matrix:
 [[ 4  0]
 [ 6 20]]

--- Running model 2/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 505ms/step - accuracy: 0.7817 - loss: 0.6947 - val_accuracy: 0.9231 - val_loss: 0.6921
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 538ms/step - accuracy: 0.8568 - loss: 0.6154 - val_accuracy: 0.9231 - val_loss: 0.6918
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 802ms/step - accuracy: 0.7769 - loss: 0.6566 - val_acc

Running models:  50%|█████     | 2/4 [36:05<35:52, 1076.14s/model]

Overall Accuracy: 0.8000

Classification Report:
               precision    recall  f1-score   support

     Retired       0.40      1.00      0.57         4
   Graduated       1.00      0.77      0.87        26

    accuracy                           0.80        30
   macro avg       0.70      0.88      0.72        30
weighted avg       0.92      0.80      0.83        30


Overall Model Performance:
Accuracy:  0.8000
Precision: 0.9200
Recall:    0.8000
F1-Score:  0.8298

Confusion Matrix:
 [[ 4  0]
 [ 6 20]]

--- Running model 3/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 515ms/step - accuracy: 0.4092 - loss: 0.8016 - val_accuracy: 0.7692 - val_loss: 0.6929
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 541ms/step - accuracy: 0.5678 - loss: 0.7944 - val_accuracy: 0.4615 - val_loss: 0.6929
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 763ms/step - accuracy: 0.3758 - loss: 0.7199 - val_acc

Running models:  75%|███████▌  | 3/4 [55:10<18:27, 1107.57s/model]

Overall Accuracy: 0.5667

Classification Report:
               precision    recall  f1-score   support

     Retired       0.24      1.00      0.38         4
   Graduated       1.00      0.50      0.67        26

    accuracy                           0.57        30
   macro avg       0.62      0.75      0.52        30
weighted avg       0.90      0.57      0.63        30


Overall Model Performance:
Accuracy:  0.5667
Precision: 0.8980
Recall:    0.5667
F1-Score:  0.6286

Confusion Matrix:
 [[ 4  0]
 [13 13]]

--- Running model 4/4 ---

Epoch 1/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 515ms/step - accuracy: 0.3534 - loss: 0.6933 - val_accuracy: 0.7692 - val_loss: 0.6929
Epoch 2/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 509ms/step - accuracy: 0.5650 - loss: 0.7292 - val_accuracy: 0.5385 - val_loss: 0.6927
Epoch 3/30
[1m47/47[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 747ms/step - accuracy: 0.5224 - loss: 0.7208 - val_acc

Running models: 100%|██████████| 4/4 [1:12:28<00:00, 1087.04s/model]

Overall Accuracy: 0.8000

Classification Report:
               precision    recall  f1-score   support

     Retired       0.40      1.00      0.57         4
   Graduated       1.00      0.77      0.87        26

    accuracy                           0.80        30
   macro avg       0.70      0.88      0.72        30
weighted avg       0.92      0.80      0.83        30


Overall Model Performance:
Accuracy:  0.8000
Precision: 0.9200
Recall:    0.8000
F1-Score:  0.8298

Confusion Matrix:
 [[ 4  0]
 [ 6 20]]



