In [138]:
import pandas as pd
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
import numpy as np
from sklearn.model_selection import StratifiedKFold

from tensorflow.keras import regularizers
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.pipeline import make_pipeline
from scikeras.wrappers import KerasClassifier
import joblib

# Load and process data

In [132]:
marginal_df = pd.read_csv(r"C:\Users\erict\Documents\projects\gwave_classify\data\gwtc_marginal.csv")
confident_df = pd.read_csv(r"C:\Users\erict\Documents\projects\gwave_classify\data\gwtc_confident.csv")

In [3]:
confident_df["label"] = 1  
marginal_df["label"] = 0  
cleaned_df = pd.concat([confident_df, marginal_df], ignore_index=True)
gwtc_df = cleaned_df.sample(frac=1, random_state=42).reset_index(drop=True)

In [7]:
drop_list = ["id", "commonName", "version", "catalog.shortName", "GPS", "reference", "jsonurl", "far_lower", "far_upper", "p_astro_lower", "p_astro_upper", "chirp_mass_lower", "chirp_mass_upper", "chirp_mass", "mass_1_source_lower", "mass_1_source_upper", "mass_2_source_upper", "mass_2_source_lower", "network_matched_filter_snr_lower", "network_matched_filter_snr_upper", "luminosity_distance_lower", "luminosity_distance_upper", "chi_eff_lower", "chi_eff_upper", "total_mass_source_lower", "total_mass_source_upper", "redshift_lower", "redshift_upper", "final_mass_source_lower", "final_mass_source_upper", "chirp_mass_source_lower", "chirp_mass_source_upper"]
train_df = gwtc_df.drop(columns=drop_list)
imputer = KNNImputer(n_neighbors=5)
train_df[:] = imputer.fit_transform(train_df)

In [118]:
X = train_df.drop(columns=["label"])
y = train_df["label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build Feed forward neural network and train

In [105]:
model = keras.Sequential()

model.add(keras.layers.Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X_train.shape[1],)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.1))
model.add(keras.layers.Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dropout(0.1))
model.add(keras.layers.Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
model.add(keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.01)))
optimizer = keras.optimizers.Adam(learning_rate=0.0005)

model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for train_index, val_index in kf.split(X, y):
    X_train, X_val = X.iloc[train_index], X.iloc[val_index]
    y_train, y_val = y.iloc[train_index], y.iloc[val_index]

    model.fit(X_train, y_train, epochs=100, batch_size=10, validation_data=(X_val, y_val), callbacks=[early_stopping])

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.7914 - loss: 0.8908 - val_accuracy: 0.7826 - val_loss: 1.0330
Epoch 2/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8167 - loss: 0.8835 - val_accuracy: 0.7826 - val_loss: 0.9903
Epoch 3/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8087 - loss: 0.8300 - val_accuracy: 0.7826 - val_loss: 0.9821
Epoch 4/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7850 - loss: 0.8239 - val_accuracy: 0.7826 - val_loss: 0.9808
Epoch 5/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7346 - loss: 0.8170 - val_accuracy: 0.7826 - val_loss: 0.9677
Epoch 6/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7562 - loss: 0.8296 - val_accuracy: 0.7826 - val_loss: 0.9525
Epoch 7/100
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37

[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7470 - loss: 0.5905 - val_accuracy: 0.8182 - val_loss: 0.5278
Epoch 34/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8340 - loss: 0.5499 - val_accuracy: 0.8182 - val_loss: 0.5140
Epoch 35/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8612 - loss: 0.5068 - val_accuracy: 0.8182 - val_loss: 0.5346
Epoch 36/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7719 - loss: 0.6267 - val_accuracy: 0.8182 - val_loss: 0.5343
Epoch 37/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7908 - loss: 0.6082 - val_accuracy: 0.8182 - val_loss: 0.5129
Epoch 38/100
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7824 - loss: 0.6001 - val_accuracy: 0.8182 - val_loss: 0.5294
Epoch 39/100
[1m10/10[0m [32m━━━━━━━━━

In [137]:
joblib.dump(model, "keras_model.joblib")

['keras_model.joblib']

In [130]:

test_loss, test_acc = model.evaluate(X_test_scaled, y_test)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - accuracy: 0.7826 - loss: 1.6678


# Voting Classifier

In [108]:
def create_nn():
    model = keras.Sequential()

    model.add(keras.layers.Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01), input_shape=(X_train.shape[1],)))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(0.1))
    model.add(keras.layers.Dense(8, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(0.1))
    model.add(keras.layers.Dense(16, activation='relu', kernel_regularizer=regularizers.l2(0.01)))
    model.add(keras.layers.Dense(1, activation='sigmoid', kernel_regularizer=regularizers.l2(0.01)))
    
    optimizer = keras.optimizers.Adam(learning_rate=0.0005)

    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])


    
    return model

In [128]:
nn_model = KerasClassifier(build_fn=create_nn, epochs=50, batch_size=10, verbose=0)
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
voting_clf = VotingClassifier(estimators=[('rf', rf_model), ('nn', nn_model)], voting='soft')


voting_clf.fit(X_train_scaled, y_train)


  X, y = self._initialize(X, y)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [129]:
accuracy = voting_clf.score(X_test_scaled, y_test)
print(f'Voting Classifier Accuracy: {accuracy:.4f}')

Voting Classifier Accuracy: 0.9565


In [134]:
joblib.dump(voting_clf, 'voting_classifier.joblib')

['voting_classifier.joblib']