In [185]:
import tensorflow as tf
print(tf.__version__)
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Flatten, GlobalMaxPooling1D, Embedding, Input, Dropout, BatchNormalization
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd
from tensorflow.keras.regularizers import l2
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.metrics import roc_curve, auc, roc_auc_score
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from datetime import datetime
import pickle



2.18.0


In [186]:
#variables
kernel_size=3
hidden_dims=250
test_size = 0.3
batch_size=32
#accuracy gains seem to be minimal after epoch 10
epochs=4
dp = 0.2
lr = 0.001
threshold=0.5
random_seed = 1
early_stopping = tf.keras.callbacks.EarlyStopping(patience=3, restore_best_weights=True)

In [None]:
#dataset used: https://www.kaggle.com/datasets/nikhil7280/student-performance-multiple-linear-regression
#loading data
try:
    data = pd.read_csv("data/Student_Performance.csv")
except Exception as e:
    print(f"Error with getting file: {e}")

print(data.head())

#convery strings/integers to 1 for yes or 0 for no for extracurricular activities and previous scores
binary_extra = [1 if d.lower() == "yes" else 0 for d in data["Extracurricular Activities"]]
binary_pass = [1 if d >= 50.0 else 0 for d in data["Performance Index"]]

#print(data.isnull().sum())
#print(data.shape)

X = data.drop("Performance Index", axis=1)
y = binary_pass

X["Extracurricular Activities"] = binary_extra

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_seed)

scaler = StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

y_train = np.array(y_train)
y_test = np.array(y_test)

print(X_train.shape)
print(X_test.shape)



   Hours Studied  Previous Scores Extracurricular Activities  Sleep Hours  \
0              7               99                        Yes            9   
1              4               82                         No            4   
2              8               51                        Yes            7   
3              5               52                        Yes            5   
4              7               75                         No            8   

   Sample Question Papers Practiced  Performance Index  
0                                 1               91.0  
1                                 2               65.0  
2                                 2               45.0  
3                                 2               36.0  
4                                 5               66.0  
(7000, 5, 1)
(3000, 5, 1)


In [188]:
#define and compile model
model = Sequential([
    Conv1D(64, 2, activation="relu", input_shape=(X_train.shape[1], 1)),
    BatchNormalization(),
    Dropout(0.3),
    Conv1D(128, 2, activation="relu"),
    BatchNormalization(),
    Dropout(0.3),
    GlobalMaxPooling1D(),
    Dense(128, activation="relu"),
    Dropout(dp),
    Dense(1, activation="sigmoid")

])

model.compile(optimizer=Adam(learning_rate=lr),
              loss="binary_crossentropy",
              metrics=["accuracy"])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [189]:
#train model
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test), callbacks=[early_stopping])

Epoch 1/4
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 6ms/step - accuracy: 0.7770 - loss: 0.4623 - val_accuracy: 0.8267 - val_loss: 0.4044
Epoch 2/4
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8660 - loss: 0.2936 - val_accuracy: 0.9363 - val_loss: 0.2048
Epoch 3/4
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.8896 - loss: 0.2467 - val_accuracy: 0.9370 - val_loss: 0.1679
Epoch 4/4
[1m219/219[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 7ms/step - accuracy: 0.9041 - loss: 0.2260 - val_accuracy: 0.9550 - val_loss: 0.1408


<keras.src.callbacks.history.History at 0x21dde97d790>

In [190]:
#predictions
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > threshold).astype("int")
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Accuracy: " + str(accuracy * 100))
print("Precision: " + str(precision * 100))
print("Recall: " + str(recall * 100))
print("F1 Score: " + str(f1 * 100))

[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step
Accuracy: 95.5
Precision: 95.88732394366197
Recall: 96.48526077097506
F1 Score: 96.18536309692003


In [191]:
#save model and scaler
try:
    curr_date = datetime.now().strftime("%Y%m%d_%H%M_%f")

    model.save(f"models/classification_model.h5")
except Exception as e:
    print(f"Error with saving model: {e}")

try:
    with open("scalers/classification_scaler.pkl", "wb") as f:
        pickle.dump(scaler, f)
except Exception as e:
    print(f"Error with saving scaler: {e}")

