<a href="https://colab.research.google.com/github/wesleykieu/machine-learning-portfolio/blob/main/Student_Exams.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score, mean_squared_error
import tensorflow as tf
from tensorflow import keras


## Data Card:
student_id – A unique code given to each student for identification.

hours_studied – The number of hours a student studied before the exam.

sleep_hours – The average number of hours the student slept daily.

attendance_percent – The percentage of classes attended by the student.

previous_scores – The marks a student obtained in previous tests or assessments.

exam_score – The final exam score of the student, used as the main performance measure.


In [13]:
# Read the exam-score CSV (path is relative to the notebook's working
# directory) into `df`, then preview the first five rows.
DATA_FILE = 'student_exam_scores.csv'
df = pd.read_csv(DATA_FILE)
df.head(5)

Unnamed: 0,student_id,hours_studied,sleep_hours,attendance_percent,previous_scores,exam_score
0,S001,8.0,8.8,72.1,45,30.2
1,S002,1.3,8.6,60.7,55,25.0
2,S003,4.0,8.2,73.7,86,35.8
3,S004,3.5,4.8,95.1,66,34.0
4,S005,9.1,6.4,89.8,71,40.3


In [14]:
# Count missing values per column (isna is the canonical name; isnull is its alias)
df.isna().sum()

Unnamed: 0,0
student_id,0
hours_studied,0
sleep_hours,0
attendance_percent,0
previous_scores,0
exam_score,0


## Data Preprocessing

In [15]:
# Shuffle Dataset: draw every row (frac=1) in a seeded random order,
# then renumber the index from 0 so it no longer reflects the file order.
shuffled_rows = df.sample(frac=1, random_state=42)
df = shuffled_rows.reset_index(drop=True)

In [16]:
# Separate features and target. student_id is only an identifier, so it is
# dropped from the feature matrix together with the target column.
NON_FEATURE_COLUMNS = ['student_id', 'exam_score']
X = df.drop(columns=NON_FEATURE_COLUMNS)
y = df['exam_score']



In [17]:
# Split the dataset 70-20-10 (train / validation / test) in two passes.
SPLIT_SEED = 42
TEST_FRACTION = 0.1                # share of all rows held out for testing
VAL_FRACTION_OF_REMAINDER = 0.222  # ~20% of all rows, taken from the remaining 90%

# Pass 1: hold out the test set.
X_temp, X_test, y_temp, y_test = train_test_split(
    X, y, test_size=TEST_FRACTION, random_state=SPLIT_SEED
)

# Pass 2: divide what is left into training and validation sets.
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=VAL_FRACTION_OF_REMAINDER, random_state=SPLIT_SEED
)

In [18]:
# Feature Scaling: fit the scaler on the training split only, then apply the
# same fitted transform to every split so validation/test rows never
# influence the scaling statistics.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled, X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [33]:
# Build model
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation (and the batch shuffling done later during
# training) starts from the same state on every Restart & Run All. Some GPU
# ops may still be nondeterministic.
keras.utils.set_random_seed(42)

# Fully connected regression network: two ReLU hidden layers (64 -> 32 units)
# followed by a single linear output unit that predicts the exam score.
model = keras.Sequential([
    keras.layers.Input(shape=(X_train.shape[1],)),  # one input per feature column
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(1)
])

In [35]:
# Configure training: Adam optimizer minimising mean squared error, with
# mean absolute error reported alongside the loss.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

In [31]:
# Stop training once validation loss has gone 10 epochs without improving,
# and roll the model back to the weights from its best epoch.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)
callbacks = [early_stopping]

In [36]:
# Train on the scaled training split and evaluate on the scaled validation
# split after each epoch; `history` holds the object returned by fit().
val_data = (X_val_scaled, y_val)
history = model.fit(
    x=X_train_scaled,
    y=y_train,
    validation_data=val_data,
    epochs=100,       # upper bound; the callbacks passed below may end training sooner
    batch_size=32,
    callbacks=callbacks,
)

Epoch 1/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 311ms/step - loss: 1251.7522 - mae: 34.6508 - val_loss: 1150.9980 - val_mae: 33.2869
Epoch 2/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 1191.4635 - mae: 33.8117 - val_loss: 1132.4102 - val_mae: 33.0088
Epoch 3/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 1161.0392 - mae: 33.2806 - val_loss: 1114.2372 - val_mae: 32.7346
Epoch 4/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 1176.3857 - mae: 33.5899 - val_loss: 1096.3616 - val_mae: 32.4610
Epoch 5/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 1166.7350 - mae: 33.4119 - val_loss: 1078.2781 - val_mae: 32.1809
Epoch 6/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step - loss: 1150.1068 - mae: 33.2150 - val_loss: 1059.5260 - val_mae: 31.8872
Epoch 7/100
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[

With a baseline of three layers (64, 32, and 1 units), the model is able to achieve an MAE of about 4. Now with tuning hyperparameters,