<a href="https://colab.research.google.com/github/zelta-7/grade-prediction/blob/main/student_progress.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTING LIBRARIES

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# LOADING DATA SET

# Both course files are semicolon-delimited CSVs.
data_maths = pd.read_csv('student-mat.csv', sep=';')
data_portuguese = pd.read_csv('student-por.csv', sep=';')

# Per-column count of missing entries in each dataset.
missing_m = data_maths.isnull().sum()
missing_p = data_portuguese.isnull().sum()

# Preview the first rows, then report the missing-value counts.
print(data_maths.head())
print("Missing value in each column are: ", missing_m, sep="\n")

print(data_portuguese.head())
print("Missing values in each column are: ", missing_p, sep="\n")

In [None]:
# EXAMINING DATA

# Summary statistics of the numeric columns of each dataset.
maths_disc, port_disc = data_maths.describe(), data_portuguese.describe()

for heading, summary in (
    ("Description for Maths dataset:\n", maths_disc),
    ("Description for Portuguese dataset:\n", port_disc),
):
  print(heading, summary)

In [None]:
# ENCODING OBJECT TYPE DATA

# Show the column dtypes so the columns that need encoding are visible.
print("Data types for maths dataset are: ", data_maths.dtypes, sep="\n")
print("\nData types for portuguese dataset are: ", data_portuguese.dtypes, sep="\n")

# Columns kept for modelling; G3 is the column later turned into the target.
selected_features = [
    "age", "sex", "studytime", "failures", "higher", "internet",
    "goout", "absences", "G1", "G2", "G3",
]

# Restrict each dataset to the selected columns.
data_maths_filtered = data_maths[selected_features]
data_portuguese_filtered = data_portuguese[selected_features]

# One-hot encode the non-numeric columns; drop_first omits the first level of each.
data_encode_m_filtered = pd.get_dummies(data_maths_filtered, drop_first=True)
data_encode_p_filtered = pd.get_dummies(data_portuguese_filtered, drop_first=True)


In [None]:
# CONVERTING G3 TO BINARY

# Lowest final grade that counts as a pass.
pass_mark = 10

# Build the 0/1 label from the raw G3 still held in the *_filtered frames rather
# than from the column being overwritten: rerunning this cell then yields the
# same labels instead of comparing already-binarised 0/1 values with pass_mark.
# A grade equal to pass_mark is a pass, hence >= rather than >.
data_encode_m_filtered['G3'] = (data_maths_filtered['G3'] >= pass_mark).astype(int)
data_encode_p_filtered['G3'] = (data_portuguese_filtered['G3'] >= pass_mark).astype(int)

In [None]:
# SPLITTING THE DATASET 80-20

# Maths: features are every column except G3, the target is G3.
x_m, y_m = data_encode_m_filtered.drop(columns='G3'), data_encode_m_filtered['G3']
x_train_m, x_test_m, y_train_m, y_test_m = train_test_split(
    x_m, y_m, test_size=0.2, random_state=42
)

# Portuguese: same 80/20 split with the same fixed random_state.
x_p, y_p = data_encode_p_filtered.drop(columns='G3'), data_encode_p_filtered['G3']
x_train_p, x_test_p, y_train_p, y_test_p = train_test_split(
    x_p, y_p, test_size=0.2, random_state=42
)

In [None]:
# FEATURE SCALING

# Always scale from the untouched feature frames (x_m / x_p), selecting the
# train/test rows through the index labels kept by y_train_* / y_test_*.
# Reading from x_train_* itself would make a rerun of this cell fit the scaler
# on already-standardised data, so the scaler would no longer describe the raw
# feature ranges that later raw inputs are transformed with.
scaler_m = StandardScaler()
x_train_m = scaler_m.fit_transform(x_m.loc[y_train_m.index])
x_test_m = scaler_m.transform(x_m.loc[y_test_m.index])

scaler_p = StandardScaler()
x_train_p = scaler_p.fit_transform(x_p.loc[y_train_p.index])
x_test_p = scaler_p.transform(x_p.loc[y_test_p.index])

In [None]:
# PERCEPTRON MODEL

class Perceptron(object):
  """Single-layer perceptron for binary (0/1) classification.

  Attributes:
    weight: 1-D array holding one weight per input feature.
    bias: 1-element array holding the bias term.
  """

  def __init__(self, input_dim, seed=None):
    """Draw the initial weight vector and bias from a standard normal.

    Args:
      input_dim: Number of input features.
      seed: Optional seed for a private random generator so that the
        initialisation is reproducible. When None, NumPy's global random
        state is used.
    """
    rng = np.random if seed is None else np.random.default_rng(seed)
    self.weight = rng.normal(loc=0.0, scale=1.0, size=input_dim)
    self.bias = rng.normal(loc=0.0, scale=1.0, size=1)

  def activation(self, pred):
    """Step function: 1 where the input is >= 0, otherwise 0."""
    return np.where(pred >= 0, 1, 0)

  def prediction(self, data):
    """Return the 0/1 prediction(s) for one sample or a 2-D batch of samples."""
    return self.activation(np.dot(data, self.weight.T)+self.bias)

  def accuracy(self, predictions, target):
    """Return the fraction of predictions equal to the targets (by position)."""
    return np.mean(np.asarray(predictions) == np.asarray(target))

  def train(self, input_data, targets, epochs, lr=0.01, early_stopping=True, patience=5):
    """Fit the model with the per-sample perceptron update rule.

    Args:
      input_data: 2-D array-like of samples, one row per sample.
      targets: 0/1 labels, one per row of input_data.
      epochs: Maximum number of passes over the data.
      lr: Learning rate applied to every update.
      early_stopping: Whether to stop when accuracy stops improving.
      patience: Training stops once more than this many epochs in a row
        fail to beat the best training accuracy seen so far.

    Returns:
      Dict with per-epoch lists 'loss' (sum of squared errors accumulated
      during the pass) and 'accuracy' (training accuracy after the pass).
      The weights are left as they are after the last epoch run.
    """
    # Coerce to arrays so rows are iterated; iterating a DataFrame directly
    # would yield its column labels instead of its samples.
    samples = np.asarray(input_data, dtype=float)
    labels = np.asarray(targets)

    best_accuracy = 0
    no_improvment_count = 0
    history = {'loss':[], 'accuracy':[]}

    for e in range(1, epochs+1):
      epoch_loss = 0

      for data, target in zip(samples, labels):
        # prediction() gives a 1-element array for a single sample because
        # the bias has shape (1,); unwrap it so error and loss stay scalar.
        pred = self.prediction(data).item()
        error = target - pred
        update = lr * error
        epoch_loss += error**2
        self.weight += update * data
        self.bias += update

      all_pred = self.prediction(samples)
      epoch_accuracy = self.accuracy(all_pred, labels)
      history['accuracy'].append(epoch_accuracy)
      history['loss'].append(epoch_loss)

      if epoch_accuracy > best_accuracy:
        best_accuracy = epoch_accuracy
        no_improvment_count=0
      else:
        no_improvment_count+=1

      # '\r' with end="" rewrites the same console line on every epoch.
      print(f"\r Trained Epoch {e}/{epochs}, Loss : {epoch_loss}, Accuracy : {epoch_accuracy*100: .2f}%", end = "")

      if early_stopping and no_improvment_count>patience:
        # e is the epoch that has just finished, i.e. the one training stops at.
        print(f"\nEarly Stopping at epoch {e} ")
        break

    return history

In [None]:
# TRAINING

# The perceptrons draw their initial weights from NumPy's global random state;
# seed it here so a fresh Restart & Run All reproduces the same training runs.
np.random.seed(42)

input_dim_m = x_train_m.shape[1]
perc_m = Perceptron(input_dim=input_dim_m)

input_dim_p = x_train_p.shape[1]
perc_p = Perceptron(input_dim=input_dim_p)

# Each call returns the per-epoch loss/accuracy history of that model.
history_m = perc_m.train(x_train_m, y_train_m, epochs=50)
history_p = perc_p.train(x_train_p, y_train_p, epochs=50)

In [None]:
# VISUALISING RESULTS

# One row, two panels: loss on the left, accuracy on the right.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(10,5))

loss_ax.plot(history_m['loss'], label='Maths Loss')
loss_ax.plot(history_p['loss'], label='Portugues Loss', linestyle='--')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend()
loss_ax.set_title('LOSS OVER EPOCH')

acc_ax.plot(history_m['accuracy'], label='Maths Accuracy')
acc_ax.plot(history_p['accuracy'], label='Portugues Accuracy', linestyle='--')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend()
acc_ax.set_title('ACCURACY OVER EPOCH')

fig.tight_layout()
plt.show()

In [None]:
# MODEL EVALUATION

# Predict on the held-out test rows of each course.
y_pred_m, y_pred_p = perc_m.prediction(x_test_m), perc_p.prediction(x_test_p)

# Fraction of test predictions that match the true labels.
acc_m, acc_p = perc_m.accuracy(y_pred_m, y_test_m), perc_p.accuracy(y_pred_p, y_test_p)

print(f"Test accuracy over maths dataset: {acc_m*100:.2f}%")
print(f"Test accuracy over portugues dataset: {acc_p*100:.2f}%")

In [None]:
#USER INPUT

class userInput:
  """Collects the model's input features from the user through console prompts."""

  def __init__(self):
    # None marks a free numeric field; a list enumerates the accepted answers.
    self.attributes = {
            'age': None,
            'sex': ['F', 'M'],
            'studytime': ['1', '2', '3', '4'],
            'failures': ['0', '1', '2', '3'],
            'higher': ['yes', 'no'],
            'internet': ['yes', 'no'],
            'goout': ['1', '2', '3', '4', '5'],
            'absences': None,
            'G1': None,
            'G2': None
            }
    # Inclusive (low, high) bounds shown in, and enforced for, numeric prompts.
    self.numeric_ranges = {
            'age': (15, 22),
            'absences': (0, 98),
            'G1': (0, 20),
            'G2': (0, 20)
            }

  def _ask_number(self, attri):
    """Prompt until the user enters a number inside the attribute's range."""
    # Numeric attributes without an explicit entry use the 0-20 range.
    low, high = self.numeric_ranges.get(attri, (0, 20))
    prompt = f"Enter value for {attri} (a number between {low}-{high}): "
    while True:
      raw = input(prompt).strip()
      try:
        number = float(raw)
      except ValueError:
        number = None
      # The chained comparison also rejects NaN, which is never within bounds.
      if number is not None and low <= number <= high:
        return number
      print(f"Invalid input, enter a number between {low}-{high}")

  def _ask_choice(self, attri, options):
    """Prompt until the answer matches one of options, ignoring case."""
    print(f"Enter value for {attri} ({',' .join(options)}): ")
    while True:
      lowered_choice = input().strip().lower()
      for option in options:
        if option.lower() == lowered_choice:
          # Return the canonical spelling from options, not the typed text.
          return option
      print(f"Invalid input, try entring one of these {',' .join(options)}")

  def getInput(self):
    """Ask for every attribute in turn and return the answers.

    Returns:
      Dict mapping attribute name to a float (numeric fields) or to the
      matching option string (categorical fields).
    """
    user_data = {}
    for attri, options in self.attributes.items():
      if options is None:
        user_data[attri] = self._ask_number(attri)
      else:
        user_data[attri] = self._ask_choice(attri, options)
    return user_data

In [None]:
# Prompt for every attribute, then echo the collected answers for review.
user_instance = userInput()
user_data = user_instance.getInput()

for attributes in user_data:
  value = user_data[attributes]
  print(f"{attributes} : {value}")

In [None]:
# FORMATTING USER INPUT

def format_user_input(user_data, training_columns):
  """Turn one user's answers into a single-row frame laid out like the training data.

  Args:
    user_data: Dict mapping attribute name to the user's answer. Answers
      may be numbers, numeric strings such as '2', or category strings
      such as 'yes'.
    training_columns: Column names, in order, that the model was trained on.

  Returns:
    One-row DataFrame of floats with exactly training_columns as columns.
    Indicator columns the user's answers did not produce are filled with 0.
  """
  def _as_number(value):
    # Menu answers arrive as text ('2'); left as strings they would be one-hot
    # encoded into 'studytime_2'-style columns and the real numeric column
    # would end up zero-filled. Genuine categories ('yes', 'F') stay strings.
    if isinstance(value, str):
      try:
        return float(value)
      except ValueError:
        return value
    return value

  cleaned = {key: _as_number(value) for key, value in user_data.items()}
  user_encoded = pd.get_dummies(pd.DataFrame([cleaned]))

  # Keep only the training columns, in training order; absent ones become 0.
  user_encoded = user_encoded.reindex(columns=training_columns, fill_value=0)

  return user_encoded.astype(float)

# Feature columns (everything except the G3 target) of each encoded dataset.
training_columns_m = data_encode_m_filtered.columns.drop('G3').tolist()
training_columns_p = data_encode_p_filtered.columns.drop('G3').tolist()

# Lay the user's answers out in each model's training column order.
formatted_user_m = format_user_input(user_data, training_columns_m)
formatted_user_p = format_user_input(user_data, training_columns_p)

In [None]:
# SCALING USER INPUT

# Reuse the scalers fitted on the training data; they are not refitted here.
scaled_data_m, scaled_data_p = (
    scaler_m.transform(formatted_user_m),
    scaler_p.transform(formatted_user_p),
)

In [None]:
# MAKING PREDICTIONS

# Each result holds a single 0/1 prediction for the one user row.
result_m, result_p = perc_m.prediction(scaled_data_m), perc_p.prediction(scaled_data_p)

# A prediction of 1 is reported as PASS, anything else as FAIL.
verdict_m = 'PASS' if result_m[0]==1 else 'FAIL'
verdict_p = 'PASS' if result_p[0]==1 else 'FAIL'

print(f"Based on the provided inputs you are likely to {verdict_m} the MATHS course")
print(f"Based on the provided inputs you are likelly to {verdict_p} the PORTUGUESE course")
print(f"\n(NOTE: This prediction is based on a model with an accuracy of {acc_m*100:.2f}% for maths and {acc_p*100:.2f}% for portuguese.)")