# Performing needed imports and boilder plate code.

In [1]:
import random
import numpy as np
from tqdm.notebook import tqdm
import pandas as pd
import torch
from torch.utils.data import DataLoader

# set this variable to a number to be used as the random seed
# or to None if you don't want to set a random seed
seed = 1234

if seed is not None:
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

# Copied over data cleaning steps from our data cleaning notebook.

In [2]:
education_data = pd.read_csv('students_clean.csv')


education_data.drop('Parent_Education_Level', axis=1, inplace=True) 

education_data['Gender'] = education_data['Gender'].replace({'Male': 1, 'Female': 0}).astype(int)
education_data['Internet_Access_at_Home'] = education_data['Internet_Access_at_Home'].replace({'Yes': 1, 'No': 0}).astype(int)
education_data['Extracurricular_Activities'] = education_data['Extracurricular_Activities'].replace({'Yes': 1, 'No': 0}).astype(int)


# Low = 1, Medium = 2, High = 3
mapper = {'low': 1, 'medium': 2, 'high': 3}

education_data['Family_Income_Level'] = (
    education_data['Family_Income_Level']
      .astype(str)                  # works even if the value is already 1/2/3 or NaN
      .str.strip().str.lower()
      .map(mapper)                  # returns NaN where no mapping found
      .fillna(education_data['Family_Income_Level'])  # keepin the original numeric/blank entries
      .astype('Int64')              #  nullable integer dtype
)

labels = open('departments.txt').read().splitlines()
department_mapping = {name: index for index, name in enumerate(labels)}
department_indices = education_data['Department'].map(department_mapping)
education_data.insert(3, 'department index', department_indices)

mapper = {'A': 1, 'B': 1, 'C': 1, 'D':0,'F':0}

education_data['Grade'] = (
    education_data['Grade']
      .astype(str)              # convert everything to string
      .str.strip().str.upper()  # remove spaces and standardize to uppercase
      .map(mapper)              # map letters to numbers
)
education_data = education_data.drop(columns='Department')

education_data = education_data.apply(lambda col: col / col.max() if pd.api.types.is_numeric_dtype(col) else col)
education_data.head()

  education_data['Gender'] = education_data['Gender'].replace({'Male': 1, 'Female': 0}).astype(int)
  education_data['Internet_Access_at_Home'] = education_data['Internet_Access_at_Home'].replace({'Yes': 1, 'No': 0}).astype(int)
  education_data['Extracurricular_Activities'] = education_data['Extracurricular_Activities'].replace({'Yes': 1, 'No': 0}).astype(int)


Unnamed: 0,Gender,Age,department index,Attendance (%),Midterm_Score,Final_Score,Assignments_Avg,Quizzes_Avg,Participation_Score,Projects_Score,Total_Score,Grade,Study_Hours_per_Week,Extracurricular_Activities,Internet_Access_at_Home,Family_Income_Level,Stress_Level (1-10),Sleep_Hours_per_Night
0,0.0,0.916667,0.0,0.9736,0.406141,0.596219,0.736974,0.531753,0.734,0.6284,0.629778,0.0,0.343333,1.0,0.0,0.666667,0.1,0.655556
1,1.0,0.75,0.333333,0.9771,0.572757,0.740148,0.742374,0.982398,0.88,0.9823,0.861454,1.0,0.903333,0.0,0.0,0.333333,0.4,0.477778
2,1.0,1.0,0.666667,0.9952,0.418442,0.638628,0.858586,0.50005,0.047,0.9122,0.712125,0.0,0.413333,1.0,0.0,0.333333,0.9,0.677778
3,0.0,1.0,0.666667,0.9038,0.456546,0.444489,0.681068,0.662766,0.042,0.5548,0.543198,0.0,0.85,0.0,1.0,0.333333,0.8,0.544444
4,0.0,0.958333,1.0,0.5941,0.531353,0.617824,0.676668,0.839884,0.643,0.8743,0.750887,1.0,0.443333,1.0,0.0,0.666667,0.6,0.5


# Now defining our data loader and perceptron

In [3]:
from torch.utils.data import Dataset
import torch

class MyDataset(Dataset):
    def __init__(self, df, feature_cols, target_col):
        self.df = df
        self.feature_cols = feature_cols
        self.target_col = target_col

    def __len__(self):
        return len(self.df)
    
    def __getitem__(self, index):
        row = self.df.iloc[index]
        x = torch.tensor(row[self.feature_cols].to_numpy(dtype=np.float32), dtype=torch.float32)
        y = torch.tensor(row[self.target_col], dtype=torch.long)  # long for classification
        return x, y

In [4]:
def train_perceptron(train_dl, n_features, pos_class):
    # First initialize the model.
    w = np.zeros(n_features)
    b = 0
    n_errors = 0
    weight_steps = []
    total_pos_in_train = 0

    # Adding this in for debug purposes to track the changes to the weight vectors on each
    # round.
    
    # Average perceptron features
    totalW = np.zeros(n_features)
    totalB = 0;
    updateCount = 0;
    
    # Now loop through each batch.
    for batch_idx, (x, y) in tqdm(enumerate(train_dl), total=len(train_dl),):
        
        x_curr_np = x.numpy()
        y_curr_np = y.numpy()

        total_pos_in_train += (y_curr_np == 1).sum(axis=0)
        

        # Now perform the training/classification loop.
        scores = x_curr_np @ w + b
       
        
        y_pred = (scores > 0).astype(int)


        # Now we vectorize the update to make this more efficient.
        pred_error = y_curr_np - y_pred
        n_errors += np.sum(np.abs(pred_error) != 0) # If the pred error is zero then it is correct.

        # First append the previous weights to weight steps which will be used for debuging puprposes.
        weight_steps.append((pred_error[:,None]*x_curr_np).sum(axis=0).copy())
        
        w += (pred_error[:,None]*x_curr_np).sum(axis=0) # Re-shape pred errors to update and only add
                                                        # inccorect preds, axis=0 for rows.
        b += pred_error.sum()

        # Now print out the weights and bias updates every update if we are in debug mode.
        

    # Now once we are done training the result is the weights and biases.
    return (w,b,n_errors,weight_steps.copy(),total_pos_in_train) # I am just copying to avoid weird cases due to mutability of list.

# Create the training and testing partitions.

In [5]:
from sklearn.model_selection import train_test_split
train_df, test_df = train_test_split(education_data, train_size=0.9,random_state=seed)
train_df,dev_df = train_test_split(train_df, train_size=0.8,random_state=seed)

train_df.reset_index(inplace=True,drop=True)
dev_df.reset_index(inplace=True,drop=True)
test_df.reset_index(inplace=True,drop=True)


print(f'train rows: {len(train_df.index):,}')
print(f'dev rows: {len(dev_df.index):,}')
print(f'test rows: {len(test_df.index):,}')

train rows: 3,600
dev rows: 900
test rows: 500


In [6]:
# Check pass/fail distribution in train and dev datasets
print("="*70)
print("Pass/Fail Distribution in Datasets")
print("="*70)

# Train set distribution
train_pass = (train_df['Grade'] == 1).sum()
train_fail = (train_df['Grade'] == 0).sum()
train_total = len(train_df)

print(f"\nTrain Set:")
print(f"  Pass (1): {train_pass} ({train_pass/train_total*100:.2f}%)")
print(f"  Fail (0): {train_fail} ({train_fail/train_total*100:.2f}%)")
print(f"  Total: {train_total}")

# Dev set distribution
dev_pass = (dev_df['Grade'] == 1).sum()
dev_fail = (dev_df['Grade'] == 0).sum()
dev_total = len(dev_df)

print(f"\nDev Set:")
print(f"  Pass (1): {dev_pass} ({dev_pass/dev_total*100:.2f}%)")
print(f"  Fail (0): {dev_fail} ({dev_fail/dev_total*100:.2f}%)")
print(f"  Total: {dev_total}")

# Test set distribution
test_pass = (test_df['Grade'] == 1).sum()
test_fail = (test_df['Grade'] == 0).sum()
test_total = len(test_df)

print(f"\nTest Set:")
print(f"  Pass (1): {test_pass} ({test_pass/test_total*100:.2f}%)")
print(f"  Fail (0): {test_fail} ({test_fail/test_total*100:.2f}%)")
print(f"  Total: {test_total}")


Pass/Fail Distribution in Datasets

Train Set:
  Pass (1): 2105 (58.47%)
  Fail (0): 1495 (41.53%)
  Total: 3600

Dev Set:
  Pass (1): 555 (61.67%)
  Fail (0): 345 (38.33%)
  Total: 900

Test Set:
  Pass (1): 301 (60.20%)
  Fail (0): 199 (39.80%)
  Total: 500


# Shave Columns

In [7]:
def get_features(data, column_name, exclude_cols, n):

    data = data.drop(columns=exclude_cols)

    corr_df = data.corr(method='pearson')[[column_name]]
    corr_df = corr_df.drop(index=[column_name])
    corr_df['Grade_Corr_Abs'] = corr_df[column_name].apply(lambda x: np.abs(x))
    corr_df = corr_df.sort_values(by='Grade_Corr_Abs',ascending=False)
    corr_df = corr_df[corr_df['Grade_Corr_Abs'] >= 0.01]
    top_features = corr_df.head(n).index.tolist()
    return top_features

# Before Midterm

In [8]:
# Averaged Perceptron: Train 10 perceptrons and average their weights
n_perceptrons = 10
weight_vecs = []
bias_vecs = []
features_lst = get_features(education_data, 'Grade', ['Total_Score','Midterm_Score','Final_Score','Projects_Score'], 10)
num_feat = len(features_lst)
batch_size = 64
shuffle = True
for i in range(n_perceptrons):
    train_ds = MyDataset(train_df, features_lst, 'Grade')
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle)
    
    w_curr, b_curr, error_curr, weight_hist_curr, tot_train_pos_curr = train_perceptron(train_dl, num_feat, pos_class=1)
    
    weight_vecs.append(w_curr)
    bias_vecs.append(b_curr)

# Average the weights and biases
w_avg = np.mean(weight_vecs, axis=0)
b_avg = np.mean(bias_vecs, axis=0)

print(f"-------------------Averaged Perceptron (10 perceptrons)------------------------------\n")
print(f"Averaged weight vector shape: {w_avg}")
print(f"Averaged bias value: {b_avg:.4f}\n")

# Test on dev.
X_dev_a = dev_df[features_lst].to_numpy()
dev_y_true = dev_df['Grade'].to_numpy()
dev_y_pred = ((X_dev_a @ w_avg + b_avg) > 0).astype(int)
n_correct_dev = (dev_y_true==dev_y_pred).sum(axis=0)

print(f"The number of correct preds was {n_correct_dev} for acc of {(n_correct_dev/dev_y_true.shape[0])*100}%")
print(f"The number of pos preds was {(dev_y_pred==1).sum(axis=0)} and neg num was {(dev_y_pred==0).sum(axis=0)}")

# Additional detailed metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

accuracy = accuracy_score(dev_y_true, dev_y_pred)
precision = precision_score(dev_y_true, dev_y_pred, zero_division=0)
recall = recall_score(dev_y_true, dev_y_pred, zero_division=0)
f1 = f1_score(dev_y_true, dev_y_pred, zero_division=0)
cm = confusion_matrix(dev_y_true, dev_y_pred)

print(f"\n{'='*70}")
print("Detailed Evaluation Metrics:")
print(f"{'='*70}")
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

print(f"\nConfusion Matrix:")
print(f"                Predicted")
print(f"              Fail    Pass")
print(f"Actual Fail   {cm[0,0]:4d}   {cm[0,1]:4d}")
print(f"       Pass   {cm[1,0]:4d}   {cm[1,1]:4d}")

print(f"\nPredictions breakdown:")
print(f"  Predicted Fail (0): {(dev_y_pred==0).sum()}")
print(f"  Predicted Pass (1): {(dev_y_pred==1).sum()}")
print(f"  Actual Fail (0): {(dev_y_true==0).sum()}")
print(f"  Actual Pass (1): {(dev_y_true==1).sum()}")


  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

-------------------Averaged Perceptron (10 perceptrons)------------------------------

Averaged weight vector shape: [37.71857245 17.80586082 30.43779938  1.94999998 -8.1822228  -1.6       ]
Averaged bias value: -26.2000

The number of correct preds was 557 for acc of 61.88888888888889%
The number of pos preds was 898 and neg num was 2

Detailed Evaluation Metrics:
Accuracy: 61.89%
Precision: 0.6180
Recall: 1.0000
F1-Score: 0.7639

Confusion Matrix:
                Predicted
              Fail    Pass
Actual Fail      2    343
       Pass      0    555

Predictions breakdown:
  Predicted Fail (0): 2
  Predicted Pass (1): 898
  Actual Fail (0): 345
  Actual Pass (1): 555


# After Midterm and before Final

In [9]:
# Averaged Perceptron: Train 10 perceptrons and average their weights
n_perceptrons = 10
weight_vecs = []
bias_vecs = []
features_lst = features_lst = get_features(education_data, 'Grade', ['Total_Score','Final_Score','Projects_Score'], 10)

num_feat = len(features_lst)

for i in range(n_perceptrons):
    train_ds = MyDataset(train_df, features_lst, 'Grade')
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle)
    
    w_curr, b_curr, error_curr, weight_hist_curr, tot_train_pos_curr = train_perceptron(train_dl, num_feat, pos_class=1)
    
    weight_vecs.append(w_curr)
    bias_vecs.append(b_curr)

# Average the weights and biases
w_avg = np.mean(weight_vecs, axis=0)
b_avg = np.mean(bias_vecs, axis=0)

print(f"-------------------Averaged Perceptron (10 perceptrons)------------------------------\n")
print(f"Averaged weight vector shape: {w_avg}")
print(f"Averaged bias value: {b_avg:.4f}\n")

# Test on dev.
X_dev_a = dev_df[features_lst].to_numpy()
dev_y_true = dev_df['Grade'].to_numpy()
dev_y_pred = ((X_dev_a @ w_avg + b_avg) > 0).astype(int)
n_correct_dev = (dev_y_true==dev_y_pred).sum(axis=0)

print(f"The number of correct preds was {n_correct_dev} for acc of {(n_correct_dev/dev_y_true.shape[0])*100}%")
print(f"The number of pos preds was {(dev_y_pred==1).sum(axis=0)} and neg num was {(dev_y_pred==0).sum(axis=0)}")

# Additional detailed metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

accuracy = accuracy_score(dev_y_true, dev_y_pred)
precision = precision_score(dev_y_true, dev_y_pred, zero_division=0)
recall = recall_score(dev_y_true, dev_y_pred, zero_division=0)
f1 = f1_score(dev_y_true, dev_y_pred, zero_division=0)
cm = confusion_matrix(dev_y_true, dev_y_pred)

print(f"\n{'='*70}")
print("Detailed Evaluation Metrics:")
print(f"{'='*70}")
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

print(f"\nConfusion Matrix:")
print(f"                Predicted")
print(f"              Fail    Pass")
print(f"Actual Fail   {cm[0,0]:4d}   {cm[0,1]:4d}")
print(f"       Pass   {cm[1,0]:4d}   {cm[1,1]:4d}")

print(f"\nPredictions breakdown:")
print(f"  Predicted Fail (0): {(dev_y_pred==0).sum()}")
print(f"  Predicted Pass (1): {(dev_y_pred==1).sum()}")
print(f"  Actual Fail (0): {(dev_y_true==0).sum()}")
print(f"  Actual Pass (1): {(dev_y_true==1).sum()}")

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

-------------------Averaged Perceptron (10 perceptrons)------------------------------

Averaged weight vector shape: [ 32.39464005  38.86777661  15.10251047  27.95419947  -4.61000013
 -15.63888958  -1.5       ]
Averaged bias value: -37.0000

The number of correct preds was 564 for acc of 62.66666666666667%
The number of pos preds was 885 and neg num was 15

Detailed Evaluation Metrics:
Accuracy: 62.67%
Precision: 0.6237
Recall: 0.9946
F1-Score: 0.7667

Confusion Matrix:
                Predicted
              Fail    Pass
Actual Fail     12    333
       Pass      3    552

Predictions breakdown:
  Predicted Fail (0): 15
  Predicted Pass (1): 885
  Actual Fail (0): 345
  Actual Pass (1): 555


# After Final

In [10]:
# Averaged Perceptron: Train 10 perceptrons and average their weights
n_perceptrons = 10
weight_vecs = []
bias_vecs = []
features_lst = features_lst = get_features(education_data, 'Grade', ['Total_Score'], 10)

num_feat = len(features_lst)

for i in range(n_perceptrons):
    train_ds = MyDataset(train_df, features_lst, 'Grade')
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=shuffle)
    
    w_curr, b_curr, error_curr, weight_hist_curr, tot_train_pos_curr = train_perceptron(train_dl, num_feat, pos_class=1)
    
    weight_vecs.append(w_curr)
    bias_vecs.append(b_curr)

# Average the weights and biases
w_avg = np.mean(weight_vecs, axis=0)
b_avg = np.mean(bias_vecs, axis=0)

print(f"-------------------Averaged Perceptron (10 perceptrons)------------------------------\n")
print(f"Averaged weight vector shape: {w_avg}")
print(f"Averaged bias value: {b_avg:.4f}\n")

# Test on dev.
X_dev_a = dev_df[features_lst].to_numpy()
dev_y_true = dev_df['Grade'].to_numpy()
dev_y_pred = ((X_dev_a @ w_avg + b_avg) > 0).astype(int)
n_correct_dev = (dev_y_true==dev_y_pred).sum(axis=0)

print(f"The number of correct preds was {n_correct_dev} for acc of {(n_correct_dev/dev_y_true.shape[0])*100}%")
print(f"The number of pos preds was {(dev_y_pred==1).sum(axis=0)} and neg num was {(dev_y_pred==0).sum(axis=0)}")

# Additional detailed metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

accuracy = accuracy_score(dev_y_true, dev_y_pred)
precision = precision_score(dev_y_true, dev_y_pred, zero_division=0)
recall = recall_score(dev_y_true, dev_y_pred, zero_division=0)
f1 = f1_score(dev_y_true, dev_y_pred, zero_division=0)
cm = confusion_matrix(dev_y_true, dev_y_pred)

print(f"\n{'='*70}")
print("Detailed Evaluation Metrics:")
print(f"{'='*70}")
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

print(f"\nConfusion Matrix:")
print(f"                Predicted")
print(f"              Fail    Pass")
print(f"Actual Fail   {cm[0,0]:4d}   {cm[0,1]:4d}")
print(f"       Pass   {cm[1,0]:4d}   {cm[1,1]:4d}")

print(f"\nPredictions breakdown:")
print(f"  Predicted Fail (0): {(dev_y_pred==0).sum()}")
print(f"  Predicted Pass (1): {(dev_y_pred==1).sum()}")
print(f"  Actual Fail (0): {(dev_y_true==0).sum()}")
print(f"  Actual Pass (1): {(dev_y_true==1).sum()}")

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

  0%|          | 0/57 [00:00<?, ?it/s]

-------------------Averaged Perceptron (10 perceptrons)------------------------------

Averaged weight vector shape: [ 85.82600546  63.3842903   12.35861642  25.11083091  -7.74887449
  26.11179943 -13.55000019 -35.11333377  -5.1       ]
Averaged bias value: -66.9000

The number of correct preds was 576 for acc of 64.0%
The number of pos preds was 879 and neg num was 21

Detailed Evaluation Metrics:
Accuracy: 64.00%
Precision: 0.6314
Recall: 1.0000
F1-Score: 0.7741

Confusion Matrix:
                Predicted
              Fail    Pass
Actual Fail     21    324
       Pass      0    555

Predictions breakdown:
  Predicted Fail (0): 21
  Predicted Pass (1): 879
  Actual Fail (0): 345
  Actual Pass (1): 555


In [11]:
# Find False Positives (FP): Predicted Pass (1) but actual Fail (0)
false_pos_indices = np.where((dev_y_pred == 1) & (dev_y_true == 0))[0]
false_pos_examples = dev_df.iloc[false_pos_indices]

# Find False Negatives (FN): Predicted Fail (0) but actual Pass (1)
false_neg_indices = np.where((dev_y_pred == 0) & (dev_y_true == 1))[0]
false_neg_examples = dev_df.iloc[false_neg_indices]

# Get top 15 false positives and top 10 false negatives
false_pos_examples_top_15 = false_pos_examples.head(15)
false_neg_examples_top_10 = false_neg_examples.head(10)

# Print out the examples
print("15 Examples of False Positives (Predicted Pass, Actual Fail):")
print(false_pos_examples_top_15[['Attendance (%)', 'Extracurricular_Activities', 'Midterm_Score', 'Final_Score', 
                                  'Assignments_Avg', 'Quizzes_Avg', 'Participation_Score', 'Projects_Score', 
                                  'Stress_Level (1-10)', 'Sleep_Hours_per_Night', 'Grade']])

print("\n10 Examples of False Negatives (Predicted Fail, Actual Pass):")
print(false_neg_examples_top_10[['Attendance (%)', 'Extracurricular_Activities', 'Midterm_Score', 'Final_Score', 
                                  'Assignments_Avg', 'Quizzes_Avg', 'Participation_Score', 'Projects_Score', 
                                  'Stress_Level (1-10)', 'Sleep_Hours_per_Night', 'Grade']])


15 Examples of False Positives (Predicted Pass, Actual Fail):
    Attendance (%)  Extracurricular_Activities  Midterm_Score  Final_Score  \
1           0.9268                         1.0       0.603360     0.710042   
8           0.7032                         1.0       0.411441     0.836767   
13          0.7736                         0.0       0.979498     0.413983   
16          0.6804                         1.0       0.574957     0.423885   
19          0.5296                         0.0       0.783878     0.552310   
20          0.6814                         1.0       0.958296     0.563913   
22          0.7290                         1.0       0.971997     0.464093   
23          0.8730                         0.0       0.754075     0.493599   
27          0.9662                         0.0       0.799680     0.506701   
31          0.6687                         0.0       0.722572     0.886677   
32          0.6847                         1.0       0.570757     0.606521   
36

In [12]:
# Test on Test.
X_test_a = test_df[features_lst].to_numpy()
test_y_true = test_df['Grade'].to_numpy()
test_y_pred = ((X_test_a @ w_avg + b_avg) > 0).astype(int)
n_correct_test = (test_y_true==test_y_pred).sum(axis=0)

print(f"The number of correct preds was {n_correct_test} for acc of {(n_correct_test/test_y_true.shape[0])*100}%")
print(f"The number of pos preds was {(test_y_pred==1).sum(axis=0)} and neg num was {(test_y_pred==0).sum(axis=0)}")

# Additional detailed metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

accuracy = accuracy_score(test_y_true, test_y_pred)
precision = precision_score(test_y_true, test_y_pred, zero_division=0)
recall = recall_score(test_y_true, test_y_pred, zero_division=0)
f1 = f1_score(test_y_true, test_y_pred, zero_division=0)
cm = confusion_matrix(test_y_true, test_y_pred)

print(f"\n{'='*70}")
print("Detailed Evaluation Metrics:")
print(f"{'='*70}")
print(f"Accuracy: {accuracy*100:.2f}%")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1-Score: {f1:.4f}")

print(f"\nConfusion Matrix:")
print(f"                Predicted")
print(f"              Fail    Pass")
print(f"Actual Fail   {cm[0,0]:4d}   {cm[0,1]:4d}")
print(f"       Pass   {cm[1,0]:4d}   {cm[1,1]:4d}")

print(f"\nPredictions breakdown:")
print(f"  Predicted Fail (0): {(test_y_pred==0).sum()}")
print(f"  Predicted Pass (1): {(test_y_pred==1).sum()}")
print(f"  Actual Fail (0): {(test_y_true==0).sum()}")
print(f"  Actual Pass (1): {(test_y_true==1).sum()}")

The number of correct preds was 314 for acc of 62.8%
The number of pos preds was 487 and neg num was 13

Detailed Evaluation Metrics:
Accuracy: 62.80%
Precision: 0.6181
Recall: 1.0000
F1-Score: 0.7640

Confusion Matrix:
                Predicted
              Fail    Pass
Actual Fail     13    186
       Pass      0    301

Predictions breakdown:
  Predicted Fail (0): 13
  Predicted Pass (1): 487
  Actual Fail (0): 199
  Actual Pass (1): 301
