In [4]:
# Imports
from logr_utils import sigmoid, logistic_sigmoid_regression
from sklearn.metrics import accuracy_score, precision_score, recall_score

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the admissions dataset (path is relative to the notebook's working directory).
data = pd.read_csv('data/admission.csv')

# Strip surrounding whitespace from the header names so the column lookups
# below match exactly.
data.columns = data.columns.str.strip()

# Binary target: 1 when 'Chance of Admit' is at least ADMIT_THRESHOLD, else 0.
ADMIT_THRESHOLD = 0.75
data['Admit'] = (data['Chance of Admit'] >= ADMIT_THRESHOLD).astype(int)

# Sequential (unshuffled) split: the first N_TRAIN rows train the model and
# the remaining rows are held out for testing.
N_TRAIN = 350
train_data = data.iloc[:N_TRAIN]
test_data = data.iloc[N_TRAIN:]

# Single source of truth for the model inputs, so the train and test matrices
# can never drift apart in column choice or order.
FEATURE_COLUMNS = [
    'GRE Score',
    'TOEFL Score',
    'University Rating',
    'SOP',
    'LOR',
    'CGPA',
    'Research',
]

# Feature matrices are transposed so that each column is one sample:
# shape (n_features, n_samples). Targets stay 1-D.
X_train = train_data[FEATURE_COLUMNS].values.T
y_train = train_data['Admit'].values

X_test = test_data[FEATURE_COLUMNS].values.T
y_test = test_data['Admit'].values

In [5]:
# Samples are stored column-wise, so axis 1 holds the sample count.
m = X_train.shape[1]
m_test = X_test.shape[1]

# Prepend a constant row of ones to each matrix so that the first weight
# plays the role of the intercept term (X0).
Xbar_train = np.concatenate([np.ones((1, m)), X_train], axis=0)
Xbar_test = np.concatenate([np.ones((1, m_test)), X_test], axis=0)

In [24]:
# Training hyperparameters.
w_init = np.zeros(Xbar_train.shape[0])  # one weight per row of Xbar_train (intercept row included)
eta = 0.01         # learning rate
tol = 1e-4         # convergence tolerance
max_count = 10000  # upper bound on the number of iterations

# Train the model using logistic regression.
# `tol` and `max_count` were previously defined but never handed to the
# trainer, so changing them had no effect; they are now passed explicitly.
# NOTE(review): assumes logistic_sigmoid_regression accepts `tol` and
# `max_count` keyword arguments -- confirm against logr_utils.
# NOTE(review): `w_init` is 1-D; verify that this is the layout the trainer
# expects (some implementations require a (d, 1) column vector).
weights = logistic_sigmoid_regression(
    Xbar_train,
    y_train,
    w_init,
    eta,
    tol=tol,
    max_count=max_count,
)

# Prediction function using sigmoid
def predict(X, w):
    """Predict binary class labels by thresholding sigmoid(w . x) at 0.5.

    Parameters
    ----------
    X : array-like of shape (d, m)
        Feature matrix with one column per sample (including the intercept
        row when the model was trained with one).
    w : array-like
        Model weights. Supported layouts:

        * shape ``(d,)``   -- a single weight vector, used as-is;
        * shape ``(d, 1)`` -- a single column weight vector, flattened;
        * anything else    -- treated as a stack of weights (list or array)
          and the last entry along the first axis is used, which is what the
          previous implementation always did.

    Returns
    -------
    numpy.ndarray of int
        1 where ``sigmoid(z) >= 0.5`` and 0 otherwise, with ``z = w . X``.
    """
    X = np.asarray(X)
    w_arr = np.asarray(w)
    n_features = X.shape[0]

    if w_arr.shape == (n_features,):
        # Already a flat weight vector. Indexing it with [-1] (the old
        # behaviour) would keep only the last coefficient and turn the dot
        # product into a scalar multiple of X.
        w_vec = w_arr
    elif w_arr.shape == (n_features, 1):
        # Column vector: flatten so the result is 1-D with one label per sample.
        w_vec = w_arr[:, 0]
    else:
        # Backward-compatible path: use the most recent entry of a stack.
        w_vec = np.asarray(w[-1]).T

    z = np.dot(w_vec, X)
    y_pred = sigmoid(z)
    return (y_pred >= 0.5).astype(int)

# Score the held-out set with the final entry of the training result.
y_pred_test = predict(Xbar_test, weights[-1])

# Sanity check: labels and predictions must have matching shapes before they
# are handed to scikit-learn.
print(
    f"Shape of y_test: {y_test.shape}",
    f"Shape of y_pred_test: {y_pred_test.shape}",
    sep="\n",
)

# Compute the three classification metrics in one pass (same order as before).
accuracy, precision, recall = (
    scorer(y_test, y_pred_test)
    for scorer in (accuracy_score, precision_score, recall_score)
)

# Report each metric rounded to two decimals, one per line.
print(
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    sep="\n",
)

Shape of y_test: (50,)
Shape of y_pred_test: (50,)
Accuracy: 0.44
Precision: 0.44
Recall: 1.00
