In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import StandardScaler

# Load Data

In [None]:
# Labelled records used for fitting: four parallel columns of 40 values each.
# 'admitted' holds the 0/1 target; the other three columns are the predictors.
training_data = dict(
    gmat=[
        780, 750, 690, 710, 680, 730, 690, 720, 740, 690,
        610, 690, 710, 680, 770, 610, 580, 650, 540, 590,
        620, 600, 550, 550, 570, 670, 660, 580, 650, 660,
        640, 620, 660, 660, 680, 650, 670, 580, 590, 690,
    ],
    gpa=[
        4, 3.9, 3.3, 3.7, 3.9, 3.7, 2.3, 3.3, 3.3, 1.7,
        2.7, 3.7, 3.7, 3.3, 3.3, 3, 2.7, 3.7, 2.7, 2.3,
        3.3, 2, 2.3, 2.7, 3, 3.3, 3.7, 2.3, 3.7, 3.3,
        3, 2.7, 4, 3.3, 3.3, 2.3, 2.7, 3.3, 1.7, 2.7,
    ],
    work_experience=[
        3, 4, 3, 5, 4, 6, 1, 4, 5, 1,
        3, 5, 6, 4, 3, 1, 4, 6, 2, 3,
        2, 1, 4, 1, 2, 6, 4, 2, 6, 5,
        1, 2, 4, 6, 5, 1, 2, 1, 4, 5,
    ],
    admitted=[
        1, 1, 0, 1, 0, 1, 0, 1, 1, 0,
        0, 1, 1, 0, 1, 0, 0, 1, 0, 0,
        1, 0, 0, 0, 0, 1, 1, 0, 1, 1,
        0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
    ],
)

# Unlabelled records to classify once the model is trained (same predictors, no target).
prediction_data = dict(
    gmat=[590, 740, 680, 610, 710],
    gpa=[2, 3.7, 3.3, 2.3, 3],
    work_experience=[3, 4, 6, 1, 5],
)

In [None]:
# Wrap both column dictionaries in DataFrames (one column per key).
training_dataset = pd.DataFrame(data=training_data)
prediction_data = pd.DataFrame(data=prediction_data)

In [None]:
# Display the dtype pandas inferred for each column of the training frame.
training_dataset.dtypes

gmat                 int64
gpa                float64
work_experience      int64
admitted             int64
dtype: object

In [None]:
# Separate the predictor columns from the 'admitted' target column.
feature = training_dataset.drop(columns='admitted')
label = training_dataset['admitted']

# Preprocessing

In [None]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split identical from run to run.
x_train, x_val, y_train, y_val = train_test_split(
    feature,
    label,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Convert every split, and the rows to be predicted, to plain NumPy arrays.
x_train, x_val, y_train, y_val, prediction_data = map(
    np.array,
    (x_train, x_val, y_train, y_val, prediction_data),
)

In [None]:
# Fit the scaler on the training split only, then apply that same fitted
# transform to every array so validation/prediction rows do not influence it.
scaler = StandardScaler()
scaler.fit(x_train)
x_train, x_val, prediction_data = (
    scaler.transform(split) for split in (x_train, x_val, prediction_data)
)

# Building the Model

In [None]:
def sigmoid(x):
    """Numerically stable logistic function ``1 / (1 + exp(-x))``.

    The naive formula overflows in ``exp`` for large negative ``x``. This
    version only ever exponentiates non-positive values and picks the
    algebraically equivalent branch for each sign of ``x``.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy float scalar for scalar input, otherwise an ndarray with the same
    shape as ``x``; every value lies in [0, 1].
    """
    x = np.asarray(x, dtype=float)
    # exp() is applied to -|x| <= 0, so it can underflow to 0 but never overflow.
    decay = np.exp(-np.abs(x))
    result = np.where(x >= 0, 1 / (1 + decay), decay / (1 + decay))
    # Indexing with () unwraps a 0-d array into a scalar and leaves n-d arrays as they are.
    return result[()]

In [None]:
class LogisticRegression(object):
    """Binary logistic-regression classifier trained by batch gradient descent.

    The model is ``p(y = 1 | x) = sigmoid(w . x)``. No intercept term is
    fitted, so the decision boundary always passes through the origin of the
    feature space.

    Parameters
    ----------
    learning_rate : float
        Step size applied to the gradient, which is summed (not averaged)
        over all training rows.
    num_steps : int
        Number of full-batch gradient-descent updates performed by ``fit``.
    random_state : int or None, optional
        Seed for the random weight initialisation. With ``None`` (the
        default) the weights are drawn from NumPy's global RNG, so
        ``np.random.seed`` still controls them.

    Attributes
    ----------
    w : ndarray of shape (n_features,) or None
        Learned weights; ``None`` until ``fit`` has been called.
    X, Y : ndarray or None
        Training data kept from the last call to ``fit``.
    """

    def __init__(self, learning_rate, num_steps, random_state=None):
        self.learning_rate = learning_rate
        self.num_steps = num_steps
        self.random_state = random_state
        self.w = None
        self.X = None
        self.Y = None

    def _predict_single_point(self, x):
        """Return the predicted probability of class 1 for one feature vector."""
        return sigmoid(np.dot(self.w, x))

    def _gradient(self):
        """Return the log-loss gradient w.r.t. ``w``, summed over the training rows."""
        # Vectorized form of sum_i x_i * (sigmoid(w . x_i) - y_i).
        residuals = sigmoid(self.X @ self.w) - self.Y
        return self.X.T @ residuals

    def fit(self, X, Y):
        """Learn the weights from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        Y : array-like of shape (n_samples,)
            Binary targets encoded as 0/1.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``X`` and ``Y`` disagree on
            the number of samples.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f'X must be 2-dimensional, got shape {X.shape}')
        if Y.shape[0] != X.shape[0]:
            raise ValueError(
                f'X and Y must have the same number of samples, '
                f'got {X.shape[0]} and {Y.shape[0]}'
            )
        self.X = X
        self.Y = Y

        # The np.random module and RandomState expose the same random_sample
        # API, so an unseeded model keeps drawing from the global stream.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)
        self.w = rng.random_sample(self.X.shape[1])

        for _ in range(self.num_steps):
            self.w = self.w - self.learning_rate * self._gradient()

    def predict(self, X_new):
        """Return a list of 0/1 class labels, one per row of ``X_new``.

        A row is labelled 1 when its predicted probability is strictly
        greater than 0.5.
        """
        return [int(self._predict_single_point(x) > 0.5) for x in X_new]

# Training

In [None]:
# Configure the model: step size 0.001 and 100 gradient-descent updates.
lr = LogisticRegression(learning_rate=0.001, num_steps=100)

In [None]:
# fit() draws its initial weights from NumPy's global RNG. Seed it first so that
# Restart & Run All trains the same model every time.
# NOTE: outputs recorded before this seed was added may differ until re-run.
np.random.seed(0)
lr.fit(x_train, y_train)

# Validation Evaluation

In [None]:
# Predict the held-out rows and print them next to the true labels.
val = lr.predict(x_val)
print(
    f'Validation Prediction: {val}',
    f'Actual Validation Label: {y_val}',
    sep='\n',
)

Validation Prediction: [0, 0, 1, 1, 0, 0, 1, 1]
Actual Validation Label: [0 1 1 0 0 0 1 1]


In [None]:
# Rows are true labels and columns are predictions, both ordered [1, 0].
cm = confusion_matrix(y_val, val, labels=[1, 0])
# Derive the summary from the matrix instead of hard-coding it, so the text
# stays correct when the split, seed or model changes: the diagonal holds the
# correctly classified samples and the full sum is the validation-set size.
n_correct, n_total = np.trace(cm), cm.sum()
print(f'Confusion Matrix: \n{cm}\n {n_correct}/{n_total} predictions correct')

Confusion Matrix: 
[[3 1]
 [1 3]]
 6/8 predictions correct


In [None]:
# Compare element-wise on explicit arrays: `val` is a plain list, and the
# original list == ndarray comparison only worked because y_val happens to be
# an ndarray. The mean of the boolean matches is the fraction predicted correctly.
acc = np.mean(np.asarray(val) == np.asarray(y_val))
print(f'Validation Accuracy: {acc*100}%')

Validation Accuracy: 75.0%


# Test Data Prediction

In [None]:
# Classify the unlabelled rows (already scaled with the training scaler).
pred = lr.predict(X_new=prediction_data)
print(f'Prediction for Test Data: {pred}')

Prediction for Test Data: [0, 1, 1, 0, 1]
