# Predicting Student Admissions with Neural Networks
In this notebook, we predict student admissions to graduate school at UCLA based on three pieces of data:

- GRE Scores (Test)
- GPA Scores (Grades)
- Class rank (1-4)

The dataset originally came from here: http://www.ats.ucla.edu/

### Dependencies

In [3]:
%config InlineBackend.figure_format = 'retina'
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Neural Network Class

In [52]:
# This class handles the whole procedure, from loading the data to testing accuracy
class NN:
    """Small end-to-end pipeline around a single-layer sigmoid network.

    The intended call order is: construct (loads the CSV), optionally
    ``one_hot_encoder`` / ``scale_data``, then ``train_test_split``,
    ``train_nn`` and finally ``test_accuracy``.

    All state is kept in name-mangled private attributes; use ``getData`` to
    inspect the (possibly transformed) dataset.
    """

    def __init__(self, file):
        """Load the dataset.

        Args:
            file: Anything ``pandas.read_csv`` accepts (path or file-like object).
        """
        # A fixed seed makes the train/test split and the weight
        # initialisation reproducible from one run to the next.
        np.random.seed(20)
        self.__data = pd.read_csv(file)
        # Filled in later by train_test_split() / layers() / train_nn().
        self.__features = None
        self.__labels = None
        self.__features_test = None
        self.__labels_test = None
        self.__neurons = None
        self.__weights = None
        print('Data Loaded')

    def getData(self):
        """Return the current dataset, including any encoding/scaling applied so far."""
        return self.__data

    def one_hot_encoder(self, col):
        """Replace ``col`` with one indicator column per distinct value.

        The new columns are named after the (sorted) distinct values themselves
        and are appended after the remaining original columns.

        Args:
            col: Name of the column to encode.
        """
        classes = np.sort(self.__data[col].unique())
        values = np.asarray(self.__data[col])
        # Broadcasting (n_rows, 1) against (1, n_classes) yields the indicator matrix.
        indicators = (values[:, None] == classes[None, :]).astype(int)
        # Reuse the data's own index so that rows line up even if the index is
        # not the default 0..n-1 range.
        encoded = pd.DataFrame(data=indicators, columns=classes, index=self.__data.index)
        self.__data = pd.concat([self.__data.drop([col], axis=1), encoded], axis=1)
        print('Done! One Hot Encoding on {} column'.format(col))

    def scale_data(self, *cols):
        """Divide each named column by its own maximum.

        Features on very different scales (e.g. grades vs. test scores) are
        hard for gradient descent to handle; for non-negative columns this
        brings them into the 0-1 range.

        Args:
            *cols: Names of the columns to scale.
        """
        for col in cols:
            self.__data[col] = self.__data[col] / np.max(self.__data[col])
        print('Scaling Done')

    def train_test_split(self, label_col, train_fraction=0.8):
        """Randomly split the data into train/test features and labels.

        The four resulting pieces are stored on the instance for use by
        ``train_nn`` and ``test_accuracy``.

        Args:
            label_col: Name of the target column.
            train_fraction: Share of rows used for training (default 0.8).

        Raises:
            ValueError: If ``train_fraction`` is not in the interval (0, 1].
        """
        if not 0 < train_fraction <= 1:
            raise ValueError('train_fraction must be in (0, 1]')
        n_train = int(len(self.__data) * train_fraction)
        # Sample index *labels* without replacement, then select/drop by label
        # so both operations agree on what `sample` means.
        sample = np.random.choice(self.__data.index, size=n_train, replace=False)
        train_data = self.__data.loc[sample]
        test_data = self.__data.drop(sample)
        print('No. of training data: ', len(train_data))
        print('No. of testing data: ', len(test_data))
        self.__features = train_data.drop([label_col], axis=1)
        self.__labels = train_data[label_col]
        self.__features_test = test_data.drop([label_col], axis=1)
        self.__labels_test = test_data[label_col]

    def __sigmoid(self, x):
        """Logistic activation 1 / (1 + e^-x)."""
        return 1/(1+np.exp(-x))

    def __error_term(self, error, output):
        """Error scaled by the sigmoid derivative ``output * (1 - output)``.

        This is the gradient term for the squared-error loss reported during
        training. NOTE(review): the notebook's own ``error_term_formula``
        helper is not visible here; confirm it uses the same expression.
        """
        return error * output * (1 - output)

    def layers(self, neurons):
        """Record and print the layer sizes.

        Args:
            neurons: List such as ``[inputs, hidden_1, ..., outputs]``.

        Note:
            ``train_nn`` currently trains a single weight vector and does not
            consult this configuration.
        """
        self.__neurons = neurons
        print('Layers of Neural Network:')
        print('Layer 1, Input Layer Neurons: {}'.format(self.__neurons[0]))
        for i in range(1, len(self.__neurons)-1):
            print('Layer {}, Hidden Layer {} Neurons: {}'.format(i+1, i, self.__neurons[i]))
        print('Layer {}, Output Layer Neurons: {}'.format(len(self.__neurons), self.__neurons[-1]))

    def train_nn(self, epochs=1000, alpha=0.1):
        """Train the weights with batch gradient descent on the training split.

        Args:
            epochs: Number of passes over the training data.
            alpha: Learning rate.

        Returns:
            numpy.ndarray: The learned weight vector (one entry per feature).

        Raises:
            RuntimeError: If ``train_test_split`` has not been called yet.
            ValueError: If the training split is empty.
        """
        if self.__features is None or self.__labels is None:
            raise RuntimeError('Call train_test_split() before train_nn()')
        features = np.asarray(self.__features, dtype=float)
        labels = np.asarray(self.__labels, dtype=float)
        n_records, n_features = features.shape
        if n_records == 0 or n_features == 0:
            raise ValueError('Training data is empty')

        last_loss = 1
        # Small random initial weights, scaled by 1/sqrt(n_features).
        weights = np.random.normal(scale=1/n_features**0.5, size=n_features)
        # Report roughly ten times; integer arithmetic avoids float-modulo surprises.
        log_every = max(1, epochs // 10)

        for e in range(epochs):
            # One vectorised pass over all records replaces the per-row loop.
            output = self.__sigmoid(np.dot(features, weights))
            error_term = self.__error_term(labels - output, output)
            del_w = np.dot(error_term, features)
            weights += alpha * del_w/n_records
            if e % log_every == 0:
                out = self.__sigmoid(np.dot(features, weights))
                loss = np.mean((out - labels)**2)
                print('Epoch:', e)
                if last_loss < loss:
                    print('Train loss:', loss, "WARNING - Loss Increasing")
                else:
                    print('Train loss:', loss)
                last_loss = loss
                print("============")
        self.__weights = weights
        print('Training Finished')
        return weights

    def test_accuracy(self, threshold=0.5):
        """Evaluate the trained weights on the held-out test split.

        Args:
            threshold: Outputs above this value count as a positive prediction.

        Returns:
            float: Fraction of test rows whose prediction matches the label.

        Raises:
            RuntimeError: If ``train_nn`` has not been called yet.
        """
        if self.__weights is None or self.__features_test is None:
            raise RuntimeError('Call train_test_split() and train_nn() before test_accuracy()')
        test_features = np.asarray(self.__features_test, dtype=float)
        test_output = self.__sigmoid(np.dot(test_features, self.__weights))
        predictions = test_output > threshold
        accuracy = np.mean(predictions == np.asarray(self.__labels_test))
        print("Prediction accuracy: {:.3f}".format(accuracy))
        return accuracy
    
    
    # forward pass
    

In [53]:
# Build the pipeline object; the constructor reads the CSV and prints 'Data Loaded'.
obj = NN(file='student_data.csv')

Data Loaded


In [120]:
# Hyperparameters for training: number of passes over the data and the learning rate
epochs, alpha = 1000, 0.1

# Training function
def train_nn(features, labels, epochs, alpha):
    """Train a single-layer sigmoid network with batch gradient descent.

    Args:
        features: DataFrame of input features (its ``.values`` rows are iterated).
        labels: Target values aligned with the rows of ``features``.
        epochs: Number of passes over the data.
        alpha: Learning rate.

    Returns:
        numpy.ndarray: The learned weight vector, one entry per feature column.

    Notes:
        Relies on ``sigmoid`` and ``error_term_formula`` being defined in the
        notebook namespace. NOTE(review): neither is defined in the visible
        cells; confirm they exist on a fresh kernel run.
        Re-seeds NumPy's global RNG on every call, so repeated calls start
        from the same initial weights.
    """
    np.random.seed(20)
    n_records, n_features = features.shape
    last_loss = 1
    # Small random initial weights, scaled by 1/sqrt(n_features).
    weights = np.random.normal(scale=1/n_features**0.5, size=n_features)
    # Report roughly ten times. Integer arithmetic keeps the check exact; the
    # previous float modulo misfired when epochs was not a multiple of 10.
    log_every = max(1, epochs // 10)

    for e in range(epochs):
        # Accumulate the gradient over every record, then apply one update per epoch.
        del_w = np.zeros(weights.shape)
        for x, y in zip(features.values, labels):
            output = sigmoid(np.dot(x, weights))
            error = y - output
            error_term = error_term_formula(error, output)
            del_w += error_term*x
        weights += alpha * del_w/n_records
        if e % log_every == 0:
            out = sigmoid(np.dot(features, weights))
            loss = np.mean((out - labels)**2)
            print('Epoch:', e)
            if last_loss < loss:
                print('Train loss:', loss, "WARNING - Loss Increasing")
            else:
                print('Train loss:', loss)
            last_loss = loss
            print("============")
    print('Training Finished')
    return weights

# Fit the model with the hyperparameters chosen above.
# NOTE(review): `features` and `labels` are not defined in the visible cells;
# confirm they are created before this cell on a fresh kernel run.
weights = train_nn(features=features, labels=labels, epochs=epochs, alpha=alpha)

Epoch: 0
Train loss: 0.25385492547853555
Epoch: 100
Train loss: 0.22935988613136993
Epoch: 200
Train loss: 0.22063119597944447
Epoch: 300
Train loss: 0.21595722627505323
Epoch: 400
Train loss: 0.2127429996470624
Epoch: 500
Train loss: 0.21030519363498557
Epoch: 600
Train loss: 0.20840062878909005
Epoch: 700
Train loss: 0.2068997653895651
Epoch: 800
Train loss: 0.20571138936235975
Epoch: 900
Train loss: 0.20476505908660333
Training Finished


In [121]:
# Show the learned weight vector returned by train_nn (one entry per feature column).
weights

array([ 0.11693129, -0.35895849,  0.29031635, -0.4534367 , -0.88931928,
       -0.66072138])

## Calculating Accuracy on Test Data

In [122]:
# Forward pass over the held-out features using the trained weights.
# NOTE(review): `sigmoid`, `features_test` and `labels_test` are not defined in
# the visible cells; confirm they exist on a fresh kernel run.
test_output = sigmoid(np.dot(features_test, weights))

# Outputs above 0.5 count as a positive prediction.
predictions = test_output > 0.5

# Share of test rows whose prediction matches the true label.
accuracy = (predictions == labels_test).mean()
print("Prediction accuracy: {:.3f}".format(accuracy))

Prediction accuracy: 0.675
