# Exercise: Implementing Gradient Descent

## Data Prep

In [4]:
import numpy as np
import pandas as pd

admissions = pd.read_csv('binary.csv')

admissions

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.00,1
3,1,640,3.19,4
4,0,520,2.93,4
...,...,...,...,...
395,0,620,4.00,2
396,0,560,3.04,3
397,0,460,2.63,2
398,0,700,3.65,2


In [5]:
# Make dummy variables for rank
data = pd.get_dummies(admissions, prefix='rank', columns=['rank'])

data

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,380,3.61,False,False,True,False
1,1,660,3.67,False,False,True,False
2,1,800,4.00,True,False,False,False
3,1,640,3.19,False,False,False,True
4,0,520,2.93,False,False,False,True
...,...,...,...,...,...,...,...
395,0,620,4.00,False,True,False,False
396,0,560,3.04,False,False,True,False
397,0,460,2.63,False,True,False,False
398,0,700,3.65,False,True,False,False


In [6]:
# Standardize features
for field in ['gre', 'gpa']:
    mean, std = data[field].mean(), data[field].std()
    data.loc[:,field] = (data[field]-mean)/std

data

Unnamed: 0,admit,gre,gpa,rank_1,rank_2,rank_3,rank_4
0,0,-1.798011,0.578348,False,False,True,False
1,1,0.625884,0.736008,False,False,True,False
2,1,1.837832,1.603135,True,False,False,False
3,1,0.452749,-0.525269,False,False,False,True
4,0,-0.586063,-1.208461,False,False,False,True
...,...,...,...,...,...,...,...
395,0,0.279614,1.603135,False,True,False,False
396,0,-0.239793,-0.919418,False,False,True,False
397,0,-1.105469,-1.996759,False,True,False,False
398,0,0.972155,0.683454,False,True,False,False


In [9]:
# Split off random 10% of the data for testing
np.random.seed(42)
sample = np.random.choice(data.index, size=int(len(data)*0.9), replace=False)
data, test_data = data.loc[sample], data.drop(sample)

In [10]:
# Split into features and targets
features, targets = data.drop('admit', axis=1), data['admit']
features_test, targets_test = test_data.drop('admit', axis=1), test_data['admit']

features

Unnamed: 0,gre,gpa,rank_1,rank_2,rank_3,rank_4
329,-0.759199,-1.208461,False,False,False,True
326,0.799020,-0.209950,False,True,False,False
66,1.318426,0.604625,False,False,False,True
24,1.491561,-0.104844,False,True,False,False
205,1.664697,1.077603,False,False,True,False
...,...,...,...,...,...,...
295,-0.586063,-0.367610,False,False,True,False
248,0.799020,0.657178,False,False,True,False
377,1.837832,1.603135,False,True,False,False
1,0.625884,0.736008,False,False,True,False


In [12]:
targets

329    0
326    0
66     0
24     1
205    1
      ..
295    0
248    0
377    1
1      1
338    0
Name: admit, Length: 291, dtype: int64