# Model1 - Amissions: Accept or Reject?

## Based on findings from feature selections work files: 
 * successful-student-profile/feature-selection-1.ipynb 
 * successful-student-profile/feature-selection-2-with-clustering.ipynb
 * successful-student-profile/feature-selection-3-with-ND-separated.ipynb
 * successful-student-profile/apps-goal-text-analysis-NB.ipynb

## Let's build our first model!

Output: % of graduation of each students
 * assumptions: our data visibility stops after student submit the application
       means we know when the cohort is open, close, start; when student apply, submit; but we don't know when we are going to accept/reject and notify the student, when the student going to put down payment

Approach: ND separated; application type separated

Methods: Logistics Regression / Decision Tree

Features:
 * user_age : account created with Udacity
 * cohort_open_to_notify
 * cohort_open_to_close
 * apply_before_start : days student apply before cohort start
 * apply_to_submit : days from apply to submit application
 * num_course_enrolled
 
 * education
 * employment
 * professional experience
 * python, java, c++, porbability, statistics, linear algebra, computer science, machine learning
 
 * mentioned programming skills / technology in goal
 
Added:
 * ND enrolled previously than applications
 * suspended / paused: -2; cancelled / trial ended: -1; no touch: 0; enrolled: 1; graduated/term finished: 2

In [112]:
import psycopg2
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.model_selection import KFold
from sklearn import linear_model,tree,svm,ensemble
from sklearn.metrics import accuracy_score
import statsmodels.api as sm
import random
from random import randrange
import re
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
laurel = open('conn_laurel.txt', 'r')
hardy = open('conn_hardy.txt', 'r')
conn_laurel = psycopg2.connect(laurel.read())
conn_hardy = psycopg2.connect(hardy.read())

sql_apps = open('successful-student-profile-apps.sql', 'r')
sql_courses = open('successful-student-profile-courses.sql', 'r')
sql_nd_enrolls = open('successful-student-profile-nd-enrolls.sql','r')
sql_questions = open('successful-student-profile-questions.sql', 'r')

df_apps = pd.read_sql(sql_apps.read(),conn_laurel)
df_courses = pd.read_sql(sql_courses.read(),conn_hardy)
df_nd_enrolls = pd.read_sql(sql_nd_enrolls.read(),conn_laurel)
df_questions = pd.read_sql(sql_questions.read(),conn_laurel)

In [3]:
df_apps.groupby(['nd_key','application_status'])['application_id'].count()

nd_key              application_status
nd001-connect       paid                     1
nd002-connect       paid                    26
nd004-connect-reno  graduated                6
                    paid                    14
nd009-connect       paid                    39
nd013               graduated               11
                    paid                  2762
                    term completed        1019
nd209               graduated                4
                    paid                  1166
                    term completed         227
nd889               paid                  1364
                    term completed         146
Name: application_id, dtype: int64

In [4]:
# geo / country -> dummies
country = pd.get_dummies(df_apps['applicant_country'])
df_app = pd.concat([df_apps,country],axis=1)

In [5]:
# ND previously than application
df_nd_enrolls = df_nd_enrolls.assign(status1 = 0)
df_nd_enrolls['status1'] = df_nd_enrolls['status'].map({'SUSPENDED':-2,'CANCELLED':-1,'GRADUATED':3,'PAUSED':-2,'ENROLLED':1,'TERM_COMPLETED':2})
df_nd_enroll = df_nd_enrolls.groupby(['application_id','nd_key'])['status1'].max().reset_index()
df_nd = df_nd_enroll[['application_id','nd_key','status1']].pivot(index='application_id', values='status1', columns='nd_key').fillna(0).reset_index()

In [6]:
# data
df = pd.merge(df_app,df_courses,on=['application_id','cohort_id','applicant_id','nd_key'],how='left')
df = pd.merge(df,df_nd,on=['application_id'],how='left').fillna(0)

In [None]:
df.head()

In [None]:
df.shape

In [None]:
df_apps.columns

In [None]:
df_apps.shape

## Based on the pivot above, let's pick nd013 as pilot to model.

In [7]:
df_nd013 = df.query("nd_key == 'nd013'")
df_nd013 = df_nd013.assign(status = df_nd013['application_status'].map({'graduated':1,'term completed':1,'paid':0}))
print(df_nd013.shape)
df_nd013.columns

(3792, 161)


Index(['application_id', 'cohort_id', 'applicant_id', 'nd_key',
       'application_type', 'applicant_country', 'applicant_geo',
       'application_status', 'user_age', 'cohort_open_to_notify',
       ...
       'nd116', 'nd124', 'nd201', 'nd209', 'nd801', 'nd802', 'nd803', 'nd818',
       'nd889', 'status'],
      dtype='object', length=161)

In [8]:
df_nd013.groupby(['cohort_id','application_status'])['application_id'].count()

cohort_id  application_status
18         graduated               9
           paid                  610
           term completed        501
220        paid                  332
293        paid                  314
47         graduated               2
           paid                  463
           term completed        349
88         paid                  647
           term completed        159
89         paid                  396
           term completed         10
Name: application_id, dtype: int64

In [9]:
# among cohorts 18,47,88,89,220, cohort 18 and 47 are closed. cohort 88 is closing in Dec
df_nd013 = df_nd013.loc[(df_nd013['cohort_id']=='18') | (df_nd013['cohort_id']=='47')]
df_nd013.shape

(1934, 161)

In [10]:
# only columns we care about
#cols = ['status','application_id','user_age','cohort_open_to_notify','cohort_open_to_close'
#        ,'apply_before_start','apply_to_submit','num_course_enrolled']
cols = ['status','application_id','user_age', 'cohort_open_to_notify',
       'cohort_open_to_close', 'cohort_open_month', 'application_month',
       'apply_before_start', 'apply_to_submit', 'num_courses',
       'num_course_finished', 'num_course_enrolled',
       'user_study_age'] + list(df['applicant_country'].unique()) + list(df_nd_enrolls['nd_key'].unique())
df_nd013 = df_nd013[cols]

In [11]:
# from questions_responses
# categorical
df1 = df_nd013
categories = {'education':'48e7b492-62b4-4d99-b596-80d68f2966ae'
             ,'employment':'fba3666b-db04-46e9-8f3d-2a303f13e0a5'
             ,'professional_experience':'6967091c-09c6-4455-9f1e-d0de318bacc5'
             ,'goal':'779c3b6c-3648-423b-8d3f-8a4f36f23e2a'}
for i,qr_id in categories.items():
    d = df_questions.copy()
    d[i] = 0
    d[i] = np.where(d['question_id']==qr_id,d['response'],'0')
    o_i = d.groupby('application_id').agg({i:'max'}).reset_index()
    o_i = o_i[o_i.iloc[:,1] != '0']
    o_i = o_i[o_i.iloc[:,1] != 'Other']
    #df1 = pd.merge(df1,o_i,on=['application_id'],how='inner')
    d_i = pd.get_dummies(o_i[i])
    df_i = pd.concat([o_i['application_id'],d_i],axis=1)
    df1 = pd.merge(df1,df_i,on='application_id',how='left')

In [36]:
# from questions_responses
# programming languages
data = df1
for i in (['python','java','cplus','probability','statistics','linear algebra','computer science','machine learning']):
    if i == 'cplus':
        j = 'c\+\+' 
    else:
        j = i
    d = df_questions.copy()
    d[i] = 0
    d[i] = np.where(d['response'].str.lower().str.contains(j),1,d[i])
    d[i] = np.where(d['question_prompt'].str.lower().str.contains(j)&d['response'].str.match('.*[1-9].*'),1,d[i])
    o_i = d.groupby('application_id').agg({i:'max'}).reset_index()
    data = pd.merge(data,o_i,on=['application_id'],how='left')

In [37]:
# from goals : text
# data
df_goal = df_questions.loc[df_questions['question_id'].isin(['2ad03aaa-1b35-4381-9c43-907b1b4eba67','6afe0061-746b-4bd7-807c-393fe5c7599d'])]

In [38]:
def text_process(x):
    import string
    import nltk
    from nltk.corpus import stopwords
    stopWords = set(stopwords.words('english'))   
    x = x.lower()
    x.translate(str.maketrans('', '', string.punctuation))
    return ' '.join([w for w in x.split() if w not in stopWords])

In [39]:
# mentioned ND
nd_words = ['self driving','ai','artificial intelligence','robotics','vr','ar','machine learning','deep learning']
# technology related?
technology_words = ['computer science','machine learning','deep learning']
# some other words?
other_words = ['knowledge','learn','program','skills','experience','engineering','industry','data','field'
              ,'technology','vision','autonomous','project','projects']

df_goal = df_goal.assign(goal = df_goal['response'].apply(lambda x: text_process(x)))

words = {'nd_words': ['self driving','ai','artificial intelligence','robotics','vr','ar','machine learning','deep learning']
        ,'technology_words': ['computer science','machine learning','deep learning']
        ,'other_words': ['knowledge','learn','program','skills','experience','engineering','industry','data','field'
              ,'technology','vision','autonomous','project','projects']}

In [40]:
for i,word_list in words.items():
    d = df_goal.copy()
    d[i] = 0
    d[i] = df_goal['response'].apply(lambda x: 1 if any(w in x for w in word_list) else 0)
    o = d.groupby('application_id').agg({i:'max'}).reset_index()
    data = pd.merge(data,o,on=['application_id'],how='left')

In [41]:
# scale data

# fill NaN
data = data.fillna(0)
data_scale = data.iloc[:,2:]

# Find the min and max values for each column
def dataset_minmax(dataset):
    minmax = list()
    for column in dataset:
        col_values = [data[column][i] for i in range(dataset.shape[0])]
        value_min = np.min(col_values)
        value_max = np.max(col_values)
        minmax.append([value_min, value_max])
    return minmax
 
# Rescale dataset columns to the range 0-1
def normalize_dataset(dataset, minmax):
    for row in range(dataset.shape[0]):
        for column in range(dataset.shape[1]):
            if (minmax[column][1] - minmax[column][0]) == 0:
                dataset.iloc[row,column] = dataset.iloc[row,column]
            else:
                dataset.iloc[row,column] = (dataset.iloc[row,column] - minmax[column][0]) / (minmax[column][1] - minmax[column][0])

minmax = dataset_minmax(data_scale)
normalize_dataset(data_scale, minmax)
data = pd.concat([data['status'],data_scale],axis=1)

In [42]:
data.head()

Unnamed: 0,status,user_age,cohort_open_to_notify,cohort_open_to_close,cohort_open_month,application_month,apply_before_start,apply_to_submit,num_courses,num_course_finished,...,java,cplus,probability,statistics,linear algebra,computer science,machine learning,nd_words,technology_words,other_words
0,0,0.032258,0.0,0.0,1.0,1.0,0.746479,0.0,0.0,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
1,1,0.443548,0.0,0.0,1.0,0.5,0.028169,0.0,0.027778,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0
2,0,0.080645,0.0,0.0,1.0,1.0,0.661972,0.119403,0.009259,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0
3,0,0.403226,0.0,0.0,1.0,0.75,0.098592,0.492537,0.027778,0.0,...,0.0,1.0,1.0,1.0,1.0,0.0,1.0,1.0,0.0,1.0
4,0,0.935484,1.0,1.0,0.0,0.0,0.15493,0.0,0.064815,0.0,...,0.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0


In [166]:
# split into training and test data set
ratio = 0.1
N = data.shape[0]
index = random.sample(range(N),int(ratio*N))
TEST = data[data.index.isin(index)]#.reset_index()
TRAIN = data[~data.index.isin(index)]#.reset_index()
X_train = TRAIN.iloc[:,1:]
y_train = TRAIN.iloc[:,0]
X_test = TEST.iloc[:,1:]
y_test = TEST.iloc[:,0]

In [153]:
TRAIN.groupby(['status'])['user_age'].count()

status
0    963
1    778
Name: user_age, dtype: int64

In [57]:
767/(767+974)

0.44055140723721997

In [154]:
TEST.groupby(['status'])['user_age'].count()

status
0    110
1     83
Name: user_age, dtype: int64

In [63]:
# build 5 models:
clf1 = linear_model.LogisticRegression()
clf2 = tree.DecisionTreeClassifier()
clf3 = svm.SVC()
clf4 = ensemble.RandomForestClassifier()
clf5 = ensemble.GradientBoostingClassifier()

In [70]:
def train_predict(learner, X_train, y_train, X_test, y_test): 
    from sklearn.metrics import accuracy_score,confusion_matrix
    results = {}
    learner = learner.fit(X_train,y_train)
    predictions_test = learner.predict(X_test)
    predictions_train = learner.predict(X_train)
    results['acc_train'] = accuracy_score(y_train,predictions_train)
    results['acc_test'] = accuracy_score(y_test,predictions_test)
    results['confusion_matrix_test'] = confusion_matrix(y_test,predictions_test)
    try:
        results['coef_'] = learner.coef_
    except Exception:
        pass
    print("{} trained on {} samples.".format(learner.__class__.__name__,X_train.shape[0]))
    return results

In [77]:
# 5-fold CV:
kf = KFold(n_splits = 5, shuffle = False, random_state = 42)
results = {}

for clf in [clf1,clf2,clf3,clf4,clf5]:
    clf_name = clf.__class__.__name__
    results[clf_name] = {}
    for i,index in zip(range(kf.get_n_splits()),kf.split(X_train)):
        x_training,x_validate = X_train.iloc[index[0],:],X_train.iloc[index[1],:]
        y_training,y_validate = y_train.iloc[index[0]],y_train.iloc[index[1]]
        results[clf_name][i] = train_predict(clf,x_training,y_training,x_validate,y_validate)

LogisticRegression trained on 1392 samples.
LogisticRegression trained on 1393 samples.
LogisticRegression trained on 1393 samples.
LogisticRegression trained on 1393 samples.
LogisticRegression trained on 1393 samples.
DecisionTreeClassifier trained on 1392 samples.
DecisionTreeClassifier trained on 1393 samples.
DecisionTreeClassifier trained on 1393 samples.
DecisionTreeClassifier trained on 1393 samples.
DecisionTreeClassifier trained on 1393 samples.
SVC trained on 1392 samples.
SVC trained on 1393 samples.
SVC trained on 1393 samples.
SVC trained on 1393 samples.
SVC trained on 1393 samples.
RandomForestClassifier trained on 1392 samples.
RandomForestClassifier trained on 1393 samples.
RandomForestClassifier trained on 1393 samples.
RandomForestClassifier trained on 1393 samples.
RandomForestClassifier trained on 1393 samples.
GradientBoostingClassifier trained on 1392 samples.
GradientBoostingClassifier trained on 1393 samples.
GradientBoostingClassifier trained on 1393 samples.

In [72]:
for k,learner in enumerate(results.keys()):
    for i in np.arange(5):
        print(k,learner,'acc_test',np.mean(results[learner][i]['acc_test']))

0 LogisticRegression acc_test 0.607449856734
0 LogisticRegression acc_test 0.528735632184
0 LogisticRegression acc_test 0.600574712644
0 LogisticRegression acc_test 0.566091954023
0 LogisticRegression acc_test 0.600574712644
1 DecisionTreeClassifier acc_test 0.512893982808
1 DecisionTreeClassifier acc_test 0.51724137931
1 DecisionTreeClassifier acc_test 0.520114942529
1 DecisionTreeClassifier acc_test 0.534482758621
1 DecisionTreeClassifier acc_test 0.522988505747
2 SVC acc_test 0.570200573066
2 SVC acc_test 0.543103448276
2 SVC acc_test 0.548850574713
2 SVC acc_test 0.594827586207
2 SVC acc_test 0.522988505747
3 RandomForestClassifier acc_test 0.573065902579
3 RandomForestClassifier acc_test 0.563218390805
3 RandomForestClassifier acc_test 0.551724137931
3 RandomForestClassifier acc_test 0.543103448276
3 RandomForestClassifier acc_test 0.537356321839
4 GradientBoostingClassifier acc_test 0.561604584527
4 GradientBoostingClassifier acc_test 0.511494252874
4 GradientBoostingClassifier a

In [80]:
val = {}
for k, learner in enumerate(results.keys()):
    val[learner] = [results[learner][j]['acc_test'] for j in results[learner]]

pick_a_model = {}
for learner in val:
    pick_a_model[learner] = sum(val[learner])/len(val[learner])
    print(learner,"mean acc: ",sum(val[learner])/len(val[learner]))

LogisticRegression mean acc:  0.580685373646
DecisionTreeClassifier mean acc:  0.51464940882
SVC mean acc:  0.555994137602
RandomForestClassifier mean acc:  0.54624049007
GradientBoostingClassifier mean acc:  0.56806474986


In [81]:
pick_a_model

{'DecisionTreeClassifier': 0.51464940881994536,
 'GradientBoostingClassifier': 0.56806474986002709,
 'LogisticRegression': 0.5806853736455555,
 'RandomForestClassifier': 0.54624049007015107,
 'SVC': 0.55599413760168626}

In [85]:
model_picked = max(pick_a_model, key=lambda k: pick_a_model[k])
model_picked

'LogisticRegression'

In [91]:
results

{'DecisionTreeClassifier': {0: {'acc_test': 0.51002865329512892,
   'acc_train': 1.0,
   'confusion_matrix_test': array([[116,  83],
          [ 88,  62]])},
  1: {'acc_test': 0.49712643678160917,
   'acc_train': 0.99928212491026558,
   'confusion_matrix_test': array([[100,  89],
          [ 86,  73]])},
  2: {'acc_test': 0.49712643678160917,
   'acc_train': 0.99928212491026558,
   'confusion_matrix_test': array([[101,  90],
          [ 85,  72]])},
  3: {'acc_test': 0.54022988505747127,
   'acc_train': 1.0,
   'confusion_matrix_test': array([[111,  95],
          [ 65,  77]])},
  4: {'acc_test': 0.52873563218390807,
   'acc_train': 0.99928212491026558,
   'confusion_matrix_test': array([[119,  63],
          [101,  65]])}},
 'GradientBoostingClassifier': {0: {'acc_test': 0.56733524355300857,
   'acc_train': 0.70977011494252873,
   'confusion_matrix_test': array([[150,  49],
          [102,  48]])},
  1: {'acc_test': 0.51436781609195403,
   'acc_train': 0.73223259152907394,
   'confusi

In [100]:
pick_highest_test_acc = {}
for i in range(len(results[model_picked])):
    pick_highest_test_acc[i] = results[model_picked][i]['acc_test']
pick_highest_test_acc

{0: 0.60744985673352436,
 1: 0.52873563218390807,
 2: 0.60057471264367812,
 3: 0.56609195402298851,
 4: 0.60057471264367812}

In [99]:
highest_test_acc_picked = max(pick_highest_test_acc, key=lambda k: pick_highest_test_acc[k])
highest_test_acc_picked

0

In [102]:
start_coef = results[model_picked][highest_test_acc_picked]['coef_']

## Do a Stochastic Gradient Descent on Logistics Regression

In [156]:
# make a prediction with coefficients
def predict(row,coef):
    from math import exp
    yhat = coef[0]
    for i in range(len(row)-1):
        yhat += coef[i+1] * row[i]
    return 1.0 / (1.0 + exp(-yhat))

# estimate logistic regression coefficients using stochastic gradient descent
def coef_sgd(train,l_rate,n_epoch):
    coef = [0.0 for i in range(train.shape[1])]
    for epoch in range(n_epoch):
        for row in range(train.shape[0]):
            yhat = predict(train.iloc[row,1:],coef)
            error = train.iloc[row,0] - yhat
            coef[0] = coef[0] + l_rate*error*yhat*(1.0-yhat)
            for i in range(train.shape[1]-1):
                coef[i+1] = coef[i+1] + l_rate*error*yhat*(1.0-yhat)*train.iloc[row,i]
    return coef

# Linear Regression Algorithm With Stochastic Gradient Descent
def logistic_regression(train, test, l_rate, n_epoch):
    predictions = list()
    coef = coef_sgd(train, l_rate, n_epoch)
    for row in range(test.shape[0]):
        yhat = predict(test.iloc[row,1:],coef)
        yhat = round(yhat)
        predictions.append(yhat)
    return(coef,predictions)

# Evaluate an algorithm using a cross validation split
def evaluate_algorithm(dataset, algorithm, n_folds, *args):
    folds = cross_validation_split(dataset, n_folds)
    scores = list()
    for fold in folds:
        train_set = list(folds)
        train_set.remove(fold)
        train_set = sum(train_set, [])
        test_set = list()
        for row in fold:
            row_copy = list(row)
            test_set.append(row_copy)
            row_copy[-1] = None
        predicted = algorithm(train_set, test_set, *args)
        actual = [row[-1] for row in fold]
        accuracy = accuracy_metric(actual, predicted)
        scores.append(accuracy)
    return scores

# Split a dataset into k folds
def cross_validation_split(dataset, n_folds):
    dataset_split = list()
    dataset_copy = list(dataset)
    fold_size = int(len(dataset) / n_folds)
    for i in range(n_folds):
        fold = list()
        while len(fold) < fold_size:
            index = randrange(len(dataset_copy))
            fold.append(dataset_copy.pop(index))
        dataset_split.append(fold)
    return dataset_split

In [168]:
l_rate = 0.01
n_epoch = 10
predict_sgd = logistic_regression(TRAIN,TEST,l_rate,n_epoch)

In [169]:
end_coef = predict_sgd[0]
pd.concat([pd.DataFrame(start_coef).T,pd.DataFrame(end_coef)],axis=1)

Unnamed: 0,0,0.1
0,-0.049900,-0.526459
1,-0.276095,8.468038
2,-0.276095,-1.568854
3,-0.490733,-1.610255
4,-0.335540,-1.610255
5,0.142253,1.083796
6,0.088916,0.800953
7,0.101329,0.403081
8,0.193358,-0.200690
9,-0.554491,-0.010147


In [176]:
from sklearn.metrics import confusion_matrix
T = confusion_matrix(y_test,predict_sgd[1])
(T[0][0]+T[1][1])/(T[0][0]+T[0][1]+T[1][0]+T[1][1])

0.49222797927461137

In [177]:
T

array([[57, 35],
       [63, 38]])

In [None]:
[[161,  38],
 [ 99,  51]]

In [188]:
for row in range(TEST.shape[0]):
    yhat = predict(TEST.iloc[row,1:],start_coef)

IndexError: index 1 is out of bounds for axis 0 with size 1

In [190]:
matrix(X_test)*start_coef

NameError: name 'matrix' is not defined

In [113]:
# evaluate algorithm
n_folds = 5
l_rate = 0.1
n_epoch = 100
evaluate_algorithm(data, logistic_regression, n_folds, l_rate, n_epoch)

ValueError: empty range for randrange()

In [118]:
dataset_copy = list(data)
len(dataset_copy)
randrange(179)

100

In [None]:
# Make a prediction with coefficients
def predict(row, coefficients):
	yhat = coefficients[0]
	for i in range(len(row)-1):
		yhat += coefficients[i + 1] * row[i]
	return 1.0 / (1.0 + exp(-yhat))
 
# Estimate logistic regression coefficients using stochastic gradient descent
def coefficients_sgd(train, l_rate, n_epoch):
	coef = [0.0 for i in range(len(train[0]))]
	for epoch in range(n_epoch):
		for row in train:
			yhat = predict(row, coef)
			error = row[-1] - yhat
			coef[0] = coef[0] + l_rate * error * yhat * (1.0 - yhat)
			for i in range(len(row)-1):
				coef[i + 1] = coef[i + 1] + l_rate * error * yhat * (1.0 - yhat) * row[i]
	return coef
 
# Linear Regression Algorithm With Stochastic Gradient Descent
def logistic_regression(train, test, l_rate, n_epoch):
	predictions = list()
	coef = coefficients_sgd(train, l_rate, n_epoch)
	for row in test:
		yhat = predict(row, coef)
		yhat = round(yhat)
		predictions.append(yhat)
	return(predictions)

In [None]:
# Logistic Regression
clf = linear_model.LogisticRegression()
clf.fit(X_train,y_train)
predictions = clf.predict(X_test)
accuracy_score(y_test, predictions)

In [None]:
features = X_train.describe().T
features

In [None]:
logit = sm.Logit(y_train,X_train)
result = logit.fit()
print(result.summary())

In [None]:
# Neural Network - keras
import keras
from keras.datasets import imdb
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

In [None]:
num_classes = 2
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)

In [None]:
# Build the model architecture
model = Sequential()
model.add(Dense(128, activation='relu', input_dim=X_train.shape[1]))
model.add(Dropout(.1))
model.add(Dense(64, activation='relu'))
model.add(Dropout(.1))
model.add(Dense(2, activation='softmax'))

# Compiling the model using a loss function and an optimizer.
model.compile(loss = 'categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

In [None]:
# Training the model
model.fit(X_train.as_matrix(), y_train, epochs=300, batch_size=10, verbose=0)

In [None]:
score = model.evaluate(X_test.as_matrix(), y_test, verbose=0)
print("Accuracy: ", score[1])