# Classical ML Training and Results V2 

## INITIALIZATION

In [19]:
from sklearn import svm
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle 
import statistics as stats
from sklearn.model_selection import cross_val_score

## MODEL INITIALIZATION

In [2]:
# read data from csv file and put into one matrix 
# same process of v1
def readData(filename,x1):
    f = open("data/"+filename,'r')
    for row in f:
        data = [int(p) for p in row.split(',')]
        x1.append(np.array(data))
    return 

x = []
readData("getOutBed.csv",x)
readData("getOnBed.csv",x)
readData("layOnBed.csv",x)
readData("sitOnBed.csv",x)

In [3]:
# make X an numpy array and initialize Y 
x = np.array(x)
y = np.array([1]*120+[0]*120)

## FEATURE EXTRACTION

In [4]:
x_feature = []
for r in x:
    s1, s2, s3 = r[:20000], r[20000:40000], r[40000:]
    temp1 = [stats.mean(s1), stats.median(s1), stats.variance(s1), max(s1), min(s1)]
    temp2 = [stats.mean(s2), stats.median(s2), stats.variance(s2), max(s2), min(s2)]
    temp3 = [stats.mean(s3), stats.median(s3), stats.variance(s3), max(s3), min(s3)]
#     print temp1
    x_feature.append(np.array(temp1+temp2+temp3))


In [5]:
x_feature = np.array(x_feature)

## BALANCING SAMPLES

In [6]:
index = 120+np.random.choice(360,240,replace=False)

In [7]:
x_lst = x_feature.tolist()
print len(x_lst[0])
for i in sorted(index, reverse=True):
    del x_lst[i]

15


In [8]:
x_feature = np.array(x_lst)

In [9]:
print x_feature.shape

(240, 15)


In [10]:
# shuffle X and Y correspondingly 
X, Y = shuffle(x_feature, y, random_state = 34)
offset = int(len(X)*0.8)
X_train, Y_train = X[:offset], Y[:offset]
X_test, Y_test = X[offset:], Y[offset:]
print (type(X_train), type(Y_train))
print (len(X_train), len(X_train[0]), len(Y_train), len(X_test), len(X_test[0]), len(Y_test))

(<type 'numpy.ndarray'>, <type 'numpy.ndarray'>)
(192, 15, 192, 48, 15, 48)


## SVM 

In [11]:
# training of SVM 
clf = svm.SVC(gamma='scale',max_iter=5000, verbose=True)
clf.fit(X_train, Y_train) 

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=5000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=True)

In [12]:
# Verification of SVM 
results = clf.predict(X_test)
correct = 0
for i in xrange(len(results)):
    if results[i] == Y_test[i]:
        correct+=1
print (correct/float(len(Y_test)))

0.75


## GAUSSIAN NAIVE BAYES

In [13]:
# training 
clf_nb = GaussianNB()
clf_nb.fit(X_train, Y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [14]:
results_nb = clf_nb.predict(X_test)
print results

[1 0 1 0 0 1 0 1 1 0 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 0 0 0 1 1 1
 1 0 1 0 1 1 0 1 0 0 0]


In [15]:
# verification
c_nb = 0 
for i in xrange(len(results_nb)):
    if results_nb[i] == Y_test[i]:
        c_nb += 1
print (c_nb/float(len(Y_test)))

0.833333333333


## GRADIENT BOOSTING CLASSIFIER

In [39]:
clf_gbc = GradientBoostingClassifier(n_estimators=500, learning_rate=1.0,max_depth=2, random_state=0, verbose = True).fit(X_train, Y_train)

      Iter       Train Loss   Remaining Time 
         1           0.6280            0.38s
         2           0.4397            0.39s
         3           0.3261            0.37s
         4           0.2599            0.35s
         5           0.2385            0.32s
         6           0.2035            0.32s
         7           0.1787            0.31s
         8           0.1448            0.30s
         9           0.1088            0.30s
        10           0.0948            0.30s
        20           0.0277            0.25s
        30           0.0092            0.23s
        40           0.0036            0.22s
        50           0.0013            0.21s
        60           0.0006            0.21s
        70           0.0003            0.19s
        80           0.0003            0.18s
        90           0.0003            0.17s
       100           0.0003            0.16s
       200           0.0003            0.09s
       300           0.0003            0.05s
       40

In [37]:
# verification
clf_gbc.score(X_test,Y_test)

0.8958333333333334

### Cross Validation

In [40]:
cv = ShuffleSplit(n_splits=5, test_size=0.4, random_state=0)
this_score = cross_val_score(clf_gbc, X, Y,cv=cv, verbose=0)
print this_score

      Iter       Train Loss   Remaining Time 
         1           0.6496            0.43s
         2           0.4771            0.38s
         3           0.3924            0.35s
         4           0.3173            0.33s
         5           0.3820            0.32s
         6           0.1821            0.31s
         7           0.1326            0.31s
         8           0.1068            0.30s
         9           0.0922            0.29s
        10           0.0794            0.29s
        20           0.0143            0.24s
        30           0.0043            0.22s
        40           0.0017            0.21s
        50           0.0005            0.20s
        60           0.0004            0.18s
        70           0.0004            0.17s
        80           0.0004            0.16s
        90           0.0004            0.15s
       100           0.0004            0.14s
       200           0.0004            0.08s
       300           0.0004            0.05s
       40