# Classical ML Training and Results V2 

## INITIALIZATION

In [56]:
from sklearn import svm
from sklearn.model_selection import ShuffleSplit
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
import matplotlib.pyplot as plt
from sklearn.utils import shuffle 
import statistics as stats

## MODEL INITIALIZATION

In [57]:
# read data from csv file and put into one matrix 
# same process of v1
def readData(filename,x1):
    f = open("data/"+filename,'r')
    for row in f:
        data = [int(p) for p in row.split(',')]
        x1.append(np.array(data))
    return 

x = []
readData("getOutBed.csv",x)
readData("getOnBed.csv",x)
readData("layOnBed.csv",x)
readData("sitOnBed.csv",x)

In [59]:
# make X an numpy array and initialize Y 
x = np.array(x)
y = np.array([1]*120+[0]*120)

## FEATURE EXTRACTION

In [63]:
x_feature = []
for r in x:
    s1, s2, s3 = r[:20000], r[20000:40000], r[40000:]
    temp1 = [stats.mean(s1), stats.median(s1), stats.variance(s1), max(s1), min(s1)]
    temp2 = [stats.mean(s2), stats.median(s2), stats.variance(s2), max(s2), min(s2)]
    temp3 = [stats.mean(s3), stats.median(s3), stats.variance(s3), max(s3), min(s3)]
#     print temp1
    x_feature.append(np.array(temp1+temp2+temp3))


In [64]:
x_feature = np.array(x_feature)

## BALANCING SAMPLES

In [65]:
index = 120+np.random.choice(360,240,replace=False)

In [66]:
x_lst = x_feature.tolist()
print len(x_lst[0])
for i in sorted(index, reverse=True):
    del x_lst[i]

15


In [67]:
x_feature = np.array(x_lst)

In [68]:
print x_feature.shape

(240, 15)


In [69]:
# shuffle X and Y correspondingly 
X, Y = shuffle(x_feature, y, random_state = 34)
offset = int(len(X)*0.8)
X_train, Y_train = X[:offset], Y[:offset]
X_test, Y_test = X[offset:], Y[offset:]
print (type(X_train), type(Y_train))
print (len(X_train), len(X_train[0]), len(Y_train), len(X_test), len(X_test[0]), len(Y_test))

(<type 'numpy.ndarray'>, <type 'numpy.ndarray'>)
(192, 15, 192, 48, 15, 48)


## SVM 

In [70]:
# training of SVM 
clf = svm.SVC(gamma='scale',max_iter=5000, verbose=True)
clf.fit(X_train, Y_train) 

[LibSVM]

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='scale', kernel='rbf',
  max_iter=5000, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=True)

In [71]:
# Verification of SVM 
results = clf.predict(X_test)
correct = 0
for i in xrange(len(results)):
    if results[i] == Y_test[i]:
        correct+=1
print (correct/float(len(Y_test)))

0.833333333333


## GAUSSIAN NAIVE BAYES

In [72]:
# training 
clf_nb = GaussianNB()
clf_nb.fit(X_train, Y_train)

GaussianNB(priors=None, var_smoothing=1e-09)

In [77]:
results_nb = clf_nb.predict(X_test)
print results

[0 0 1 0 0 1 0 1 0 0 1 0 0 1 1 1 0 1 1 1 1 1 0 1 0 0 0 1 1 1 0 1 0 0 1 1 1
 1 0 0 0 1 1 0 1 0 0 0]


In [74]:
# verification
c_nb = 0 
for i in xrange(len(results_nb)):
    if results_nb[i] == Y_test[i]:
        c_nb += 1
print (c_nb/float(len(Y_test)))

0.854166666667


## GRADIENT BOOSTING CLASSIFIER

In [75]:
clf_gbc = GradientBoostingClassifier(n_estimators=100, learning_rate=1.0,max_depth=1, random_state=0, verbose = True).fit(X_train, Y_train)

      Iter       Train Loss   Remaining Time 
         1           0.8200            0.47s
         2           0.7261            0.26s
         3           0.6216            0.19s
         4           0.5702            0.15s
         5           0.5509            0.13s
         6           0.5293            0.11s
         7           0.4962            0.10s
         8           0.4585            0.09s
         9           0.4131            0.09s
        10           0.3689            0.08s
        20           0.2152            0.05s
        30           0.1278            0.03s
        40           0.0984            0.03s
        50           0.0705            0.02s
        60           0.0553            0.01s
        70           0.0410            0.01s
        80           0.0335            0.01s
        90           0.0267            0.00s
       100           0.0200            0.00s


In [76]:
# verification
clf_gbc.score(X_test,Y_test)

0.8958333333333334