<a href="https://colab.research.google.com/github/vijayshgupta/viz/blob/master/Drive_Analytics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import csv
import math
import pandas as pd
import numpy as np

from scipy.stats import skew,kurtosis
from statsmodels.tsa import stattools

from sklearn.ensemble import RandomForestClassifier
from sklearn.dummy import DummyClassifier
from sklearn.model_selection import train_test_split,cross_validate,GridSearchCV
from sklearn.metrics import confusion_matrix

  from pandas.core import datetools


In [0]:
# Create the parameter grid based on the results of random search 
param_grid = {
    'bootstrap': [True],
    'max_depth': [25,30,35],
    'max_features': [7,9],
    'min_samples_leaf': [3,5],
    'min_samples_split': [10, 15],
    'n_estimators': [200, 300, 500]
}
# Create a based model
rf = RandomForestClassifier()
# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 5, n_jobs = -1, verbose = 2)

dataset = np.loadtxt('../content/Features.csv', delimiter=",")
X = dataset[:, 1:]
y = dataset[:, 0]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
# Fit the grid search to the data
grid_search.fit(X_train, y_train)


In [0]:
print(grid_search.best_params_)

In [0]:
from sklearn.metrics import accuracy_score
def evaluate(model, test_features, test_labels):
    predictions = model.predict(test_features)
    print('Model Performance')
    
    print('Accuracy = {:0.2f}%.'.format(accuracy_score(test_labels, predictions)))
    print(confusion_matrix(test_labels, predictions))
    accuracy=accuracy_score(test_labels, predictions)
    return accuracy

In [0]:

best_grid = grid_search.best_estimator_
grid_accuracy = evaluate(best_grid, X_test, y_test)
#pd.crosstab(y_test,)

Model Performance
Accuracy = 0.94%.
[[65  1  0  1  1]
 [ 0 55  0  0  0]
 [ 0  0 53  3  2]
 [ 2  2  1 55  0]
 [ 3  0  2  1 50]]


In [0]:
def magnitude(activity):
    x2=activity['X']*activity['X']
    y2 = activity['Y'] * activity['Y']
    z2 = activity['Z'] * activity['Z']
    m2=x2+y2+z2
    m=m2.apply(lambda x:math.sqrt(x))
    return m

def windows(df,size=50):
    start=0
    while start < df.count():
        yield start,start+size
        start += int(size / 2)

def jitter(axis, start, end):
    j = float(0)
    # for i in xrange(start, min(end, axis.count())):
    for i in range(start, min(end, axis.count())):
        if start != 0:
            j += abs(axis[i] - axis[i-1])
    return j / (end - start)

def mean_crossing_rate(axis, start, end):
    cr = 0
    m = axis.mean()
   # for i in xrange(start, min(end, axis.count())):
    for i in range(start, min(end, axis.count())):
        if start != 0:
            p = axis[i-1] > m
            c = axis[i] > m
            if p != c:
                cr += 1
    return float(cr) / (end - start -1)

def window_summary(axis, start, end):
    acf = stattools.acf(axis[start:end])
    acv = stattools.acovf(axis[start:end])
    sqd_error = (axis[start:end] - axis[start:end].mean()) ** 2
    return [
        jitter(axis, start, end),
        mean_crossing_rate(axis, start, end),
        axis[start:end].mean(),
        axis[start:end].std(),
        axis[start:end].var(),
        axis[start:end].min(),
        axis[start:end].max(),
        acf.mean(),
        acf.std(),
        acv.mean(),
        acv.std(),
        skew(axis[start:end]),
        kurtosis(axis[start:end]),
        math.sqrt(sqd_error.mean())
    ]

def features(activity):
    print("Inside features")
    for (start, end) in windows(activity['timestamp']):
        features = []
        for axis in ['X', 'Y', 'Z', 'magnitude']:
            features += window_summary(activity[axis], start, end)
        yield features




In [0]:
#BASE MODEL / DEFAULT FEATURE
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)
base = RandomForestClassifier()
base.fit(X_train, y_train)
base_accuracy = evaluate(base, X_test, y_test)


In [0]:
def model_training():
    print("Model Training ")
    dataset = np.loadtxt('../content/Features.csv', delimiter=",")
    X = dataset[:, 1:]
    y = dataset[:, 0]

    rf = RandomForestClassifier()
    base = DummyClassifier() # generates predictions by respecting the training set's class distribution
    

    results = []
    baselines = []
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
    
    """for i in range(0, 10):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3)
        rf.fit(X_train, y_train)
        base.fit(X_train, y_train)
        res = rf.score(X_test, y_test)
        bas = base.score(X_test, y_test)
        print ('Loop', i, res, bas)
        results.append(res)
        baselines.append(bas)

    print ('\nBaseline', np.mean(baselines), np.std(baselines))
    print ('Random Forest', np.mean(results), np.std(results))"""



In [0]:
def main():
    print("python main function")
    COLUMNS = ['timestamp','X','Y','Z']
    ACCLR=pd.read_csv('../content/acclr.csv',header=None, names=COLUMNS)[:14850]
    BRAKE=pd.read_csv('../content/brake.csv',header=None, names=COLUMNS)[:14850]
    RTURN=pd.read_csv('../content/rturn.csv',header=None, names=COLUMNS)[:14850]
    LTURN=pd.read_csv('../content/lturn.csv',header=None, names=COLUMNS)[:14850]
    COAST=pd.read_csv('../content/coast.csv',header=None, names=COLUMNS)[:14850]

    #print(ACC.head())
    #print(MGN.head())

    ACCLR['magnitude']=magnitude(ACCLR)
    BRAKE['magnitude']=magnitude(BRAKE)
    RTURN['magnitude']=magnitude(RTURN)
    LTURN['magnitude']=magnitude(LTURN)
    COAST['magnitude']=magnitude(COAST)

    activities = [ACCLR, BRAKE, RTURN, LTURN, COAST]
    with open('/../content/Features.csv','w') as out:
        rows = csv.writer(out)
        for i in range(0, len(activities)):
            for f in features(activities[i]):
                rows.writerow([i] + f)
    
"""
    model_training()"""


if __name__ == '__main__':
    main()


In [0]:
print("Accelerometer: ",(ACCLR.shape))
print("Brake: ",(BRAKE.shape))
print("Right Turn: ",(RTURN.shape))
print("Left Turn: ",(LTURN.shape))
print("Coast: ",(COAST.shape))

print("Feature: ",(dataset.shape))

In [0]:
pd.crosstab(X_test.make.X_test.body_style)

In [0]:
np.unique(y_test,return_counts=True)

(array([0., 1., 2., 3., 4.]), array([58, 52, 61, 70, 56]))

In [0]:
header=["label","X","Y","Z","Mag","X_jitter","X_mean_crossing_rate","X_mean","X_std",
        "X_var","X_min","X_max","X_mean","X_std","X_mean","X_std","X_skew","X_kurtosis","X_RMSE",
       "Y_jitter","Y_mean_crossing_rate","Y_mean","Y_std","Y_var","Y_min","Y_maY","Y_mean","Y_std",
        "Y_mean","Y_std","Y_skew","Y_kurtosis","Y_RMSE","Z_jitter","Z_mean_crossing_rate","Z_mean",
        "Z_std","Z_var","Z_min","Z_maZ","Z_mean","Z_std","Z_mean","Z_std","Z_skew","Z_kurtosis","Z_RMSE",
       "mag_jitter","mag_mean_crossing_rate","mag_mean","mag_std","mag_var","mag_min","mag_mamag","mag_mean",
        "mag_std","mag_mean","mag_std","mag_skew","mag_kurtosis","mag_RMSE"]
dataset = pd.read_csv('../content/Features.csv',header=header, delimiter=",")
dataset.corr()