In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [34]:
import os

In [2]:
from sklearn.preprocessing import StandardScaler

In [3]:
cols = ['time','value']

In [27]:
df1 = pd.read_csv("./data/a1.filtered.csv",
                  names=cols, header=0) # ON
df2 = pd.read_csv("./data/a2.filtered.csv",
                 names=cols, header=0) # OFF
df3 = pd.read_csv("./data/a3.filtered.csv",
                 names=cols, header=0) # RIGHT
df4 = pd.read_csv("./data/a4.filtered.csv",
                 names=cols, header=0) # LEFT

In [173]:
print('importing dependencies..')
from sklearn.linear_model import LinearRegression, LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm

importing dependencies..


In [208]:
print('importing dependencies..')
from sklearn.linear_model import LinearRegression, LogisticRegressionCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import KFold, cross_val_score

from sklearn.preprocessing import StandardScaler

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tqdm import tqdm

importing dependencies..


In [209]:
class Base_Score(object):
    
    def __init__(self, root = "./data/", steps=3, shuffle=True, scoring='accuracy', attr='filtered', seed=101):        
        self.models = [
                        ('log_r', LogisticRegressionCV()),
                        ('dt', DecisionTreeClassifier()),
                        ('rf', RandomForestClassifier()),
                        ('nb', GaussianNB()),
                        ('knn', KNeighborsClassifier()),
                        ('lda', LinearDiscriminantAnalysis())
                    ]
        self._seed_ = seed
        self._root_ = root
        self._attr_= attr
        self._shuffle_ = shuffle
        self._scoring_ = scoring
        self._steps_ = steps
        self.X = []
        self.y = []
    
    def create_data_feature_steps(self, data, steps=3):
        '''
        Creating steps feature from data file
        '''
        new_df = []
        for i in range(steps, data.shape[0]):
            new_df = np.append(new_df,
                               data[i-steps: steps+i])
        new_df_cols = ['col'+str(i) for i in range(steps)]
        new_df = pd.DataFrame(new_df.reshape(-1,steps),
                     columns=new_df_cols)
        return new_df
    
    def preprocess_data(self, data):
        '''
        Dropping the 'time' column, Scaling and creating features steps 
        '''
        data = data.drop('time', axis=1)
        data = StandardScaler().fit_transform(data)
        data = create_data_feature_steps(data, steps=_steps_)
        return data
    
    def read_multiple_files(path=root):
        '''
        Reading Multiple files and creating the final dataset.
        '''

        cols = ['time','value']

        all_files = os.listdir(path)

        data = pd.DataFrame()

        for f in tqdm(all_files):
            if f.split('.')[1]==self._attr_:

                # reading csv file
                df = pd.read_csv(os.path.join(path, f),
                                 names=cols, header=0)

                # preprocessing data with steps
                df = preprocess_data(df, 3)

                # adding labels
                labels = np.array([list(
                                    f.split('.')[0]
                                    )[1]
                               ]*df.shape[0]).astype(int)

                df['label'] = labels

                # appending df to main data
                data = data.append(df)

        # shuffling
        if self._shuffle_:
            data = data.sample(frac=1).reset_index(drop=True)

        
        return data.drop('label', axis=1), data['label']
    

    
    def run_read_files(self):
        print('Reading files..')
        self.X, self.y = read_multiple_files()

    def plot_base_model_score(self):
            
        def get_model_score(X, y, estimator, name):
            print('*'*15, name, '*'*15)

            print('Init folds...')
            # fold
            kfold = KFold(10, True, random_state=self._seed_)

            print('Training...')
            model_score = cross_val_score(estimator, X, y,
                            cv=kfold,
                            verbose=1,
                            scoring=self._scoring_)

            print('Evaluation:')

            result = 'MEAN: %.3f \nSTD: (%.3f)' %(model_score.mean(), model_score.std())
            print(result)
            print( '\n')


            return model_score.mean()

        scores = []
        names = []
        for name, model in self.models:
            scores.append(get_model_score(self.X, self.y, model, name))
            names.append(name)


        plt.figure(figsize=(10, 6))
        plt.bar(np.array(range(len(scores))), scores)
        plt.xticks(np.array(range(len(scores))), names)
        plt.show()

        return scores

In [210]:
final = Base_Score()

In [None]:
def predict(path='nb_01.pkl', pred_file):
    import pickle
    from sklearn.metrics import confusion_matrix, classification_report
    model_pkl = open(path, 'rb')
    saved_model = pickle.load(model_pkl)
    print("Loading model :: ", saved_model)
    saved_model.predict()

In [33]:
root = "./data/"

In [29]:
def create_data_feature_steps(data, steps=3):
    new_df = []
    for i in range(steps, data.shape[0]):
        new_df = np.append(new_df,
                           data[i-steps: steps+i])
    new_df_cols = ['col'+str(i) for i in range(steps)]
    new_df = pd.DataFrame(new_df.reshape(-1,steps),
                 columns=new_df_cols)
    return new_df

In [30]:
def preprocess_data(data, steps=3):
    data = data.drop('time', axis=1)
    data = StandardScaler().fit_transform(data)
    data = create_data_feature_steps(data, steps=steps)
    return data

In [232]:
def read_multiple_files(path=root, attr='filtered', shuffle=True):
    
    cols = ['time','value']
    
    all_files = os.listdir(path)
    
    data = pd.DataFrame()
    
    for f in all_files:
        if f.split('.')[1]==attr:
            
            # reading csv file
            df = pd.read_csv(os.path.join(path, f),
                             names=cols, header=0)
            
            # preprocessing data with steps
            df = preprocess_data(df, 3)
            
            # adding labels
            labels = np.array([list(
                                f.split('.')[0]
                                )[1]
                           ]*df.shape[0]).astype(int)
            
            df['label'] = labels
            
            # appending df to main data
            data = data.append(df)
    
    # shuffling
    if shuffle:
        data = data.sample(frac=1).reset_index(drop=True)
            
        
    return data

In [233]:
df = read_multiple_files()

In [234]:
X, y = data.drop('label', axis=1), data['label']

In [219]:
gnb_final = GaussianNB()

In [221]:
gnb_final.fit(X, y)

GaussianNB(priors=None, var_smoothing=1e-09)

In [237]:
import pickle
from sklearn.metrics import classification_report

In [224]:
pickle_file_name = 'nb_02.pkl'
pickle_file = open(pickle_file_name, mode='wb')
pickle.dump(gnb_final, pickle_file)
pickle_file.close()

In [229]:
# loading pickle file
model_pkl = open(pickle_file_name, 'rb')
saved_model = pickle.load(model_pkl)
print("Loading model :: ", saved_model)

Loading model ::  GaussianNB(priors=None, var_smoothing=1e-09)


In [253]:
data_test = data.sample(frac=0.3, random_state=101)
X_test, y_test = data_test.drop('label', axis=1), data_test['label']

In [254]:
print(classification_report(y_test, saved_model.predict(X_test)))

  'precision', 'predicted', average, warn_for)


              precision    recall  f1-score   support

           1       0.00      0.00      0.00      3706
           2       0.29      1.00      0.45      4270
           3       0.00      0.00      0.00      3431
           4       0.00      0.00      0.00      3330

   micro avg       0.29      0.29      0.29     14737
   macro avg       0.07      0.25      0.11     14737
weighted avg       0.08      0.29      0.13     14737

