# CAN BUS Machine Learning

### Imports

In [2]:
import pandas as pd
from  sklearn.preprocessing  import StandardScaler
from sklearn.linear_model import LogisticRegression
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report


### Training Code

In [3]:

# turn of warning messages
pd.options.mode.chained_assignment = None  # default='warn'

# get data
df = pd.read_csv('trimmedMTrainingData.csv')
#df
# get features and corresponding outcomes
feature_names = ['can_id', 'can_data_byte_0', 'can_data_byte_1', 'can_data_byte_2', 'can_data_byte_3', 'can_data_byte_4',
                 'can_data_byte_5', 'can_data_byte_6', 'can_data_byte_7']
training_features = df[feature_names]

outcome_name = ['data_type']
outcome_labels = df[outcome_name]
# list down features based on type
numeric_feature_names = ['can_data_byte_0', 'can_data_byte_1', 'can_data_byte_2', 'can_data_byte_3', 'can_data_byte_4',
                 'can_data_byte_5', 'can_data_byte_6', 'can_data_byte_7']
categoricial_feature_names = ['can_id']
ss = StandardScaler()

# fit scaler on numeric features
ss.fit(training_features[numeric_feature_names])

# scale numeric features now
training_features[numeric_feature_names] = ss.transform(training_features[numeric_feature_names])
training_features = pd.get_dummies(training_features, columns=categoricial_feature_names)

# fit the model
lr = LogisticRegression() 
model = lr.fit(training_features, (outcome_labels['data_type']))

categorical_engineered_features = list(set(training_features.columns) - set(numeric_feature_names))
# simple evaluation on training data
pred_labels = model.predict(training_features)
actual_labels = np.array(outcome_labels['data_type'])

print('Accuracy:', float(accuracy_score(actual_labels, pred_labels))*100, '%')
print('Classification Stats:')
print(classification_report(actual_labels, pred_labels))

Accuracy: 92.50690597767573 %
Classification Stats:
             precision    recall  f1-score   support

          0       0.99      0.26      0.41     69588
          1       0.92      1.00      0.96    623290

avg / total       0.93      0.93      0.90    692878



### Testing Code

In [7]:
Average_Sum = 0
Max_Tests = 40

for i in range(0,Max_Tests):
    data_name = "trimmedMTestData_" + str(i) + ".csv"
     #turn of warning messages
    pd.options.mode.chained_assignment = None  # default='warn'

    # get data
    df = pd.read_csv(data_name)
    #df
    # get features and corresponding outcomes
    feature_names = ['can_id', 'can_data_byte_0', 'can_data_byte_1', 'can_data_byte_2', 'can_data_byte_3', 'can_data_byte_4',
                 'can_data_byte_5', 'can_data_byte_6', 'can_data_byte_7']
    training_features = df[feature_names]

    outcome_name = ['data_type']
    outcome_labels = df[outcome_name]
    # list down features based on type
    numeric_feature_names = ['can_data_byte_0', 'can_data_byte_1', 'can_data_byte_2', 'can_data_byte_3', 'can_data_byte_4',
                 'can_data_byte_5', 'can_data_byte_6', 'can_data_byte_7']
    categoricial_feature_names = ['can_id']
    ss = StandardScaler()

    # fit scaler on numeric features
    ss.fit(training_features[numeric_feature_names])

    # scale numeric features now
    training_features[numeric_feature_names] = ss.transform(training_features[numeric_feature_names])
    training_features = pd.get_dummies(training_features, columns=categoricial_feature_names)
    categorical_engineered_features = list(set(training_features.columns) - set(numeric_feature_names))
    # simple evaluation on training data
    pred_labels = model.predict(training_features)
    actual_labels = np.array(outcome_labels['data_type'])

    Average_Sum += (float(accuracy_score(actual_labels, pred_labels)))
Total_Average = (Average_Sum / Max_Tests) * 100
print(Total_Average)

92.57666573336142
