In [1]:
import joblib
import sys
import os
import pandas as pd
import numpy as np
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "lib"))
from sklearn.utils  import shuffle
import data_prep
import feature_extraction

In [2]:
# Folder (sibling of the current working directory) that holds the recordings
data_loc = os.path.join(os.path.dirname(os.getcwd()), "DATA")

# Recordings used in this study
file_names = {
    0: "machine_ON_ref_overtravel-error_x_neg_axes-extreme.csv",
    1: "machine_ON_ref_no-error_3.csv",
    2: "machine_ON_ref_no-error_4.csv"
}

# Segment length handed to data_prep.segment_data
segment_secs = 60
# "no" and "sample_time" must not be listed here; they are added to the
# beginning later on.
# Selection: three different power components for each of the three phases.
chosen_cols = ["Power1", "Power2", "Power3", "PowerReac1", "PowerReac2", "PowerReac3", "PowerApp1", "PowerApp2", "PowerApp3"]

# Segment every recording and key the result by its file name
segmented_data = {}
for recording in file_names.values():
    segments = data_prep.segment_data(
        file_name=os.path.join(data_loc, recording),
        col_names=chosen_cols,
        segment_secs=segment_secs,
    )
    # Drop entry 0 along axis 1, i.e. the sample_time column
    segmented_data[recording] = segments[:, 1:, :]

# Which recordings belong to which class in this study
class_file_association = {
    "on-ref": ["machine_ON_ref_no-error_3.csv", "machine_ON_ref_no-error_4.csv"],
    "overtravel-x": ["machine_ON_ref_overtravel-error_x_neg_axes-extreme.csv"]
}

# For each class, join the arrays of its recordings along the last axis and
# then reverse the axis order so that the former last axis comes first.
class_segmented_data = {
    class_name: np.transpose(
        np.concatenate([segmented_data[recording] for recording in recordings], axis=-1),
        (2, 1, 0),
    )
    for class_name, recordings in class_file_association.items()
}

In [3]:
# Feature extraction: one feature vector per first-axis entry of each class array
class_dataset_features = {}
for class_name, class_array in class_segmented_data.items():
    feature_rows = []
    for entry in class_array:
        feature_vector = []
        for signal in entry:
            # The argument lists are rebuilt for every call so that each call
            # to compute_all_features receives fresh dict objects.
            feature_vector.extend(
                feature_extraction.compute_all_features(
                    signal,
                    freq_args=[{"axis": 0}, {"axis": 0}, {"axis": 0, "nperseg": 30}],
                    freq_time_args=[{"wavelet": "db1"}, {"wavelet": "db1"}, {"wavelet": "db1"}],
                )
            )

        # Features of all signals in this entry form one row of the dataset
        feature_rows.append(feature_vector)

    # Store the rows of this class as an array
    class_dataset_features[class_name] = np.array(feature_rows)

In [4]:
# Report the shape of the extracted feature array of every class
sys.stdout.write("After feature extraction process\n\n")
for class_name, feature_matrix in class_dataset_features.items():
    sys.stdout.write(f'For the class-{class_name} , the extracted features has the shape={feature_matrix.shape}\n')

After feature extraction process

For the class-on-ref , the extracted features has the shape=(1732, 153)
For the class-overtravel-x , the extracted features has the shape=(153, 153)


In [5]:
# Integer label assigned to every class name.
# NOTE(review): "overtravel-x" is labelled 2 here, while the predictions
# recorded further down in this notebook are dominated by label 4 for roughly
# as many samples as this class has, and the recorded accuracies (~0.92) are
# close to the share of "on-ref" samples alone. Confirm that 2 is the label
# the models were trained with for this class.
class_label_associations = {
    "on-ref": 0,
    "overtravel-x": 2
}

# Collect the per-class feature arrays and matching 1-D label arrays first and
# join them once, instead of growing X / y with np.append on every iteration.
feature_blocks = []
label_blocks = []
for class_instance, class_features in class_dataset_features.items():
    feature_blocks.append(class_features)
    label_blocks.append(
        np.repeat(class_label_associations[class_instance], class_features.shape[0])
    )

X = np.concatenate(feature_blocks, axis=0)
# y is built directly as a vector; squeezing a column afterwards would collapse
# a single-sample dataset to a 0-d array.
y = np.concatenate(label_blocks, axis=0)

# Shuffle features and labels together (fixed seed for reproducibility)
X, y = shuffle(X, y, random_state=42)

sys.stdout.write(f"The final combined shape-{X.shape}\n")

The final combined shape-(1885, 153)


Load the models

In [6]:
import joblib
import os
import pickle
from collections import Counter

# Directory (sibling of the current working directory) with the serialized
# estimators (*.joblib) and the pickled preprocessing object.
models_dir = os.path.join(os.path.dirname(os.getcwd()), "trained_models")
models = os.listdir(models_dir)
loaded_models = {}
scaler = None

for model in models:
    model_path = os.path.join(models_dir, model)

    # Ignore sub-directories and hidden entries (e.g. ".ipynb_checkpoints",
    # ".DS_Store"); they are neither estimators nor the preprocessing pickle
    # and would otherwise make the load below fail.
    if model.startswith(".") or not os.path.isfile(model_path):
        continue

    # splitext keeps dots that are part of the model name, so two files such
    # as "SVC.v1.joblib" and "SVC.v2.joblib" do not end up under the same key.
    model_name, extension = os.path.splitext(model)

    if extension == ".joblib":
        with open(model_path, "rb") as file_handle:
            loaded_models[model_name] = joblib.load(file_handle)
    else:
        # Any other regular file is treated as the pickled preprocessing object.
        # SECURITY: pickle.load (and joblib.load) can execute arbitrary code;
        # only load files from a trusted source.
        with open(model_path, "rb") as file_handle:
            scaler = pickle.load(file_handle)

# Fail here with a clear message instead of a NameError in a later cell
if scaler is None:
    raise FileNotFoundError(f"No pickled preprocessing object found in {models_dir}")

In [7]:
# Apply the loaded preprocessing object to the feature matrix.
# NOTE: X is overwritten with its transformed version, so running this cell a
# second time applies the transform to already-transformed data. Rebuild X
# (dataset assembly cell) before re-running.
X = scaler["object"].transform(X)

# Per model: the predicted labels and the value returned by its score method
prediction_counts = {}
scores = {}
for model_name, estimator in loaded_models.items():
    prediction_counts[model_name] = estimator.predict(X)
    scores[model_name] = estimator.score(X, y)

In [8]:
# Show, for every model, how often each label was predicted
for model_name, predicted_labels in prediction_counts.items():
    label_histogram = Counter(predicted_labels)
    sys.stdout.write(f"{model_name} - {label_histogram}\n")

RandomForestClassifier - Counter({0: 1733, 4: 152})
SVC - Counter({0: 1729, 4: 152, 2: 4})
LogisticRegression - Counter({0: 1729, 4: 101, 2: 54, 3: 1})
BaggingClassifier - Counter({0: 1728, 4: 153, 3: 4})
KNeighborsClassifier - Counter({0: 1725, 4: 89, 2: 52, 3: 18, 1: 1})
DecisionTreeClassifier - Counter({0: 1726, 4: 153, 2: 4, 3: 2})


In [9]:
# Show the score obtained by every model on the assembled dataset
for model_name, model_score in scores.items():
    sys.stdout.write(f"{model_name} = {model_score}\n")

RandomForestClassifier = 0.9188328912466843
SVC = 0.9177718832891246
LogisticRegression = 0.9448275862068966
BaggingClassifier = 0.916710875331565
KNeighborsClassifier = 0.9395225464190982
DecisionTreeClassifier = 0.9156498673740053
