# One-shot Prediction

- Predict from trained models using selected data

In [None]:
import joblib
import sys
import os
import pandas as pd
import numpy as np
sys.path.append(os.path.join(os.path.dirname(os.getcwd()), "lib"))
from sklearn.utils  import shuffle
from sklearn.metrics import f1_score
import data_prep
import feature_extraction
import anomaly_detection_models

# Loading data

- Load the recordings needed for this prediction run and split them into fixed-length segments

In [None]:
# Base directory: the "DATA" folder next to the parent of the current working directory
data_loc = os.path.join(os.path.dirname(os.getcwd()), "DATA")

# File names
# NOTE: keys 0 and 7 refer to the same recording.
file_names = {
    0: "machine_ON_ref_overtravel-error_x_neg_axes-extreme_1.csv",
    1: "machine_ON_ref_no-error_3.csv",
    2: "machine_ON_ref_no-error_4.csv",
    3: "machine_ON_ref_no-error_10.csv",
    4: "machine_ON_ref_no-error_11.csv",
    5: "machine_ON_ref_overtravel-error_z_pos_1.csv",
    6: "machine_ON_no-ref_start-error_4.csv",
    7: "machine_ON_ref_overtravel-error_x_neg_axes-extreme_1.csv"
}


# Segment length handed to data_prep.segment_data (seconds, going by the parameter name)
segment_secs = 60
# Do not list "no" and "sample_time" here, they are added to the beginning later
# Chosen - three different power components for three phases
chosen_cols = ["Power1", "Power2", "Power3", "PowerReac1", "PowerReac2", "PowerReac3", "PowerApp1", "PowerApp2", "PowerApp3"]

# Segment every distinct file exactly once. The result is keyed by file name,
# so a file listed twice in `file_names` would otherwise be read and segmented
# again only to overwrite an identical entry. dict.fromkeys keeps first-seen order.
segmented_data = {}
for file_name in dict.fromkeys(file_names.values()):
    path = os.path.join(data_loc, file_name)
    temp = data_prep.segment_data(file_name=path, col_names=chosen_cols, segment_secs=segment_secs)
    # Drop the first entry along axis 1.
    # NOTE(review): the original comment says this removes the sample_time
    # column - confirm against data_prep.segment_data's output layout.
    temp = temp[:, 1:, :]
    segmented_data[file_name] = temp


# Which recordings make up each class in this study.
# Only "on-ref" and "on-noref-error" are populated for this run; the
# overtravel class(es) are not included here.
class_file_association = {
    "on-ref": ["machine_ON_ref_no-error_11.csv"],
    "on-noref-error": ["machine_ON_no-ref_start-error_4.csv"],
}

# Gather the segmented arrays of every file belonging to a class, join them
# along the last axis, then reverse the axis order with a (2, 1, 0) transpose.
class_segmented_data = {}
for class_name, class_files in class_file_association.items():
    if not class_files:
        # A class without files contributes no entry at all.
        continue
    joined = np.concatenate(
        [segmented_data[member] for member in class_files],
        axis=-1,
    )
    class_segmented_data[class_name] = np.transpose(joined, (2, 1, 0))

In [None]:
# Feature extraction
# For each class, walk the first axis of its array; for each item walk the
# next axis and concatenate the features returned for every slice into one
# flat feature list per item.
# (Presumably first axis = segments, second = signal columns - TODO confirm.)
class_dataset_features = {}
for class_name, class_array in class_segmented_data.items():
    feature_rows = []
    for segment in class_array:
        segment_features = []
        for signal in segment:
            # Argument lists are rebuilt for every call, exactly as before,
            # so the helper always receives fresh dictionaries.
            segment_features.extend(
                feature_extraction.compute_all_features(
                    signal,
                    freq_args=[{"axis": 0}, {"axis": 0}, {"axis": 0, "nperseg": 15}],
                    freq_time_args=[{"wavelet": "db1"} for _ in range(3)],
                )
            )
        feature_rows.append(segment_features)

    # One array of feature rows per class
    class_dataset_features[class_name] = np.array(feature_rows)

In [None]:
# Report the shape of the extracted feature array for every class
sys.stdout.write("After feature extraction process\n\n")
for class_name, class_features in class_dataset_features.items():
    sys.stdout.write(f'For the class-{class_name} , the extracted features has the shape={class_features.shape}\n')

In [None]:
# Integer label assigned to each class name
class_label_associations = {
    "on-ref": 0,
    "on-noref-error": 1,
    "overtravel": 2,
}

# Collect the per-class feature arrays and a matching 1-D label vector
# (one label per feature row), then join everything in a single step.
feature_blocks = []
label_blocks = []
for class_instance, class_features in class_dataset_features.items():
    feature_blocks.append(class_features)
    label_blocks.append(
        np.repeat(class_label_associations[class_instance], class_features.shape[0])
    )

if not feature_blocks:
    # Without this check X and y would simply be undefined further down.
    raise ValueError("No class features available - nothing to predict on")

X = np.concatenate(feature_blocks, axis=0)
# Labels are kept 1-D throughout. Building an (N, 1) column and squeezing it
# afterwards would collapse to a 0-d array when there is exactly one sample.
y = np.concatenate(label_blocks, axis=0)

# Shuffle the dataset (features and labels with the same permutation)
X, y = shuffle(X, y, random_state=42)

sys.stdout.write(f"The final combined shape-{X.shape}\n")

# Loading the models

## Multi-class models

- Predicting the individual classes

In [None]:
import joblib
import os
import pickle
from collections import Counter

# Directory with the persisted multi-class model pipelines
models_dir = os.path.join(os.path.dirname(os.getcwd()), "trained_models", "multi_class")
# Keep only regular, non-hidden files: os.listdir also returns sub-directories
# (e.g. ".ipynb_checkpoints") and dotfiles, which cannot be loaded as models.
# Sorted so the load/report order does not depend on the file system.
models = sorted(
    entry
    for entry in os.listdir(models_dir)
    if not entry.startswith(".") and os.path.isfile(os.path.join(models_dir, entry))
)
model_pipelines = {}

# Loading the model pipelines into dictionary, keyed by the file name up to
# its first "."
# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code - only load model files from a trusted source.
for model in models:
    model_path = os.path.join(models_dir, model)
    with open(model_path, "rb") as file_handle:
        model_pipelines[model.split(".")[0]] = joblib.load(file_handle)


In [None]:
# Scoring: for every loaded pipeline keep its predictions on X and the value
# returned by its score(X, y) method
prediction_counts = {}
scores = {}
for model_name, pipeline in model_pipelines.items():
    prediction_counts[model_name] = pipeline.predict(X)
    scores[model_name] = pipeline.score(X, y)

In [None]:
# Results: how often each label was predicted, per model
for model_name, predicted_labels in prediction_counts.items():
    sys.stdout.write(f"{model_name} - {Counter(predicted_labels)}\n")

In [None]:
# Results: the score obtained by each model
for model_name, model_score in scores.items():
    sys.stdout.write(f"{model_name} = {model_score}\n")

## Anomaly Detection
- Binary task with only two classes: anomaly (1) or not (0)
- Every non-zero multi-class label is treated as an anomaly

In [None]:
# Directory with the persisted anomaly-detection model pipelines
models_dir = os.path.join(os.path.dirname(os.getcwd()), "trained_models", "anomaly_detection")
# Keep only regular, non-hidden files: os.listdir also returns sub-directories
# (e.g. ".ipynb_checkpoints") and dotfiles, which cannot be loaded as models.
# Sorted so the load/report order does not depend on the file system.
models = sorted(
    entry
    for entry in os.listdir(models_dir)
    if not entry.startswith(".") and os.path.isfile(os.path.join(models_dir, entry))
)
model_pipelines = {}

# Loading the model pipelines into dictionary, keyed by the file name up to
# its first "."
# NOTE(security): joblib.load unpickles the file and can execute arbitrary
# code - only load model files from a trusted source.
for model in models:
    model_path = os.path.join(models_dir, model)
    with open(model_path, "rb") as file_handle:
        # Echo the file name so a failing load can be traced to its file
        print(model)
        model_pipelines[model.split(".")[0]] = joblib.load(file_handle)

In [None]:
# Collapse the multi-class labels into a binary target:
# label 0 stays 0, every other (positive) label becomes 1.
y_mod = np.where(y > 0, 1, 0)

# Scoring: keep each model's predictions on X and its F1 score against y_mod.
# NOTE(review): this assumes the loaded pipelines predict with the same 0/1
# convention as y_mod - confirm for the models in use.
prediction_counts = {}
f1_scores = {}
for model_name, pipeline in model_pipelines.items():
    predicted = pipeline.predict(X)
    prediction_counts[model_name] = predicted
    f1_scores[model_name] = f1_score(y_mod, predicted)

In [None]:
# Results: how often each label was predicted, per model
for model_name, predicted_labels in prediction_counts.items():
    sys.stdout.write(f"{model_name} - {Counter(predicted_labels)}\n")

In [None]:
# Show the per-model F1 scores as this cell's output
f1_scores