In [None]:
# Turn off the Jedi-based completer so that auto-complete responds faster
%config Completer.use_jedi = False

In [None]:
import os
import sys
import numpy as np
import pandas as pd
import math
import copy
import re
from sklearn.preprocessing import StandardScaler

# Data Preparation

Two different datasets were created for model development:

- Axis detection model 

- Feed rate prediction model

Both of them use the same set of features. The features list can be modified as required.

In [None]:
def check_create_dir(dir_name):
    """Ensure that ``<cwd>/model_data/<dir_name>`` exists.

    Parameters
    ----------
    dir_name : str
        Name of the sub-directory to create inside ``model_data`` (which is
        itself located in the current working directory).

    Notes
    -----
    Missing parent directories are created as well. Calling the function for
    a directory that already exists is a no-op.
    """
    # exist_ok=True replaces the separate isdir() check, which could race with
    # another process creating the directory between the check and the call.
    os.makedirs(os.path.join(os.getcwd(), "model_data", dir_name), exist_ok=True)

In [None]:
# Segmentation parameters
overlap_rate = 0.50  # Fraction of a window shared with the following window
segment_seconds = 5  # Duration of one segment in seconds
# Rows per second of data. window_size is used as a row count when slicing, so
# this factor converts seconds into rows (it replaces an unnamed literal 17).
samples_per_second = 17
window_size = segment_seconds * samples_per_second  # Rows per segment
save_dir_name = "axis"  # Sub-directory of model_data that receives the output
normalization_required = True  # Standardize the feature columns before segmenting

In [None]:
# Base directory: three levels above the current working directory
base_dir = os.getcwd()
for _level in range(3):
    base_dir = os.path.dirname(base_dir)

# Folder that holds one sub-folder per data set
dir_loc = os.path.join(base_dir, "data", "establishing_baseline")

# Columns loaded from each data set; Feed_Rate is kept as the last entry
cols_list = [
    "power_active_all",
    "power_apparent_all",
    "power_reactive_all",
    "Feed_Rate",
]

## Axis detection model

In [None]:
# Build one array of shape (n_segments, window_size, n_features) per data set
# folder and save it under model_data/<save_dir_name>.

# Every column except the last one (Feed_Rate) is used as a feature
feature_cols = cols_list[0:-1]

# Rows between two consecutive window starts. Clamped to at least one row so
# that an overlap_rate close to 1 cannot produce a zero step.
step_size = max(1, int(window_size * (1 - overlap_rate)))

# The output folder is the same for every file, so it is created once
check_create_dir(save_dir_name)

# sorted() makes the processing order reproducible (os.listdir order is arbitrary)
for file in sorted(os.listdir(dir_loc)):
    # Get the file name
    file_name = os.path.join(dir_loc, file, "dataset_model.csv")
    if not os.path.isfile(file_name):
        # Entries without a data set (stray files, empty folders) are skipped
        sys.stdout.write(f"Skipped {file} - no dataset_model.csv found\n")
        continue

    # Open the file using pandas, drop incomplete rows and rows where the
    # feed rate is zero, then keep only the feature columns
    df = pd.read_csv(file_name, header="infer", sep=",", usecols=cols_list)
    df = df.dropna()
    df = df[df["Feed_Rate"] != 0]
    df = df[feature_cols].reset_index(drop=True)

    if len(df) < window_size:
        # Previously the segments of the preceding file were saved again under
        # this file's name in this situation (or a NameError was raised when
        # it happened on the first file).
        sys.stdout.write(f"Skipped {file} - fewer than {window_size} usable rows\n")
        continue

    if normalization_required:
        scaler = StandardScaler().fit(df)
        df = pd.DataFrame(scaler.transform(df), columns=feature_cols)

    values = df.to_numpy()

    # All starts whose window fits completely inside the data. The previous
    # bound was derived from the last index (len - 1) with a strict comparison
    # and therefore dropped valid windows at the end of the data.
    starting_points = range(0, len(values) - window_size + 1, step_size)

    # Stack once instead of re-allocating the array with np.append per window
    segmented_data = np.stack([values[i : i + window_size] for i in starting_points])

    # Save the file
    save_file_name = file[0:5] + "_af" + "_ov" + str(overlap_rate) + "_w" + str(window_size) + "_n" + str(normalization_required)
    np.save(os.path.join(os.getcwd(), "model_data", save_dir_name, save_file_name), segmented_data)

    # Display output for completion
    sys.stdout.write(f"Completed processing - {file}\n")
    sys.stdout.write(f"The shape of the data is {segmented_data.shape}\n")

## Feed rate prediction

In [None]:
# Build, for every data set folder, a dict {feed_rate: segments} where segments
# has shape (n_segments, window_size, n_features), and save it under
# model_data/<save_dir_name>.
save_dir_name = "feedrate"

# Every column except the last one (Feed_Rate) is used as a feature
feature_cols = cols_list[0:-1]

# Rows between two consecutive window starts. Clamped to at least one row so
# that an overlap_rate close to 1 cannot produce a zero step.
step_size = max(1, int(window_size * (1 - overlap_rate)))

# The output folder is the same for every file, so it is created once
check_create_dir(save_dir_name)

# sorted() makes the processing order reproducible (os.listdir order is arbitrary)
for file in sorted(os.listdir(dir_loc)):
    # Get the file name
    file_name = os.path.join(dir_loc, file, "dataset_model.csv")
    if not os.path.isfile(file_name):
        # Entries without a data set (stray files, empty folders) are skipped
        sys.stdout.write(f"Skipped {file} - no dataset_model.csv found\n")
        continue

    # Open the file using pandas, drop incomplete rows and rows where the
    # feed rate is zero
    df = pd.read_csv(file_name, header="infer", sep=",", usecols=cols_list)
    df = df.dropna()
    df = df[df["Feed_Rate"] != 0]
    df = df.reset_index(drop=True)
    if df.empty:
        # The scaler cannot be fitted on zero rows
        sys.stdout.write(f"Skipped {file} - no usable rows\n")
        continue

    # Taking out the feed rate so that it is not normalized
    df_feed = df[cols_list[-1]]
    # .copy() so that adding a column below never writes into a slice of the
    # frame that was read from disk
    df = df[feature_cols].copy()

    # Normalizing the dataset
    if normalization_required:
        scaler = StandardScaler().fit(df)
        df = pd.DataFrame(scaler.transform(df), columns=feature_cols)
    # Putting the feed rate back in (both share the index reset above)
    df["Feed_Rate"] = df_feed

    # Grouping by the feed rates
    # NOTE(review): rows are grouped by feed-rate value only, so a window may
    # span rows that were not adjacent in the file - confirm this is intended.
    df_group = df.groupby("Feed_Rate")
    feed_rates = list(df_group.groups)

    # For each group get the appropriate data
    data_dict = {}
    for feed_rate in feed_rates:
        # Feature rows recorded at this feed rate
        values = df_group.get_group(feed_rate)[feature_cols].to_numpy()

        # All starts whose window fits completely inside the group. The
        # previous bound was derived from the last index (len - 1) with a
        # strict comparison and dropped valid windows at the end of the group.
        starting_points = range(0, len(values) - window_size + 1, step_size)
        windows = [values[i : i + window_size] for i in starting_points]

        if not windows:
            # Previously the segments of the preceding feed rate were stored
            # under this feed rate when the group was shorter than one window.
            sys.stdout.write(f"Skipped feed rate {feed_rate} in {file} - fewer than {window_size} rows\n")
            continue

        # Stack once instead of re-allocating the array with np.append per window
        data_dict[feed_rate] = np.stack(windows)

    # Save the data for each axis separately.
    # np.save stores a dict as a pickled object array; it has to be read back
    # with np.load(path, allow_pickle=True).item().
    save_file_name = file[0:5] + "_feeddict" + "_ov" + str(overlap_rate) + "_w" + str(window_size) + "_n" + str(normalization_required)
    np.save(os.path.join(os.getcwd(), "model_data", save_dir_name, save_file_name), data_dict)
        

## Multi-output [Axis and Feed rate]

In [None]:
# Segmentation parameters
overlap_rate = 0.50  # Fraction of a window shared with the following window
segment_seconds = 5  # Duration of one segment in seconds
# Rows per second of data. window_size is used as a row count when slicing, so
# this factor converts seconds into rows (it replaces an unnamed literal 17).
samples_per_second = 17
window_size = segment_seconds * samples_per_second  # Rows per segment
normalization_required = True  # Standardize the feature columns before segmenting

In [None]:
# Build, for every data set folder, a dict {(class_id, feed_rate): segments}
# where segments has shape (n_segments, window_size, n_features), and save it
# under model_data/<save_dir_name>.
save_dir_name = "multi_output_axisfr"

# Class association for axis
axis = {"xaxis": 0, "yaxis": 1, "zaxis": 2, "baxis": 3, "caxis": 4}

# Every column except the last one (Feed_Rate) is used as a feature
feature_cols = cols_list[0:-1]

# Rows between two consecutive window starts. Clamped to at least one row so
# that an overlap_rate close to 1 cannot produce a zero step.
step_size = max(1, int(window_size * (1 - overlap_rate)))

# The output folder is the same for every file, so it is created once
check_create_dir(save_dir_name)

# sorted() makes the processing order reproducible (os.listdir order is arbitrary)
for file in sorted(os.listdir(dir_loc)):
    # Get the file name
    file_name = os.path.join(dir_loc, file, "dataset_model.csv")
    if not os.path.isfile(file_name):
        # Entries without a data set (stray files, empty folders) are skipped
        sys.stdout.write(f"Skipped {file} - no dataset_model.csv found\n")
        continue

    # Create the class id based on the type of axis. The first five characters
    # of the folder name are looked up directly; the former re.search() with a
    # trailing "*" treated a glob pattern as a regular expression and was
    # re-evaluated for every feed rate although it only depends on the folder.
    axis_name = file[0:5]
    if axis_name not in axis:
        raise ValueError(f"Cannot derive the axis class from folder name {file!r}; expected a prefix in {list(axis)}")
    class_id = axis[axis_name]

    # Open the file using pandas, drop incomplete rows and rows where the
    # feed rate is zero
    df = pd.read_csv(file_name, header="infer", sep=",", usecols=cols_list)
    df = df.dropna()
    df = df[df["Feed_Rate"] != 0]
    df = df.reset_index(drop=True)
    if df.empty:
        # The scaler cannot be fitted on zero rows
        sys.stdout.write(f"Skipped {file} - no usable rows\n")
        continue

    # Taking out the feed rate so that it is not normalized
    df_feed = df[cols_list[-1]]
    # .copy() so that adding a column below never writes into a slice of the
    # frame that was read from disk
    df = df[feature_cols].copy()

    # Normalizing the dataset
    if normalization_required:
        scaler = StandardScaler().fit(df)
        df = pd.DataFrame(scaler.transform(df), columns=feature_cols)
    # Putting the feed rate back in (both share the index reset above)
    df["Feed_Rate"] = df_feed

    # Grouping by the feed rates
    # NOTE(review): rows are grouped by feed-rate value only, so a window may
    # span rows that were not adjacent in the file - confirm this is intended.
    df_group = df.groupby("Feed_Rate")
    feed_rates = list(df_group.groups)

    # For each group get the appropriate data
    data_dict = {}
    for feed_rate in feed_rates:
        # Feature rows recorded at this feed rate
        values = df_group.get_group(feed_rate)[feature_cols].to_numpy()

        # All starts whose window fits completely inside the group. The
        # previous bound was derived from the last index (len - 1) with a
        # strict comparison and dropped valid windows at the end of the group.
        starting_points = range(0, len(values) - window_size + 1, step_size)
        windows = [values[i : i + window_size] for i in starting_points]

        if not windows:
            # Previously the segments of the preceding feed rate were stored
            # under this feed rate when the group was shorter than one window.
            sys.stdout.write(f"Skipped feed rate {feed_rate} in {file} - fewer than {window_size} rows\n")
            continue

        # Stack once instead of re-allocating the array with np.append per window
        data_dict[(class_id, feed_rate)] = np.stack(windows)

    # Save the data for each axis separately.
    # np.save stores a dict as a pickled object array; it has to be read back
    # with np.load(path, allow_pickle=True).item().
    save_file_name = file[0:5] + "_axisfeeddict" + "_ov" + str(overlap_rate) + "_w" + str(window_size) + "_n" + str(normalization_required)
    np.save(os.path.join(os.getcwd(), "model_data", save_dir_name, save_file_name), data_dict)


# Rough Work

In [None]:
# Base directory: three levels above the current working directory
base_dir = os.getcwd()
for _level in range(3):
    base_dir = os.path.dirname(base_dir)

# Folder that holds one sub-folder per data set
dir_loc = os.path.join(base_dir, "data", "establishing_baseline")

# Columns loaded from each data set; Feed_Rate is kept as the last entry
cols_list = [
    "power_active_all",
    "power_apparent_all",
    "power_reactive_all",
    "Feed_Rate",
]

In [None]:
# Data set folder inspected in this scratch section
file = "xaxis_20-20-980_A50"
file_name = os.path.join(dir_loc, file, "dataset_model.csv")

# Read only the configured columns and discard rows with missing values
df = pd.read_csv(file_name, header="infer", sep=",", usecols=cols_list).dropna()

In [None]:
# Keep only the rows whose feed rate is not zero
df = df.loc[df["Feed_Rate"] != 0]

In [None]:
# Order the columns as in cols_list and renumber the rows from zero
df = df.loc[:, cols_list].reset_index(drop=True)

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the feature columns only. Fitting on every column while naming
# just the feature columns raised a shape-mismatch ValueError, and it would
# also have rescaled Feed_Rate, which the cells below group by its raw value.
feature_cols = cols_list[0:-1]
scaler = StandardScaler().fit(df[feature_cols])
a = scaler.transform(df[feature_cols])

# index=df.index keeps the scaled rows aligned with the untouched Feed_Rate column
df = pd.DataFrame(a, columns=feature_cols, index=df.index).assign(Feed_Rate=df["Feed_Rate"])

In [None]:
# Group the rows by their feed rate value
dfg = df.groupby("Feed_Rate")

In [None]:
# Display the rows of the group whose Feed_Rate equals 20
dfg.get_group(20)

In [None]:
# Scratch check: add a column named "Something" filled with the constant 0
df["Something"] = 0

In [None]:
# Display the Feed_Rate column
df["Feed_Rate"]