In [1]:
import numpy as np
import scipy.io as sio
from sklearn.utils import shuffle
import pathlib
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt

import h5py
import zipfile

%matplotlib inline
# %load_ext autoreload
# %autoreload 2

In [None]:
# Download the milling data set (mill.zip) from the NASA Prognostic Data Repository
# and the CSV of labels with tool classes, so that the notebook can also be run
# on Google Colab.
# https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/
!wget 'https://ti.arc.nasa.gov/m/project/prognostic-repository/mill.zip'
!wget 'https://raw.githubusercontent.com/tvhahn/ml-tool-wear/master/labels_with_tool_class.csv'

In [3]:
# archive fetched by the download cell; the path is relative to the working directory
data_file = Path('mill.zip')

# unpack the archive; mill.mat is then read from the working directory
with zipfile.ZipFile(data_file, 'r') as zip_ref:
    zip_ref.extractall('')

# read the MATLAB file and keep the 'mill' struct array on its own
m = sio.loadmat('mill.mat', struct_as_record=True)
data = m['mill']

# field names of the struct array, kept in the tuple l
l = data.dtype.names

# the first seven fields are the labels: gather each one over all 167 cuts
# and store it as one dataframe column
df_labels = pd.DataFrame()
for i in range(7):
    df_labels[str(i)] = np.array([data[0, j][i][0][0] for j in range(167)])

# replace the placeholder column names with the real field names
df_labels.columns = l[0:7]

# running number of each cut
df_labels['cut_no'] = list(range(167))

df_labels.head()

Unnamed: 0,case,run,VB,time,DOC,feed,material,cut_no
0,1,1,0.0,2,1.5,0.5,1,0
1,1,2,,4,1.5,0.5,1,1
2,1,3,,6,1.5,0.5,1,2
3,1,4,0.11,7,1.5,0.5,1,3
4,1,5,,11,1.5,0.5,1,4


In [None]:
# Path handed to data_transforms.load_train_test below: the 'src' directory
# two levels above the current working directory.
current_dir = Path.cwd()
processed_data = current_dir.parent.parent / 'src' 
processed_data

In [None]:
# Load the train / validation / test arrays, plus the "slim" train and validation
# variants, via data_transforms.load_train_test (presumably read from processed_data).
# NOTE(review): data_transforms is not imported anywhere in the visible part of this
# notebook, so this cell raises NameError on a fresh kernel - add the import once
# the module's location is confirmed.
(X_train, y_train, 
X_train_slim, y_train_slim,
X_val, y_val,
X_val_slim, y_val_slim,
X_test,y_test) = data_transforms.load_train_test(processed_data) 

In [None]:
# dimensions of the training array
X_train.shape

In [None]:
# plot the first training sample
plt.plot(X_train[0])

In [None]:
# take the first training sample at last-axis index 0 and plot it on its own
test_signal = X_train[0,:,0]
plt.plot(test_signal)

In [None]:
# s, _, sub_s = np.shape(X_train)
# print(np.shape(X_train))
# print(s,sub_s)

# for i in range(s):
#     for j in range(sub_s):
#         X_train[i,:,j] = np.interp(X_train[i,:,j], (min_vals[j], max_vals[j]), (0, 1))

In [None]:
def scaler(x, min_val_array, max_val_array, lower_norm_val, upper_norm_val):
    """Linearly map every channel of ``x`` onto [lower_norm_val, upper_norm_val].

    Channel ``j`` (index on the last axis) is mapped with ``np.interp`` so that
    ``min_val_array[j]`` lands on ``lower_norm_val`` and ``max_val_array[j]`` on
    ``upper_norm_val``. ``np.interp`` clamps: values outside
    ``[min_val_array[j], max_val_array[j]]`` come out as exactly the lower or
    upper bound.

    The input is never modified. Results are written to a new array, so
    passing a slice (view) of a larger array cannot silently rescale that
    array, and integer inputs are no longer truncated back to integers.

    Parameters
    ----------
    x : array_like with three dimensions; channels are on the last axis
    min_val_array, max_val_array : indexable, one entry per channel
    lower_norm_val, upper_norm_val : scalars, the target range

    Returns
    -------
    numpy.ndarray
        New array with the shape of ``x``. A floating-point input keeps its
        dtype; any other dtype yields float64.

    Raises
    ------
    ValueError
        If ``x`` does not have exactly three dimensions.
    """
    x = np.asarray(x)

    # unpacking fails for anything that is not 3-D, as the original unpacking did
    _, _, n_channels = np.shape(x)

    # keep float32/float64 inputs as they are; promote everything else to float64
    out_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    scaled = np.empty(x.shape, dtype=out_dtype)

    # np.interp accepts an array of any shape as its first argument, so a whole
    # channel (all samples at once) is mapped per call
    for j in range(n_channels):
        scaled[:, :, j] = np.interp(x[:, :, j],
                                    (min_val_array[j], max_val_array[j]),
                                    (lower_norm_val, upper_norm_val))

    return scaled
    
def get_min_max(x):
    """Return the column-wise minimum and maximum over all entries of ``x``.

    The entries of ``x`` are joined along their first axis
    (http://bit.ly/2MQuXZd) and the extremes are then taken over that axis.

    Returns
    -------
    tuple
        ``(min_vals, max_vals)``.
    """
    stacked = np.concatenate(x)
    return stacked.min(axis=0), stacked.max(axis=0)


In [None]:
# extremes of the training set, one value per channel
min_vals, max_vals = get_min_max(X_train)
print(min_vals)
print(max_vals)

# Take a copy of the first two samples: X_train[0:2] by itself is a view, and a
# view handed to the scaler must never be able to change X_train.
X_sample = X_train[0:2].copy()
X_sample = scaler(X_sample, min_vals, max_vals, 0, 1)


In [None]:
# first sample of the scaled subset
plt.plot(X_sample[0])

In [None]:
# first training sample - presumably for comparison with the scaled plot above
plt.plot(X_train[0])

However, there is a problem when a signal falls outside the min/max range used for scaling: `np.interp` clamps those values to the lower/upper bound of the target range.

In [None]:
# scale the validation set into [0, 0.6] with the training-set min/max and
# rebind X_val to the result
X_val = scaler(X_val, min_vals, max_vals, 0, 0.6)
# extremes of the scaled validation data
min_vals_validation, max_vals_validation = get_min_max(X_val)
print(min_vals_validation)
print(max_vals_validation)

# NOTE(review): X_val already holds scaled values at this point, yet it is scaled
# again with the raw training min/max - confirm this is the intended demonstration.
X_sample = scaler(X_val, min_vals, max_vals, 0, 1)
plt.plot(X_sample[0])

## Make scaler that doesn't truncate

In [None]:
# Load the data sets again via data_transforms.load_train_test - presumably to
# start the next experiment from unscaled arrays.
# NOTE(review): data_transforms is not imported anywhere in the visible part of this
# notebook; this cell raises NameError on a fresh kernel until that import is added.
(X_train, y_train, 
X_train_slim, y_train_slim,
X_val, y_val,
X_val_slim, y_val_slim,
X_test,y_test) = data_transforms.load_train_test(processed_data) 

In [None]:
def scaler(x, min_val_array, max_val_array):
    """Min-max scale every channel of ``x`` without clamping.

    Channel ``j`` (index on the last axis) becomes
    ``(x - min_val_array[j]) / |max_val_array[j] - min_val_array[j]|``.
    Values outside the min/max range are kept and simply land outside
    [0, 1] instead of being truncated.

    The input is never modified. Results are written to a new array, so
    passing an array (or a view of one) cannot silently rescale the caller's
    data, and integer inputs are no longer truncated back to integers.
    A channel whose minimum equals its maximum is divided by 1 instead of 0,
    so a constant channel maps to 0 rather than to nan/inf.

    Parameters
    ----------
    x : array_like with three dimensions; channels are on the last axis
    min_val_array, max_val_array : indexable, one entry per channel

    Returns
    -------
    numpy.ndarray
        New array with the shape of ``x``. A floating-point input keeps its
        dtype; any other dtype yields float64.

    Raises
    ------
    ValueError
        If ``x`` does not have exactly three dimensions.
    """
    x = np.asarray(x)

    # unpacking fails for anything that is not 3-D, as the original unpacking did
    _, _, n_channels = np.shape(x)

    # keep float32/float64 inputs as they are; promote everything else to float64
    out_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    scaled = np.empty(x.shape, dtype=out_dtype)

    for j in range(n_channels):
        low = min_val_array[j]
        span = np.abs(max_val_array[j] - low)
        if span == 0:
            # constant channel: avoid dividing by zero
            span = 1
        # one whole channel (all samples) per step
        scaled[:, :, j] = (x[:, :, j] - low) / span

    return scaled
    

In [None]:
# extremes of the training data, printed one array per line
min_vals, max_vals = get_min_max(X_train)
print(min_vals, max_vals, sep='\n')

# scale the complete training set with its own extremes
X_sample = scaler(X_train, min_vals, max_vals)

In [None]:
# Extremes of the scaled training data. They are stored under their own names so
# that min_vals / max_vals keep holding the statistics of the unscaled training
# set, which the later scaler calls need.
min_vals_scaled, max_vals_scaled = get_min_max(X_sample)
print(min_vals_scaled)
print(max_vals_scaled)

In [None]:
# First sample at last-axis index 0 of the scaled training data. It is read from
# X_sample (the scaler's return value) so the plot does not depend on X_train
# having been modified, and X_sample keeps referring to the full scaled array.
scaled_signal = X_sample[0,:,0]
plt.plot(scaled_signal)

In [None]:
# Scale the validation split with the training-set extremes. The argument is
# X_val (not X_test) so that X_val keeps holding validation data and the
# statistics printed below really describe the validation set.
X_val = scaler(X_val, min_vals, max_vals)

# values outside [0, 1] show where the validation data exceeds the training range
min_vals_validation, max_vals_validation = get_min_max(X_val)
print(min_vals_validation)
print(max_vals_validation)

# Make train/val/test set lists

In [None]:
# read the label table downloaded at the top of the notebook; its tool_class
# column is used as the target further down
df_labels = pd.read_csv('labels_with_tool_class.csv',index_col=False)
df_labels.head()

In [None]:
# one running number per cut: 0 .. 166
cut_numbers = np.arange(167)
cut_numbers

In [None]:
# cut numbers known to be erroneous; they are removed from labels and cut list alike
cuts_to_delete = np.array([17, 94])

# target: tool class of every remaining cut
y = np.delete(df_labels['tool_class'].to_numpy(), cuts_to_delete)
print(y.shape)

# "features" at this stage are just the remaining cut numbers
X = np.delete(cut_numbers, cuts_to_delete)
print(X.shape)
X

In [None]:
from sklearn.model_selection import train_test_split

# first split: 60 % of the cuts for training, 40 % held out, stratified by class
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=15, stratify=y)

# size of the held-out part (this used to reference the undefined name cut_no_test)
print(X_test.shape, y_test.shape)

# second split: divide the held-out part evenly into validation and test sets
X_val, X_test, y_val, y_test = train_test_split(X_test, y_test, test_size=0.5, 
                                                random_state=42, 
                                                stratify=y_test)

In [None]:
# cut numbers listed as failures
cuts_with_failure = [60,70,93,114,140,141,142,143,144,152,153,160]

# for each split, print a heading followed by the failure cuts it contains,
# in the order in which they appear in the split
for heading, split_cuts in (('X_train cuts with failures:', X_train),
                            ('X_val cuts with failures:', X_val),
                            ('X_test cuts with failures:', X_test)):
    print(heading)
    for cut in split_cuts:
        if cut in cuts_with_failure:
            print(cut)