# CMAPSS Dataset: Loading and Preparation

### Importing required libraries

In [1]:
import math
import time

import numpy as np
import pandas as pd
import sklearn
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split, cross_val_score

import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Activation, Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential, load_model

import seaborn as sns
import matplotlib.pyplot as plt
from pylab import rcParams
from tqdm import tqdm
import xgboost

In [2]:
# Pin NumPy's global random state so NumPy-based random draws repeat across runs.
np.random.seed(seed=42)

In [3]:
# Show the kernel's current working directory (explicit line magic rather than automagic).
%pwd

'/home/yash/projects/prognostics_thesis'

In [4]:
# List the files in the dataset directory (explicit line magic rather than automagic).
%ls -g CMAPSSData/

total 44324
-rw-r--r-- 1 yash   434158 Oct  3  2008 'Damage Propagation Modeling.pdf'
-rw-r--r-- 1 yash     2442 Oct  3  2008  readme.txt
-rw-r--r-- 1 yash      429 Oct  2  2008  RUL_FD001.txt
-rw-r--r-- 1 yash     1110 Oct  2  2008  RUL_FD002.txt
-rw-r--r-- 1 yash      428 Oct  2  2008  RUL_FD003.txt
-rw-r--r-- 1 yash     1084 Oct  2  2008  RUL_FD004.txt
-rw-r--r-- 1 yash  2228855 Mar 25  2008  test_FD001.txt
-rw-r--r-- 1 yash  5734587 Sep 17  2008  test_FD002.txt
-rw-r--r-- 1 yash  2826651 Mar 25  2008  test_FD003.txt
-rw-r--r-- 1 yash  6957759 Sep 17  2008  test_FD004.txt
-rw-r--r-- 1 yash  3515356 Mar 25  2008  train_FD001.txt
-rw-r--r-- 1 yash  9082480 Sep 17  2008  train_FD002.txt
-rw-r--r-- 1 yash  4213862 Mar 25  2008  train_FD003.txt
-rw-r--r-- 1 yash 10350705 Sep 17  2008  train_FD004.txt


In [5]:
# Load the four training files and the four test files from ./CMAPSSData.
# Every file is parsed as single-space-delimited text without a header row,
# so pandas labels the columns with integers starting at 0.

train_FD001, train_FD002, train_FD003, train_FD004 = (
    pd.read_csv(train_path, sep=" ", header=None)
    for train_path in (
        './CMAPSSData/train_FD001.txt',
        './CMAPSSData/train_FD002.txt',
        './CMAPSSData/train_FD003.txt',
        './CMAPSSData/train_FD004.txt',
    )
)

test_FD001, test_FD002, test_FD003, test_FD004 = (
    pd.read_csv(test_path, sep=" ", header=None)
    for test_path in (
        './CMAPSSData/test_FD001.txt',
        './CMAPSSData/test_FD002.txt',
        './CMAPSSData/test_FD003.txt',
        './CMAPSSData/test_FD004.txt',
    )
)

In [6]:
# Summary statistics of the first training file (FD001) with its integer
# column labels; note the count of 0 reported for columns 26 and 27.
train_FD001.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
count,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,...,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,0.0,0.0
mean,51.506568,108.807862,-9e-06,2e-06,100.0,518.67,642.680934,1590.523119,1408.933782,14.62,...,8143.752722,8.442146,0.03,393.210654,2388.0,100.0,38.816271,23.289705,,
std,29.227633,68.88099,0.002187,0.000293,0.0,0.0,0.500053,6.13115,9.000605,1.7764e-15,...,19.076176,0.037505,1.3878120000000003e-17,1.548763,0.0,0.0,0.180746,0.108251,,
min,1.0,1.0,-0.0087,-0.0006,100.0,518.67,641.21,1571.04,1382.25,14.62,...,8099.94,8.3249,0.03,388.0,2388.0,100.0,38.14,22.8942,,
25%,26.0,52.0,-0.0015,-0.0002,100.0,518.67,642.325,1586.26,1402.36,14.62,...,8133.245,8.4149,0.03,392.0,2388.0,100.0,38.7,23.2218,,
50%,52.0,104.0,0.0,0.0,100.0,518.67,642.64,1590.1,1408.04,14.62,...,8140.54,8.4389,0.03,393.0,2388.0,100.0,38.83,23.2979,,
75%,77.0,156.0,0.0015,0.0003,100.0,518.67,643.0,1594.38,1414.555,14.62,...,8148.31,8.4656,0.03,394.0,2388.0,100.0,38.95,23.3668,,
max,100.0,362.0,0.0087,0.0006,100.0,518.67,644.53,1616.91,1441.49,14.62,...,8293.72,8.5848,0.03,400.0,2388.0,100.0,39.43,23.6184,,


Columns 26 and 27 are completely empty (a count of 0 in the summary above), so they are removed from every training and test set.

In [7]:
# Remove the two empty columns (26 and 27) from every training and test frame.
# The frames are modified in place, so the existing variable names keep
# referring to the cleaned data.
# errors="ignore" makes this cell safe to re-run: once the columns have been
# dropped (or the frame has been relabelled), a repeated drop is a no-op
# instead of raising KeyError.
for _frame in (
    train_FD001, train_FD002, train_FD003, train_FD004,
    test_FD001, test_FD002, test_FD003, test_FD004,
):
    _frame.drop(columns=[26, 27], inplace=True, errors="ignore")

Label every column with the name of the parameter it represents:

In [8]:
# Descriptive names, in file column order, for the 26 columns of each data frame.
column_labels = [
    'unit_number', 'time_in_cycles',
    'setting_1', 'setting_2', 'TRA',
    'T2', 'T24', 'T30', 'T50',
    'P2', 'P15', 'P30',
    'Nf', 'Nc', 'epr', 'Ps30', 'phi',
    'NRf', 'NRc', 'BPR', 'farB', 'htBleed',
    'Nf_dmd', 'PCNfR_dmd',
    'W31', 'W32',
]

In [9]:
# Apply the descriptive column names to all training and test frames.
for _frame in (
    train_FD001, train_FD002, train_FD003, train_FD004,
    test_FD001, test_FD002, test_FD003, test_FD004,
):
    _frame.columns = column_labels

In [10]:
# Summary statistics of the FD001 training set, now shown with descriptive column names.
train_FD001.describe()

Unnamed: 0,unit_number,time_in_cycles,setting_1,setting_2,TRA,T2,T24,T30,T50,P2,...,phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32
count,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,...,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0,20631.0
mean,51.506568,108.807862,-9e-06,2e-06,100.0,518.67,642.680934,1590.523119,1408.933782,14.62,...,521.41347,2388.096152,8143.752722,8.442146,0.03,393.210654,2388.0,100.0,38.816271,23.289705
std,29.227633,68.88099,0.002187,0.000293,0.0,0.0,0.500053,6.13115,9.000605,1.7764e-15,...,0.737553,0.071919,19.076176,0.037505,1.3878120000000003e-17,1.548763,0.0,0.0,0.180746,0.108251
min,1.0,1.0,-0.0087,-0.0006,100.0,518.67,641.21,1571.04,1382.25,14.62,...,518.69,2387.88,8099.94,8.3249,0.03,388.0,2388.0,100.0,38.14,22.8942
25%,26.0,52.0,-0.0015,-0.0002,100.0,518.67,642.325,1586.26,1402.36,14.62,...,520.96,2388.04,8133.245,8.4149,0.03,392.0,2388.0,100.0,38.7,23.2218
50%,52.0,104.0,0.0,0.0,100.0,518.67,642.64,1590.1,1408.04,14.62,...,521.48,2388.09,8140.54,8.4389,0.03,393.0,2388.0,100.0,38.83,23.2979
75%,77.0,156.0,0.0015,0.0003,100.0,518.67,643.0,1594.38,1414.555,14.62,...,521.95,2388.14,8148.31,8.4656,0.03,394.0,2388.0,100.0,38.95,23.3668
max,100.0,362.0,0.0087,0.0006,100.0,518.67,644.53,1616.91,1441.49,14.62,...,523.38,2388.56,8293.72,8.5848,0.03,400.0,2388.0,100.0,39.43,23.6184
