In [2]:
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.utilities.dataframe_functions import roll_time_series
from tsfresh.feature_selection.relevance import calculate_relevance_table

In [3]:
# Location of the FD003 training file.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
data3_train=r"C:\Users\65962\Desktop\JUPYTER\CMAPSSData\train_FD003.txt"

# Names assigned to the header-less file: two identifier columns,
# three operational settings, then sensor_1 .. sensor_21.
columns = [
    'unit_number', 'time_in_cycles', 'op_setting_1', 'op_setting_2', 'op_setting_3',
] + [f'sensor_{i}' for i in range(1, 22)]  # 26 columns in total

# Fields are separated by runs of whitespace. sep=r'\s+' is the supported
# spelling of the deprecated delim_whitespace=True flag and parses the same way.
data3_traindf = pd.read_csv(data3_train, sep=r'\s+', header=None, names=columns)

# Display the first few rows of the dataset
print(data3_traindf.head())

   unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0            1               1       -0.0005        0.0004         100.0   
1            1               2        0.0008       -0.0003         100.0   
2            1               3       -0.0014       -0.0002         100.0   
3            1               4       -0.0020        0.0001         100.0   
4            1               5        0.0016        0.0000         100.0   

   sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  sensor_12  \
0    518.67    642.36   1583.23   1396.84     14.62  ...     522.31   
1    518.67    642.50   1584.69   1396.89     14.62  ...     522.42   
2    518.67    642.18   1582.35   1405.61     14.62  ...     522.03   
3    518.67    642.92   1585.61   1392.27     14.62  ...     522.49   
4    518.67    641.68   1588.63   1397.65     14.62  ...     522.58   

   sensor_13  sensor_14  sensor_15  sensor_16  sensor_17  sensor_18  \
0    2388.01    8145.32     8.4246       0.03

  data3_traindf= pd.read_csv(data3_train, delim_whitespace=True, header=None, names=columns)


In [4]:
# For every row, store the largest time_in_cycles observed for that row's unit_number.
data3_traindf['max_cycle'] = (
    data3_traindf
    .groupby('unit_number')['time_in_cycles']
    .transform('max')
)

# Define the function to calculate piecewise linear RUL
def piecewise_linear_rul(row, start=125):
    """Return the remaining-useful-life value for a single row.

    Parameters
    ----------
    row : mapping-like
        Must provide 'time_in_cycles' and 'max_cycle' by key.
    start : number, default 125
        Cycle threshold. Rows with time_in_cycles <= start return the raw
        difference; rows past the threshold return the difference clamped
        so it is never below 0.

    Returns
    -------
    number
        row['max_cycle'] - row['time_in_cycles'], floored at 0 only when
        time_in_cycles is greater than `start`.

    NOTE(review): both branches compute the same difference, so `start`
    only decides where the clamp at 0 applies. If a different early-life
    shape (e.g. a capped RUL) was intended, this does not implement it —
    confirm the intended definition.
    """
    elapsed = row['time_in_cycles']
    before_threshold = elapsed <= start
    cycles_left = row['max_cycle'] - elapsed
    if before_threshold:
        return cycles_left
    # Past the threshold: never report a negative remaining life.
    return max(0, cycles_left)

# Compute the RUL value row by row and store it as a new column,
# then preview the identifier, cycle and RUL columns for the first rows.
data3_traindf['piecewise_rul'] = data3_traindf.apply(piecewise_linear_rul, axis='columns')
print(
    data3_traindf.loc[
        :, ['unit_number', 'time_in_cycles', 'max_cycle', 'piecewise_rul']
    ].head()
)

   unit_number  time_in_cycles  max_cycle  piecewise_rul
0            1               1        259          258.0
1            1               2        259          257.0
2            1               3        259          256.0
3            1               4        259          255.0
4            1               5        259          254.0


In [5]:
# Roll the training frame per engine with a window size of 40.
# max_timeshift is passed as window_size - 1.
window_size = 40
fourtyrolled_df = roll_time_series(
    data3_traindf,                    # frame to roll
    column_id='unit_number',          # one series per engine
    column_sort='time_in_cycles',     # order rows by cycle
    rolling_direction=1,              # roll forward in time
    max_timeshift=window_size - 1,    # window size for rolling
)
fourtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:05<00:00,  6.87it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 1)"
100,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 2)"
101,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,8.4403,0.03,392,2388,100.0,38.99,23.4491,259,257.0,"(1, 2)"
300,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 3)"
301,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,8.4403,0.03,392,2388,100.0,38.99,23.4491,259,257.0,"(1, 3)"


In [6]:
# Roll the training frame per engine with a window size of 50.
# max_timeshift is passed as window_size - 1.
window_size = 50
fiftyrolled_df = roll_time_series(
    data3_traindf,                    # frame to roll
    column_id='unit_number',          # one series per engine
    column_sort='time_in_cycles',     # order rows by cycle
    rolling_direction=1,              # roll forward in time
    max_timeshift=window_size - 1,    # window size for rolling
)
fiftyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:05<00:00,  6.36it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 1)"
100,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 2)"
101,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,8.4403,0.03,392,2388,100.0,38.99,23.4491,259,257.0,"(1, 2)"
300,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 3)"
301,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,8.4403,0.03,392,2388,100.0,38.99,23.4491,259,257.0,"(1, 3)"


In [7]:
# Roll the training frame per engine with a window size of 60.
# max_timeshift is passed as window_size - 1.
window_size = 60
sixtyrolled_df = roll_time_series(
    data3_traindf,                    # frame to roll
    column_id='unit_number',          # one series per engine
    column_sort='time_in_cycles',     # order rows by cycle
    rolling_direction=1,              # roll forward in time
    max_timeshift=window_size - 1,    # window size for rolling
)
sixtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:05<00:00,  6.49it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 1)"
100,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 2)"
101,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,8.4403,0.03,392,2388,100.0,38.99,23.4491,259,257.0,"(1, 2)"
300,1,1,-0.0005,0.0004,100.0,518.67,642.36,1583.23,1396.84,14.62,...,8.4246,0.03,391,2388,100.0,39.11,23.3537,259,258.0,"(1, 3)"
301,1,2,0.0008,-0.0003,100.0,518.67,642.5,1584.69,1396.89,14.62,...,8.4403,0.03,392,2388,100.0,38.99,23.4491,259,257.0,"(1, 3)"


In [8]:
# Let's try with window size 40.
# Columns of interest: the identifier columns plus a subset of the sensors.
# NOTE(review): selected_columns is not used in this cell; the full rolled frame is printed.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{n}' for n in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(fourtyrolled_df)

        unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0                 1               1       -0.0005        0.0004         100.0   
100               1               1       -0.0005        0.0004         100.0   
101               1               2        0.0008       -0.0003         100.0   
300               1               1       -0.0005        0.0004         100.0   
301               1               2        0.0008       -0.0003         100.0   
...             ...             ...           ...           ...           ...   
680275          100             148       -0.0016       -0.0003         100.0   
680276          100             149        0.0034       -0.0003         100.0   
680277          100             150       -0.0016        0.0004         100.0   
680278          100             151       -0.0023        0.0004         100.0   
680279          100             152        0.0000        0.0003         100.0   

        sensor_1  sensor_2 

In [9]:
# Let's try with window size 50.
# Columns of interest: the identifier columns plus a subset of the sensors.
# NOTE(review): selected_columns is not used in this cell; the full rolled frame is printed.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{n}' for n in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(fiftyrolled_df)

        unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0                 1               1       -0.0005        0.0004         100.0   
100               1               1       -0.0005        0.0004         100.0   
101               1               2        0.0008       -0.0003         100.0   
300               1               1       -0.0005        0.0004         100.0   
301               1               2        0.0008       -0.0003         100.0   
...             ...             ...           ...           ...           ...   
825345          100             148       -0.0016       -0.0003         100.0   
825346          100             149        0.0034       -0.0003         100.0   
825347          100             150       -0.0016        0.0004         100.0   
825348          100             151       -0.0023        0.0004         100.0   
825349          100             152        0.0000        0.0003         100.0   

        sensor_1  sensor_2 

In [11]:
# Let's try with window size 60.
# Columns of interest: the identifier columns plus a subset of the sensors.
# NOTE(review): selected_columns is not used in this cell; the full rolled frame is printed.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{n}' for n in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(sixtyrolled_df)

        unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0                 1               1       -0.0005        0.0004         100.0   
100               1               1       -0.0005        0.0004         100.0   
101               1               2        0.0008       -0.0003         100.0   
300               1               1       -0.0005        0.0004         100.0   
301               1               2        0.0008       -0.0003         100.0   
...             ...             ...           ...           ...           ...   
889915          100             148       -0.0016       -0.0003         100.0   
889916          100             149        0.0034       -0.0003         100.0   
889917          100             150       -0.0016        0.0004         100.0   
889918          100             151       -0.0023        0.0004         100.0   
889919          100             152        0.0000        0.0003         100.0   

        sensor_1  sensor_2 