In [12]:
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.utilities.dataframe_functions import roll_time_series
from tsfresh.feature_selection.relevance import calculate_relevance_table

In [13]:
# Location of the raw training file (train_FD004.txt).
# NOTE(review): this is an absolute, user-specific Windows path; the notebook
# only runs on another machine after it is pointed at a local copy.
data4_train = r"C:\Users\65962\Desktop\JUPYTER\CMAPSSData\train_FD004.txt"

# The file is read without a header row, so column names are supplied here:
# two identifier columns, three operating settings, then sensor_1 .. sensor_21.
columns = [
    'unit_number', 'time_in_cycles', 'op_setting_1', 'op_setting_2', 'op_setting_3',
] + [f'sensor_{i}' for i in range(1, 22)]  # 26 columns in total

# sep=r"\s+" splits on runs of whitespace. It replaces the deprecated
# delim_whitespace=True, which emits a FutureWarning on recent pandas.
data4_traindf = pd.read_csv(data4_train, sep=r"\s+", header=None, names=columns)

# Display the first few rows of the dataset
print(data4_traindf.head())

  data4_traindf= pd.read_csv(data4_train, delim_whitespace=True, header=None, names=columns)


   unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0            1               1       42.0049        0.8400         100.0   
1            1               2       20.0020        0.7002         100.0   
2            1               3       42.0038        0.8409         100.0   
3            1               4       42.0000        0.8400         100.0   
4            1               5       25.0063        0.6207          60.0   

   sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  sensor_12  \
0    445.00    549.68   1343.43   1112.93      3.91  ...     129.78   
1    491.19    606.07   1477.61   1237.50      9.35  ...     312.59   
2    445.00    548.95   1343.12   1117.05      3.91  ...     129.62   
3    445.00    548.70   1341.24   1118.03      3.91  ...     129.80   
4    462.54    536.10   1255.23   1033.59      7.05  ...     164.11   

   sensor_13  sensor_14  sensor_15  sensor_16  sensor_17  sensor_18  \
0    2387.99    8074.83     9.3335       0.02

In [14]:
# Broadcast each unit's largest observed cycle count onto all of that unit's rows.
data4_traindf['max_cycle'] = (
    data4_traindf
    .groupby('unit_number')['time_in_cycles']
    .transform('max')
)

# Define the function to calculate piecewise linear RUL
def piecewise_linear_rul(row, start=125):
    """Return the remaining useful life (RUL) for a single row.

    Parameters
    ----------
    row : mapping
        Must provide the keys 'time_in_cycles' and 'max_cycle'.
    start : int, default 125
        Cycle threshold. Rows at or before it get the raw difference;
        rows after it get the difference floored at 0.

    Returns
    -------
    number
        ``row['max_cycle'] - row['time_in_cycles']``, clamped to be
        non-negative only when ``row['time_in_cycles'] > start``.

    NOTE(review): no plateau/cap is applied before ``start``, so whenever
    max_cycle >= time_in_cycles both branches yield the same plain linear
    RUL. Confirm whether a capped early-life RUL was intended.
    """
    remaining = row['max_cycle'] - row['time_in_cycles']
    if row['time_in_cycles'] > start:
        # Past the threshold: never report a negative RUL.
        return max(0, remaining)
    # At or before the threshold: raw difference, no clamping.
    return remaining

# Compute the RUL target row by row with the helper defined above,
# then preview the columns that take part in the calculation.
data4_traindf['piecewise_rul'] = data4_traindf.apply(
    piecewise_linear_rul,
    axis=1,
)
print(
    data4_traindf[
        ['unit_number', 'time_in_cycles', 'max_cycle', 'piecewise_rul']
    ].head()
)

   unit_number  time_in_cycles  max_cycle  piecewise_rul
0            1               1        321          320.0
1            1               2        321          319.0
2            1               3        321          318.0
3            1               4        321          317.0
4            1               5        321          316.0


In [15]:
# Build rolling sub-series per engine with windows of at most 40 cycles.
window_size = 40
fourtyrolled_df = roll_time_series(
    data4_traindf,
    column_id='unit_number',          # one series per engine
    column_sort='time_in_cycles',     # chronological order within an engine
    rolling_direction=1,              # roll forward in time
    max_timeshift=window_size - 1,    # a shift of n-1 gives windows of up to n rows
)
fourtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 39/39 [00:11<00:00,  3.32it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 1)"
249,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 2)"
250,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,9.1913,0.02,361,2324,100.0,24.37,14.6552,321,319.0,"(1, 2)"
747,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 3)"
748,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,9.1913,0.02,361,2324,100.0,24.37,14.6552,321,319.0,"(1, 3)"


In [16]:
# Build rolling sub-series per engine with windows of at most 50 cycles.
window_size = 50
fiftyrolled_df = roll_time_series(
    data4_traindf,
    column_id='unit_number',          # one series per engine
    column_sort='time_in_cycles',     # chronological order within an engine
    rolling_direction=1,              # roll forward in time
    max_timeshift=window_size - 1,    # a shift of n-1 gives windows of up to n rows
)
fiftyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 39/39 [00:11<00:00,  3.28it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 1)"
249,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 2)"
250,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,9.1913,0.02,361,2324,100.0,24.37,14.6552,321,319.0,"(1, 2)"
747,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 3)"
748,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,9.1913,0.02,361,2324,100.0,24.37,14.6552,321,319.0,"(1, 3)"


In [17]:
# Build rolling sub-series per engine with windows of at most 60 cycles.
window_size = 60
sixtyrolled_df = roll_time_series(
    data4_traindf,
    column_id='unit_number',          # one series per engine
    column_sort='time_in_cycles',     # chronological order within an engine
    rolling_direction=1,              # roll forward in time
    max_timeshift=window_size - 1,    # a shift of n-1 gives windows of up to n rows
)
sixtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 39/39 [00:12<00:00,  3.20it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 1)"
249,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 2)"
250,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,9.1913,0.02,361,2324,100.0,24.37,14.6552,321,319.0,"(1, 2)"
747,1,1,42.0049,0.84,100.0,445.0,549.68,1343.43,1112.93,3.91,...,9.3335,0.02,330,2212,100.0,10.62,6.367,321,320.0,"(1, 3)"
748,1,2,20.002,0.7002,100.0,491.19,606.07,1477.61,1237.5,9.35,...,9.1913,0.02,361,2324,100.0,24.37,14.6552,321,319.0,"(1, 3)"


In [18]:
# Window size 40: identifier/time columns plus a subset of the sensor channels.
# NOTE(review): selected_columns is only defined here; the print below shows
# the full rolled frame, not the selection. Presumably it is applied in a later cell.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{i}' for i in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(fourtyrolled_df)

         unit_number  time_in_cycles  op_setting_1  op_setting_2  \
0                  1               1       42.0049        0.8400   
249                1               1       42.0049        0.8400   
250                1               2       20.0020        0.7002   
747                1               1       42.0049        0.8400   
748                1               2       20.0020        0.7002   
...              ...             ...           ...           ...   
1890495          249             251        9.9998        0.2500   
1890496          249             252        0.0028        0.0015   
1890497          249             253        0.0029        0.0000   
1890498          249             254       35.0046        0.8400   
1890499          249             255       42.0030        0.8400   

         op_setting_3  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  \
0               100.0    445.00    549.68   1343.43   1112.93      3.91  ...   
249             100.0  

In [19]:
# Window size 50: identifier/time columns plus a subset of the sensor channels.
# NOTE(review): selected_columns is only defined here; the print below shows
# the full rolled frame, not the selection. Presumably it is applied in a later cell.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{i}' for i in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(fiftyrolled_df)

         unit_number  time_in_cycles  op_setting_1  op_setting_2  \
0                  1               1       42.0049        0.8400   
249                1               1       42.0049        0.8400   
250                1               2       20.0020        0.7002   
747                1               1       42.0049        0.8400   
748                1               2       20.0020        0.7002   
...              ...             ...           ...           ...   
2630420          249             251        9.9998        0.2500   
2630421          249             252        0.0028        0.0015   
2630422          249             253        0.0029        0.0000   
2630423          249             254       35.0046        0.8400   
2630424          249             255       42.0030        0.8400   

         op_setting_3  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  \
0               100.0    445.00    549.68   1343.43   1112.93      3.91  ...   
249             100.0  

In [20]:
# Window size 60: identifier/time columns plus a subset of the sensor channels.
# NOTE(review): selected_columns is only defined here; the print below shows
# the full rolled frame, not the selection. Presumably it is applied in a later cell.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{i}' for i in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(sixtyrolled_df)

         unit_number  time_in_cycles  op_setting_1  op_setting_2  \
0                  1               1       42.0049        0.8400   
249                1               1       42.0049        0.8400   
250                1               2       20.0020        0.7002   
747                1               1       42.0049        0.8400   
748                1               2       20.0020        0.7002   
...              ...             ...           ...           ...   
2958085          249             251        9.9998        0.2500   
2958086          249             252        0.0028        0.0015   
2958087          249             253        0.0029        0.0000   
2958088          249             254       35.0046        0.8400   
2958089          249             255       42.0030        0.8400   

         op_setting_3  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  \
0               100.0    445.00    549.68   1343.43   1112.93      3.91  ...   
249             100.0  