In [7]:
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.utilities.dataframe_functions import roll_time_series
from tsfresh.feature_selection.relevance import calculate_relevance_table

In [8]:
# Location of the C-MAPSS FD002 training file.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists. Consider a configurable data directory.
data2_train=r"C:\Users\65962\Desktop\JUPYTER\CMAPSSData\train_FD002.txt"

# The file has no header row, so the column names are supplied here:
# 2 identifier columns + 3 operating settings + 21 sensors = 26 columns.
columns = [
    'unit_number', 'time_in_cycles', 'op_setting_1', 'op_setting_2', 'op_setting_3',
] + [f'sensor_{i}' for i in range(1, 22)]  # 26 columns in total

# `sep=r"\s+"` is the documented equivalent of `delim_whitespace=True`, which is
# deprecated since pandas 2.2 and emitted a FutureWarning on every run.
data2_traindf = pd.read_csv(data2_train, sep=r"\s+", header=None, names=columns)

# Display the first few rows of the dataset
print(data2_traindf.head())

  data2_traindf= pd.read_csv(data2_train, delim_whitespace=True, header=None, names=columns)


   unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0            1               1       34.9983        0.8400         100.0   
1            1               2       41.9982        0.8408         100.0   
2            1               3       24.9988        0.6218          60.0   
3            1               4       42.0077        0.8416         100.0   
4            1               5       25.0005        0.6203          60.0   

   sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  sensor_12  \
0    449.44    555.32   1358.61   1137.23      5.48  ...     183.06   
1    445.00    549.90   1353.22   1125.78      3.91  ...     130.42   
2    462.54    537.31   1256.76   1047.45      7.05  ...     164.22   
3    445.00    549.51   1354.03   1126.38      3.91  ...     130.72   
4    462.54    537.07   1257.71   1047.93      7.05  ...     164.31   

   sensor_13  sensor_14  sensor_15  sensor_16  sensor_17  sensor_18  \
0    2387.72    8048.56     9.3461       0.02

In [9]:
# For every row, store the largest time_in_cycles observed for that row's unit_number.
data2_traindf['max_cycle'] = (
    data2_traindf
    .groupby('unit_number')['time_in_cycles']
    .transform('max')
)

# Define the function to calculate piecewise linear RUL
def piecewise_linear_rul(row, start=125):
    """Return the remaining useful life (RUL) for a single record.

    Args:
        row: Mapping-like record exposing ``'time_in_cycles'`` and
            ``'max_cycle'`` (e.g. a DataFrame row or a dict).
        start: Cycle threshold that selects the branch.

    Returns:
        ``max_cycle - time_in_cycles`` when ``time_in_cycles <= start``;
        otherwise the same difference floored at 0.
    """
    current_cycle = row['time_in_cycles']
    cycles_left = row['max_cycle'] - current_cycle
    # NOTE(review): whenever max_cycle >= time_in_cycles both branches return the
    # same value, so the result is plain linear RUL and `start` has no effect --
    # confirm whether a cap on early-life RUL was intended.
    if current_cycle <= start:
        return cycles_left
    return max(0, cycles_left)

# Compute the RUL label by calling piecewise_linear_rul on each row of the frame.
data2_traindf['piecewise_rul'] = data2_traindf.apply(piecewise_linear_rul, axis='columns')

# Preview the identifier, cycle and label columns for the first rows.
print(
    data2_traindf
    .loc[:, ['unit_number', 'time_in_cycles', 'max_cycle', 'piecewise_rul']]
    .head()
)

   unit_number  time_in_cycles  max_cycle  piecewise_rul
0            1               1        149          148.0
1            1               2        149          147.0
2            1               3        149          146.0
3            1               4        149          145.0
4            1               5        149          144.0


In [17]:
# Roll data2_traindf per engine with window_size = 40 (max_timeshift = 39).
# The rolled frame carries an extra `id` column such as "(1, 2)" for each sub-series.
window_size = 40
fourtyrolled_df = roll_time_series(
    data2_traindf,
    column_id='unit_number',          # engines are rolled independently
    column_sort='time_in_cycles',     # ordering column within each engine
    rolling_direction=1,
    max_timeshift=window_size - 1,
)
fourtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:08<00:00,  4.30it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 1)"
260,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 2)"
261,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,9.3774,0.02,330,2212,100.0,10.41,6.2665,149,147.0,"(1, 2)"
780,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 3)"
781,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,9.3774,0.02,330,2212,100.0,10.41,6.2665,149,147.0,"(1, 3)"


In [18]:
# Roll data2_traindf per engine with window_size = 50 (max_timeshift = 49).
# The rolled frame carries an extra `id` column such as "(1, 2)" for each sub-series.
window_size = 50
fiftyrolled_df = roll_time_series(
    data2_traindf,
    column_id='unit_number',          # engines are rolled independently
    column_sort='time_in_cycles',     # ordering column within each engine
    rolling_direction=1,
    max_timeshift=window_size - 1,
)
fiftyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:09<00:00,  4.21it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 1)"
260,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 2)"
261,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,9.3774,0.02,330,2212,100.0,10.41,6.2665,149,147.0,"(1, 2)"
780,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 3)"
781,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,9.3774,0.02,330,2212,100.0,10.41,6.2665,149,147.0,"(1, 3)"


In [19]:
# Roll data2_traindf per engine with window_size = 60 (max_timeshift = 59).
# The rolled frame carries an extra `id` column such as "(1, 2)" for each sub-series.
window_size = 60
sixtyrolled_df = roll_time_series(
    data2_traindf,
    column_id='unit_number',          # engines are rolled independently
    column_sort='time_in_cycles',     # ordering column within each engine
    rolling_direction=1,
    max_timeshift=window_size - 1,
)
sixtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 38/38 [00:09<00:00,  4.07it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 1)"
260,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 2)"
261,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,9.3774,0.02,330,2212,100.0,10.41,6.2665,149,147.0,"(1, 2)"
780,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,9.3461,0.02,334,2223,100.0,14.73,8.8071,149,148.0,"(1, 3)"
781,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,9.3774,0.02,330,2212,100.0,10.41,6.2665,149,147.0,"(1, 3)"


In [21]:
# Window size 40: identifier columns plus a subset of the sensor channels.
# NOTE(review): selected_columns is only defined in this cell; the print below
# shows the whole rolled frame, not the selected subset.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{idx}' for idx in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(fourtyrolled_df)

         unit_number  time_in_cycles  op_setting_1  op_setting_2  \
0                  1               1       34.9983        0.8400   
260                1               1       34.9983        0.8400   
261                1               2       41.9982        0.8408   
780                1               1       34.9983        0.8400   
781                1               2       41.9982        0.8408   
...              ...             ...           ...           ...   
1938715          260             312       20.0037        0.7000   
1938716          260             313       10.0022        0.2510   
1938717          260             314       25.0041        0.6200   
1938718          260             315       25.0033        0.6220   
1938719          260             316       35.0036        0.8400   

         op_setting_3  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  \
0               100.0    449.44    555.32   1358.61   1137.23      5.48  ...   
260             100.0  

In [22]:
# Window size 50: identifier columns plus a subset of the sensor channels.
# NOTE(review): selected_columns is only defined in this cell; the print below
# shows the whole rolled frame, not the selected subset.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{idx}' for idx in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(fiftyrolled_df)

         unit_number  time_in_cycles  op_setting_1  op_setting_2  \
0                  1               1       34.9983        0.8400   
260                1               1       34.9983        0.8400   
261                1               2       41.9982        0.8408   
780                1               1       34.9983        0.8400   
781                1               2       41.9982        0.8408   
...              ...             ...           ...           ...   
2358395          260             312       20.0037        0.7000   
2358396          260             313       10.0022        0.2510   
2358397          260             314       25.0041        0.6200   
2358398          260             315       25.0033        0.6220   
2358399          260             316       35.0036        0.8400   

         op_setting_3  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  \
0               100.0    449.44    555.32   1358.61   1137.23      5.48  ...   
260             100.0  

In [23]:
# Window size 60: identifier columns plus a subset of the sensor channels.
# NOTE(review): selected_columns is only defined in this cell; the print below
# shows the whole rolled frame, not the selected subset.
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{idx}' for idx in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
print(sixtyrolled_df)

         unit_number  time_in_cycles  op_setting_1  op_setting_2  \
0                  1               1       34.9983        0.8400   
260                1               1       34.9983        0.8400   
261                1               2       41.9982        0.8408   
780                1               1       34.9983        0.8400   
781                1               2       41.9982        0.8408   
...              ...             ...           ...           ...   
2752075          260             312       20.0037        0.7000   
2752076          260             313       10.0022        0.2510   
2752077          260             314       25.0041        0.6200   
2752078          260             315       25.0033        0.6220   
2752079          260             316       35.0036        0.8400   

         op_setting_3  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  \
0               100.0    449.44    555.32   1358.61   1137.23      5.48  ...   
260             100.0  