In [7]:
import pandas as pd
import os
from sklearn.preprocessing import StandardScaler, MinMaxScaler, PolynomialFeatures
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
from tsfresh import extract_features
from tsfresh import select_features
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.utilities.dataframe_functions import roll_time_series
from tsfresh.feature_selection.relevance import calculate_relevance_table

In [24]:
# Location of the FD001 training file (whitespace-separated values, no header row).
# NOTE(review): absolute, machine-specific path - adjust it when running elsewhere.
data1_train=r"C:\Users\65962\Desktop\JUPYTER\CMAPSSData\train_FD001.txt"

# Column names assigned to the header-less file, in file order.
columns = [
    'unit_number', 'time_in_cycles', 'op_setting_1', 'op_setting_2', 'op_setting_3',
] + [f'sensor_{i}' for i in range(1, 22)]  # 26 columns in total

# sep=r'\s+' splits on runs of whitespace. It is the documented replacement for
# the deprecated delim_whitespace=True, which emitted a FutureWarning on load.
data1_traindf = pd.read_csv(data1_train, sep=r'\s+', header=None, names=columns)

# Display the first few rows of the dataset
print(data1_traindf.head())

   unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0            1               1       -0.0007       -0.0004         100.0   
1            1               2        0.0019       -0.0003         100.0   
2            1               3       -0.0043        0.0003         100.0   
3            1               4        0.0007        0.0000         100.0   
4            1               5       -0.0019       -0.0002         100.0   

   sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  ...  sensor_12  \
0    518.67    641.82   1589.70   1400.60     14.62  ...     521.66   
1    518.67    642.15   1591.82   1403.14     14.62  ...     522.28   
2    518.67    642.35   1587.99   1404.20     14.62  ...     522.42   
3    518.67    642.35   1582.79   1401.87     14.62  ...     522.86   
4    518.67    642.37   1582.85   1406.22     14.62  ...     522.19   

   sensor_13  sensor_14  sensor_15  sensor_16  sensor_17  sensor_18  \
0    2388.02    8138.62     8.4195       0.03

  data1_traindf= pd.read_csv(data1_train, delim_whitespace=True, header=None, names=columns)


In [25]:
# Largest time_in_cycles seen for each unit_number, repeated on every row of that unit.
data1_traindf['max_cycle'] = (
    data1_traindf
    .groupby('unit_number')['time_in_cycles']
    .transform('max')
)

# Define the function to calculate piecewise linear RUL
def piecewise_linear_rul(row, start=125):
    """Return the remaining-useful-life (RUL) value for a single row.

    The raw value is ``row['max_cycle'] - row['time_in_cycles']``.

    * If ``time_in_cycles <= start`` the raw value is returned unchanged.
    * Otherwise the raw value is floored at 0.

    NOTE(review): whenever ``max_cycle >= time_in_cycles`` both cases return
    the same number, so ``start`` introduces neither a plateau nor a change
    of slope. Confirm whether an early-life cap was intended.

    Parameters
    ----------
    row : mapping-like
        Must support ``row['time_in_cycles']`` and ``row['max_cycle']``.
    start : number, default 125
        Cycle threshold separating the two cases.

    Returns
    -------
    number
        The RUL value for this row.
    """
    cycle = row['time_in_cycles']
    remaining = row['max_cycle'] - cycle

    # Early part of the series: hand back the plain difference.
    if cycle <= start:
        return remaining

    # Later part: same difference, never below zero.
    return max(0, remaining)

# Compute the RUL label one row at a time and store it as a new column
data1_traindf['piecewise_rul'] = data1_traindf.apply(
    piecewise_linear_rul,
    axis='columns',  # same as axis=1: the function receives one row per call
)
# Preview the label next to the columns it is derived from
print(
    data1_traindf
    .loc[:, ['unit_number', 'time_in_cycles', 'max_cycle', 'piecewise_rul']]
    .head()
)

   unit_number  time_in_cycles  max_cycle  piecewise_rul
0            1               1        192          191.0
1            1               2        192          190.0
2            1               3        192          189.0
3            1               4        192          188.0
4            1               5        192          187.0


In [27]:
# Roll the training frame into per-engine windows, using a window size of 40
window_size = 40
onefourtyrolled_df = roll_time_series(
    data1_traindf,
    column_id='unit_number',        # engine identifier
    column_sort='time_in_cycles',   # ordering column within an engine
    rolling_direction=1,            # rolling direction
    max_timeshift=window_size - 1,  # derived from the window size above
)
onefourtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 37/37 [00:05<00:00,  7.17it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 1)"
100,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 2)"
101,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190.0,"(1, 2)"
300,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 3)"
301,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190.0,"(1, 3)"


In [29]:
# Roll the training frame into per-engine windows, using a window size of 50
window_size = 50
onefiftyrolled_df = roll_time_series(
    data1_traindf,
    column_id='unit_number',        # engine identifier
    column_sort='time_in_cycles',   # ordering column within an engine
    rolling_direction=1,            # rolling direction
    max_timeshift=window_size - 1,  # derived from the window size above
)
onefiftyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 37/37 [00:05<00:00,  6.86it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 1)"
100,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 2)"
101,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190.0,"(1, 2)"
300,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 3)"
301,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190.0,"(1, 3)"


In [30]:
# Roll the training frame into per-engine windows, using a window size of 60
window_size = 60
onesixtyrolled_df = roll_time_series(
    data1_traindf,
    column_id='unit_number',        # engine identifier
    column_sort='time_in_cycles',   # ordering column within an engine
    rolling_direction=1,            # rolling direction
    max_timeshift=window_size - 1,  # derived from the window size above
)
onesixtyrolled_df.head()

Rolling: 100%|█████████████████████████████████████████████████████████████████████████| 37/37 [00:05<00:00,  6.92it/s]


Unnamed: 0,unit_number,time_in_cycles,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,max_cycle,piecewise_rul,id
0,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 1)"
100,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 2)"
101,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190.0,"(1, 2)"
300,1,1,-0.0007,-0.0004,100.0,518.67,641.82,1589.7,1400.6,14.62,...,8.4195,0.03,392,2388,100.0,39.06,23.419,192,191.0,"(1, 3)"
301,1,2,0.0019,-0.0003,100.0,518.67,642.15,1591.82,1403.14,14.62,...,8.4318,0.03,392,2388,100.0,39.0,23.4236,192,190.0,"(1, 3)"


In [31]:
#let's try with window size 40
# Identifier columns first, then the chosen sensor channels (named by index).
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{idx}'
    for idx in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
# Print the frame rolled with window size 40 (pandas abbreviates the printed view).
# NOTE(review): selected_columns is not applied here, so every column is printed - confirm this is intended.
print(onefourtyrolled_df)

        unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0                 1               1       -0.0007       -0.0004         100.0   
100               1               1       -0.0007       -0.0004         100.0   
101               1               2        0.0019       -0.0003         100.0   
300               1               1       -0.0007       -0.0004         100.0   
301               1               2        0.0019       -0.0003         100.0   
...             ...             ...           ...           ...           ...   
624835          100             196       -0.0004       -0.0003         100.0   
624836          100             197       -0.0016       -0.0005         100.0   
624837          100             198        0.0004        0.0000         100.0   
624838          100             199       -0.0011        0.0003         100.0   
624839          100             200       -0.0032       -0.0005         100.0   

        sensor_1  sensor_2 

In [32]:
#let's try with 50
# Identifier columns first, then the chosen sensor channels (named by index).
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{idx}'
    for idx in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
# Print the frame rolled with window size 50 (pandas abbreviates the printed view).
# NOTE(review): selected_columns is not applied here, so every column is printed - confirm this is intended.
print(onefiftyrolled_df)

        unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0                 1               1       -0.0007       -0.0004         100.0   
100               1               1       -0.0007       -0.0004         100.0   
101               1               2        0.0019       -0.0003         100.0   
300               1               1       -0.0007       -0.0004         100.0   
301               1               2        0.0019       -0.0003         100.0   
...             ...             ...           ...           ...           ...   
756045          100             196       -0.0004       -0.0003         100.0   
756046          100             197       -0.0016       -0.0005         100.0   
756047          100             198        0.0004        0.0000         100.0   
756048          100             199       -0.0011        0.0003         100.0   
756049          100             200       -0.0032       -0.0005         100.0   

        sensor_1  sensor_2 

In [33]:
#let's try with 60
# Identifier columns first, then the chosen sensor channels (named by index).
selected_columns = ['unit_number', 'time_in_cycles', 'id'] + [
    f'sensor_{idx}'
    for idx in (2, 3, 4, 7, 8, 9, 11, 12, 13, 14, 15, 17, 20, 21)
]
# Print the frame rolled with window size 60 (pandas abbreviates the printed view).
# NOTE(review): selected_columns is not applied here, so every column is printed - confirm this is intended.
print(onesixtyrolled_df)

        unit_number  time_in_cycles  op_setting_1  op_setting_2  op_setting_3  \
0                 1               1       -0.0007       -0.0004         100.0   
100               1               1       -0.0007       -0.0004         100.0   
101               1               2        0.0019       -0.0003         100.0   
300               1               1       -0.0007       -0.0004         100.0   
301               1               2        0.0019       -0.0003         100.0   
...             ...             ...           ...           ...           ...   
824455          100             196       -0.0004       -0.0003         100.0   
824456          100             197       -0.0016       -0.0005         100.0   
824457          100             198        0.0004        0.0000         100.0   
824458          100             199       -0.0011        0.0003         100.0   
824459          100             200       -0.0032       -0.0005         100.0   

        sensor_1  sensor_2 