## Original Datasetの処理
- Original Dataset: https://data.nasa.gov/Aerospace/CMAPSS-Jet-Engine-Simulated-Data/ff5v-kuh6/about_data

In [1]:
import pandas as pd

In [3]:
# Read the raw training file as headerless, single-space-delimited text.
raw_path = '../data_original/train_FD002.txt'
df = pd.read_csv(raw_path, sep=' ', header=None)
print(df.shape)
df.head()

(53759, 28)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,18,19,20,21,22,23,24,25,26,27
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071,,
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665,,
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723,,
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701,,
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286,,


In [4]:
# Column labels, grouped for readability: row identifiers, the three
# settings, and the remaining measurement channels (presumably sensor
# readings -- confirm against the dataset documentation).
id_columns = ['unit_ID', 'cycles']
setting_columns = ['setting_1', 'setting_2', 'setting_3']
measurement_columns = [
    'T2', 'T24', 'T30', 'T50', 'P2', 'P15', 'P30', 'Nf',
    'Nc', 'epr', 'Ps30', 'phi', 'NRf', 'NRc', 'BPR', 'farB',
    'htBleed', 'Nf_dmd', 'PCNfR_dmd', 'W31', 'W32',
]
column_names = id_columns + setting_columns + measurement_columns

# The last two columns are not needed (they show up empty in the preview),
# so drop them before attaching the 26 names.
df = df.iloc[:, :-2]
df.columns = column_names
df.head()

Unnamed: 0,unit_ID,cycles,setting_1,setting_2,setting_3,T2,T24,T30,T50,P2,...,phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32
0,1,1,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,5.48,...,183.06,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071
1,1,2,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,3.91,...,130.42,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665
2,1,3,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,7.05,...,164.22,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723
3,1,4,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,3.91,...,130.72,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701
4,1,5,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,7.05,...,164.31,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286


In [5]:
# Order the rows by unit first, then by cycle within each unit.
sort_keys = ['unit_ID', 'cycles']
df = df.sort_values(by=sort_keys)

In [7]:
# Build time_to_deterioration: for every row, the number of rows recorded for
# that unit minus the row's own cycle value. Per the original note this is the
# number of cycles remaining until replacement -- a countdown, not a literal
# inverse of `cycles`.
# groupby/transform returns one value per row in df's own row order, so the
# resulting list lines up with df even if a unit's rows are not contiguous,
# and the frame is scanned once instead of once per unit.
rows_per_unit = df.groupby('unit_ID')['cycles'].transform('size')
time_to_deterioration = (rows_per_unit - df['cycles']).to_list()

len(time_to_deterioration)

53759

In [8]:
# Attach the computed values to the frame as a new column.
df = df.assign(time_to_deterioration=time_to_deterioration)

In [11]:
# Narrow frame holding only the two key columns and the computed column.
ttd_columns = ['unit_ID', 'cycles', 'time_to_deterioration']
df_ttd = df.loc[:, ttd_columns]
print(df_ttd.shape)
df_ttd.head()

(53759, 3)


Unnamed: 0,unit_ID,cycles,time_to_deterioration
0,1,1,148
1,1,2,147
2,1,3,146
3,1,4,145
4,1,5,144


In [14]:
# time_to_deterioration is the number of cycles remaining until replacement,
# as of the cycle recorded on that row. Show unit 1 as an example.
is_unit_1 = df_ttd['unit_ID'].eq(1)
df_ttd.loc[is_unit_1]

Unnamed: 0,unit_ID,cycles,time_to_deterioration
0,1,1,148
1,1,2,147
2,1,3,146
3,1,4,145
4,1,5,144
...,...,...,...
144,1,145,4
145,1,146,3
146,1,147,2
147,1,148,1


In [13]:
# Left-join the remaining columns back onto df_ttd by (unit_ID, cycles), so the
# result starts with unit_ID, cycles, time_to_deterioration.
merge_keys = ['unit_ID', 'cycles']
other_columns = df.drop(columns='time_to_deterioration')
df_final = df_ttd.merge(other_columns, how='left', on=merge_keys)
print(df_final.shape)
df_final.head()

(53759, 27)


Unnamed: 0,unit_ID,cycles,time_to_deterioration,setting_1,setting_2,setting_3,T2,T24,T30,T50,...,phi,NRf,NRc,BPR,farB,htBleed,Nf_dmd,PCNfR_dmd,W31,W32
0,1,1,148,34.9983,0.84,100.0,449.44,555.32,1358.61,1137.23,...,183.06,2387.72,8048.56,9.3461,0.02,334,2223,100.0,14.73,8.8071
1,1,2,147,41.9982,0.8408,100.0,445.0,549.9,1353.22,1125.78,...,130.42,2387.66,8072.3,9.3774,0.02,330,2212,100.0,10.41,6.2665
2,1,3,146,24.9988,0.6218,60.0,462.54,537.31,1256.76,1047.45,...,164.22,2028.03,7864.87,10.8941,0.02,309,1915,84.93,14.08,8.6723
3,1,4,145,42.0077,0.8416,100.0,445.0,549.51,1354.03,1126.38,...,130.72,2387.61,8068.66,9.3528,0.02,329,2212,100.0,10.59,6.4701
4,1,5,144,25.0005,0.6203,60.0,462.54,537.07,1257.71,1047.93,...,164.31,2028.0,7861.23,10.8963,0.02,309,1915,84.93,14.13,8.5286


In [15]:
# Write the final table to CSV without the index column.
output_path = '../data_modified/iot.csv'
df_final.to_csv(output_path, index=False)