In [None]:
%load_ext autoreload
%autoreload 2

In [20]:
import pandas as pd # 2.x to support pyarrow
import pyarrow as pa
import openpyxl # for reading xlsx with structure
import numpy as np
import plotly.express as px
import math
import re
from datetime import timedelta, datetime
import os

import tensorflow as tf
from tensorflow import keras

In [None]:
x_structure = pd.read_parquet('../data/pipeline/x_structure.parquet')
y_structure = pd.read_parquet('../data/pipeline/y_structure.parquet')

In [None]:
x_train_pretty_1h = pd.read_parquet('../data/pipeline/x_train_pretty_1h.parquet')
x_test_pretty_1h = pd.read_parquet('../data/pipeline/x_test_pretty_1h.parquet')
y_train_tte_1h = pd.read_parquet('../data/pipeline/y_train_tte_1h.parquet')

In [48]:
left_train = x_train_pretty_1h.set_index(['ИМЯ МАШИНЫ', 'DT']).astype('float64').sort_index().ffill()[x_structure.index]
left_test = x_test_pretty_1h.set_index(['ИМЯ МАШИНЫ', 'DT']).astype('float64').sort_index().ffill()[x_structure.index]
left_test_raw = x_test_pretty_1h.set_index(['ИМЯ МАШИНЫ', 'DT']).astype('float64').sort_index()[x_structure.index]
left_stats = left_train.describe()
left_train = left_train / left_stats.loc['std']
left_test = left_test / left_stats.loc['std']
MAX_TTE = 31 * 24 * 60 * 60

In [21]:
def train_model(target_place_name_and_type):
    model_path = f'../dist/models/{target_place_name_and_type}.h5'
    if os.path.isfile(model_path):
        return

    right = y_train_tte_1h.set_index(['ИМЯ МАШИНЫ', 'DT'])[[target_place_name_and_type]].astype('float64') / MAX_TTE
    data = pd.merge(left_train, right, left_index=True, right_index=True)

    batches = None
    for machine in x_structure.columns:
        seq = data.loc[machine].sort_index().astype('float64').ffill().fillna(0)
        X = seq[x_structure.index]
        Y = seq.drop(x_structure.index, axis=1)
        machine_examples = keras.utils.timeseries_dataset_from_array(X, Y, sequence_length=24 * 7, sequence_stride=24, seed=1337)
        if batches is None:
            batches = machine_examples
        else:
            batches = batches.concatenate(machine_examples)

    model = keras.Sequential([
        keras.Input((24*7, len(x_structure.index))),
        keras.layers.Dense(24*3, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(loss='mse', optimizer='adam')
    # model.summary()

    print(str(datetime.now()) + ' -- ' + target_place_name_and_type)
    history = model.fit(batches, epochs=1000, verbose=0)
    # px.line(history.history).show()

    model.save(model_path, save_format='h5')
    return model

for place in y_structure.index:
    for type in ['TTE M1', 'TTE M3']:
        train_model(place + ' ' + type)

2023-05-28 04:52:17.094053 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.3 TTE M3
2023-05-28 04:58:43.800252 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.4 TTE M1
2023-05-28 05:05:51.258724 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.4 TTE M3
2023-05-28 05:12:47.044575 -- ГАЗОВАЯ ЗАДВИЖКА TTE M1
2023-05-28 05:19:57.852832 -- ГАЗОВАЯ ЗАДВИЖКА TTE M3
2023-05-28 05:26:33.614446 -- ГСМ TTE M1
2023-05-28 05:33:24.591779 -- ГСМ TTE M3
2023-05-28 05:40:08.886552 -- ДВИГАТЕЛЬ ПУСКОВОГО МАСЛОНАСОСА TTE M1
2023-05-28 05:47:20.789494 -- ДВИГАТЕЛЬ ПУСКОВОГО МАСЛОНАСОСА TTE M3
2023-05-28 05:53:59.946186 -- ДВИГАТЕЛЬ РЕЗЕРВНОГО МАСЛОНАСОСА TTE M1
2023-05-28 06:00:50.594437 -- ДВИГАТЕЛЬ РЕЗЕРВНОГО МАСЛОНАСОСА TTE M3
2023-05-28 06:07:33.621898 -- ЗАДВИЖКА TTE M1
2023-05-28 06:15:11.377793 -- ЗАДВИЖКА TTE M3
2023-05-28 06:23:56.958429 -- ЗАП. И РЕГ. АРМАТУРА TTE M1
2023-05-28 06:33:02.633579 -- ЗАП. И РЕГ. АРМАТУРА TTE M3
2023-05-28 06:42:26.199837 -- ЗАПОРНАЯ АРМАТУРА TTE M1
2023-05-28 06:52:12.932920 -- ЗАПОРНАЯ АРМАТУРА TTE M3
2023

KeyboardInterrupt: 

In [71]:
def validate_model(target_place_name_and_type, machine):
    model = keras.models.load_model(f'../dist/models/{target_place_name_and_type}.h5')
    right = y_train_tte_1h.set_index(['ИМЯ МАШИНЫ', 'DT'])[[target_place_name_and_type]].astype('float64') / MAX_TTE

    input_df = left_train.loc[machine].astype('float64').ffill()
    result = pd.DataFrame(index=right.loc[machine].index)
    result['ПРОГНОЗ'] = 0

    x = 0
    while x < len(input_df) - 24*7 - 24:
        window = input_df.iloc[x:x+24*7]
        if len(window) < 24:
            break
        input = np.array([window])
        output = model.predict(input, verbose=0)
        result['ПРОГНОЗ'].iloc[x+24*7:x+24*7+24] = output[0][-24:].reshape((24))

        print("{:3.2f}%".format(100 * x / len(input_df)))
        x += 24

    px.line(right.loc[machine].merge(result, left_index=True, right_index=True).ewm(com=24).mean()).show()

validate_model('РОТОР TTE M1', 'ЭКСГАУСТЕР А/М №5')

0.00%
0.09%
0.19%
0.28%
0.37%
0.46%
0.56%
0.65%
0.74%
0.83%
0.93%
1.02%
1.11%
1.20%
1.30%
1.39%
1.48%
1.57%
1.67%
1.76%
1.85%
1.94%
2.04%
2.13%
2.22%
2.31%
2.41%
2.50%
2.59%
2.68%
2.78%
2.87%
2.96%
3.05%
3.15%
3.24%
3.33%
3.42%
3.52%
3.61%
3.70%
3.79%
3.89%
3.98%
4.07%
4.16%
4.26%
4.35%
4.44%
4.54%
4.63%
4.72%
4.81%
4.91%
5.00%
5.09%
5.18%
5.28%
5.37%
5.46%
5.55%
5.65%
5.74%
5.83%
5.92%
6.02%
6.11%
6.20%
6.29%
6.39%
6.48%
6.57%
6.66%
6.76%
6.85%
6.94%
7.03%
7.13%
7.22%
7.31%
7.40%
7.50%
7.59%
7.68%
7.77%
7.87%
7.96%
8.05%
8.14%
8.24%
8.33%
8.42%
8.51%
8.61%
8.70%
8.79%
8.89%
8.98%
9.07%
9.16%
9.26%
9.35%
9.44%
9.53%
9.63%
9.72%
9.81%
9.90%
10.00%
10.09%
10.18%
10.27%
10.37%
10.46%
10.55%
10.64%
10.74%
10.83%
10.92%
11.01%
11.11%
11.20%
11.29%
11.38%
11.48%
11.57%
11.66%
11.75%
11.85%
11.94%
12.03%
12.12%
12.22%
12.31%
12.40%
12.49%
12.59%
12.68%
12.77%
12.86%
12.96%
13.05%
13.14%
13.24%
13.33%
13.42%
13.51%
13.61%
13.70%
13.79%
13.88%
13.98%
14.07%
14.16%
14.25%
14.35%
14.44%
14.53%
14

In [74]:
submission1_ref = pd.read_excel('../data/source/sample_submission/submission_1.xlsx', index_col=0)
submission2_ref = pd.read_parquet('../data/source/sample_submission/sample_submission_2.parquet')
submission3_ref = pd.read_parquet('../data/source/sample_submission/sample_submission_3.parquet')

In [88]:
x_test = pd.read_parquet('../data/source/X_test.parquet')

In [95]:
submission3

Unnamed: 0_level_0,ЭКСГАУСТЕР 4. ТОК РОТОРА 1,ЭКСГАУСТЕР 4. ТОК РОТОРА2,ЭКСГАУСТЕР 4. ТОК СТАТОРА,ЭКСГАУСТЕР 4. ДАВЛЕНИЕ МАСЛА В СИСТЕМЕ,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 1,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 2,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 3,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 4,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА МАСЛА В СИСТЕМЕ,ЭКСГАУСТЕР 4. ТЕМПЕРАТУРА МАСЛА В МАСЛОБЛОКЕ,...,ЭКСГАУСТЕР 9. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 3,ЭКСГАУСТЕР 9. ТЕМПЕРАТУРА ПОДШИПНИКА НА ОПОРЕ 4,ЭКСГАУСТЕР 9. ТЕМПЕРАТУРА МАСЛА В СИСТЕМЕ,ЭКСГАУСТЕР 9. ТЕМПЕРАТУРА МАСЛА В МАСЛОБЛОКЕ,ЭКСГАУСТЕР 9. ВИБРАЦИЯ НА ОПОРЕ 1,ЭКСГАУСТЕР 9. ВИБРАЦИЯ НА ОПОРЕ 2,ЭКСГАУСТЕР 9. ВИБРАЦИЯ НА ОПОРЕ 3,ЭКСГАУСТЕР 9. ВИБРАЦИЯ НА ОПОРЕ 3. ПРОДОЛЬНАЯ.,ЭКСГАУСТЕР 9. ВИБРАЦИЯ НА ОПОРЕ 4,ЭКСГАУСТЕР 9. ВИБРАЦИЯ НА ОПОРЕ 4. ПРОДОЛЬНАЯ.
DT,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-01-01 00:00:00,,,,,,,,,,,...,,,,,,,,,,
2022-01-01 00:00:10,,,,,,,,,,,...,,,,,,,,,,
2022-01-01 00:00:20,,,,,,,,,,,...,,,,,,,,,,
2022-01-01 00:00:30,,,,,,,,,,,...,,,,,,,,,,
2022-01-01 00:00:40,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-04-09 23:59:20,,,,,,,,,,,...,,,,,,,,,,
2023-04-09 23:59:30,,,,,,,,,,,...,,,,,,,,,,
2023-04-09 23:59:40,,,,,,,,,,,...,,,,,,,,,,
2023-04-09 23:59:50,,,,,,,,,,,...,,,,,,,,,,


In [200]:
submission1 = submission1_ref.copy()
submission1['machine'] = np.NaN
submission1['tm'] = np.NaN
submission2 = pd.DataFrame(index=x_test.index, columns=submission2_ref.columns)
submission3 = pd.DataFrame(index=x_test.index, columns=submission3_ref.columns)

def apply_model(target_place_name, machine):
    prediction_field_m1 = f'{target_place_name} TTE M1'
    prediction_field_m3 = f'{target_place_name} TTE M3'
    model_m1 = keras.models.load_model(f'../dist/models/{prediction_field_m1}.h5')
    model_m3 = keras.models.load_model(f'../dist/models/{prediction_field_m3}.h5')

    input_df = left_test.loc[machine].astype('float64').ffill()
    input_raw_df = left_test_raw.loc[machine].astype('float64')
    result = pd.DataFrame(index=left_test.loc[machine].index, columns=[prediction_field_m1, prediction_field_m3])

    print(str(datetime.now()) + ' -- ' + target_place_name, machine)

    x = 0
    while x < len(input_df) - 24*7 - 24:
        window = input_df.iloc[x:x+24*7]
        if len(window) < 24:
            break
        input = np.array([window])
        output_m1 = model_m1.predict(input, verbose=0)
        output_m3 = model_m3.predict(input, verbose=0)
        result[prediction_field_m1].iloc[x+24*7:x+24*7+24] = output_m1[0][-24:].reshape((24))
        result[prediction_field_m3].iloc[x+24*7:x+24*7+24] = output_m3[0][-24:].reshape((24))
        x += 24

    y_name = y_structure[machine].loc[target_place_name]
    upsampled = result.rolling(72).mean().resample('10s').interpolate().fillna(1)
    submission2[y_name] = upsampled[prediction_field_m3].map(lambda x: 1 if x < 0.2 else 0)
    submission3[y_name] = upsampled[prediction_field_m1] * MAX_TTE

    return upsampled

for place in y_structure.index:
    for machine in y_structure.columns:
        apply_model(place, machine)

# for place in ['РОТОР']:
#     for machine in ['ЭКСГАУСТЕР А/М №4']:
#         apply_model(place, machine)

2023-05-28 16:23:50.743629 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.1 ЭКСГАУСТЕР А/М №4
2023-05-28 16:24:46.004572 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.1 ЭКСГАУСТЕР А/М №5
2023-05-28 16:25:41.891578 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.1 ЭКСГАУСТЕР А/М №6
2023-05-28 16:26:36.231667 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.1 ЭКСГАУСТЕР А/М №7
2023-05-28 16:27:31.685559 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.1 ЭКСГАУСТЕР А/М №8
2023-05-28 16:28:25.490294 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.1 ЭКСГАУСТЕР А/М №9
2023-05-28 16:29:18.992819 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.2 ЭКСГАУСТЕР А/М №4
2023-05-28 16:30:11.971262 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.2 ЭКСГАУСТЕР А/М №5
2023-05-28 16:31:05.729600 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.2 ЭКСГАУСТЕР А/М №6
2023-05-28 16:31:59.032512 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.2 ЭКСГАУСТЕР А/М №7
2023-05-28 16:32:52.009945 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.2 ЭКСГАУСТЕР А/М №8
2023-05-28 16:33:45.946120 -- ВК 310С ВИБРОПРЕОБРАЗОВАТЕЛЬ Т.2 ЭКСГАУСТЕР А/М №9
2023-05-28 16:34:39.064353 -

KeyboardInterrupt: 

In [None]:
submission2

In [None]:
submission3

In [198]:
# y_name = y_structure[y_structure.columns[0]].loc[y_structure.index[0]]
# y_name = y_structure['ЭКСГАУСТЕР А/М №4'].loc['РОТОР']
# y_name
# submission2[[y_name]].sort_values(y_name)

Unnamed: 0_level_0,Y_ЭКСГАУСТЕР А/М №4_РОТОР ЭКСГ. №4
DT,Unnamed: 1_level_1
2022-01-01 00:00:00,0
2022-11-03 21:44:50,0
2022-11-03 21:45:00,0
2022-11-03 21:45:10,0
2022-11-03 21:45:20,0
...,...
2022-03-08 18:13:40,1
2022-03-08 18:13:30,1
2022-03-08 18:13:20,1
2022-03-08 18:15:30,1
