In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to project code are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd # 2.x to support pyarrow
import pyarrow as pa
import openpyxl # for reading xlsx with structure
import numpy as np
import plotly.express as px
import math
import re
from datetime import timedelta, datetime

import tensorflow as tf
from tensorflow import keras

In [None]:
# Structure tables written by the pipeline: one describing the feature (X) side,
# one describing the target (Y) side.
x_structure, y_structure = map(
    pd.read_parquet,
    (
        '../data/pipeline/x_structure.parquet',
        '../data/pipeline/y_structure.parquet',
    ),
)

In [None]:
# Training features, test features and training time-to-event targets from the
# pipeline output directory (all three files carry the `1h` suffix).
x_train_pretty_1h, x_test_pretty_1h, y_train_tte_1h = map(
    pd.read_parquet,
    (
        '../data/pipeline/x_train_pretty_1h.parquet',
        '../data/pipeline/x_test_pretty_1h.parquet',
        '../data/pipeline/y_train_tte_1h.parquet',
    ),
)

In [None]:
# Feature matrix indexed by (machine name, timestamp), cast to float and sorted by index.
# Gaps are forward-filled *within* each machine: a plain .ffill() on the sorted frame would
# carry the last readings of one machine into the leading gaps of the next machine.
left = (
    x_train_pretty_1h
    .set_index(['ИМЯ МАШИНЫ', 'DT'])
    .astype('float64')
    .sort_index()
    .groupby(level='ИМЯ МАШИНЫ')
    .ffill()
    [x_structure.index]
)
left_stats = left.describe()
# Scale every feature by its standard deviation. A constant feature has std == 0 and would
# become inf/NaN after the division, so such columns are divided by 1 (left unscaled).
left = left / left_stats.loc['std'].replace(0, 1)
# Divisor used to normalise the time-to-event targets
# (31 * 24 * 60 * 60 -- presumably 31 days expressed in seconds; confirm the target unit).
MAX_TTE = 31 * 24 * 60 * 60

In [None]:
def train_model(target_place_name_and_type, epochs=1000):
    """Train and save a Keras regressor for a single time-to-event target column.

    The features come from the notebook-level ``left`` frame, the target from the
    ``target_place_name_and_type`` column of ``y_train_tte_1h`` divided by ``MAX_TTE``.
    For every machine (the columns of ``x_structure``) the merged rows are cut into
    sliding windows, and the windows of all machines are concatenated into one dataset.

    Args:
        target_place_name_and_type: Name of the target column in ``y_train_tte_1h``;
            also used as the file stem of the saved model.
        epochs: Number of training epochs (defaults to the previously hard-coded 1000).

    Returns:
        The fitted ``keras.Sequential`` model. It is also written to
        ``../dist/models/<target_place_name_and_type>.h5``.

    Raises:
        ValueError: If ``x_structure`` lists no machines, so there is nothing to train on.
    """
    window = 24 * 7  # rows per training example
    stride = 24      # rows between the starts of consecutive examples

    right = y_train_tte_1h.set_index(['ИМЯ МАШИНЫ', 'DT'])[[target_place_name_and_type]].astype('float64') / MAX_TTE
    data = pd.merge(left, right, left_index=True, right_index=True)

    batches = None
    for machine in x_structure.columns:
        # Per-machine sequence in index order; remaining gaps are forward-filled, then zeroed.
        seq = data.loc[machine].sort_index().astype('float64').ffill().fillna(0)
        X = seq[x_structure.index]
        Y = seq.drop(x_structure.index, axis=1)
        # NOTE(review): timeseries_dataset_from_array pairs the window starting at row i with
        # targets[i], so every window is labelled with the target at its FIRST row -- confirm
        # this is intended rather than the target at the end of the window.
        # `seed` only matters when shuffle=True (the default is False).
        machine_examples = keras.utils.timeseries_dataset_from_array(
            X, Y, sequence_length=window, sequence_stride=stride, seed=1337)
        batches = machine_examples if batches is None else batches.concatenate(machine_examples)

    if batches is None:
        raise ValueError('x_structure has no machine columns; nothing to train on')

    # Dense layers act on the last axis, so the model emits one sigmoid value per row
    # of the input window.
    model = keras.Sequential([
        keras.Input((window, len(x_structure.index))),
        keras.layers.Dense(24 * 3, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid'),
    ])

    model.compile(loss='mse', optimizer='adam')

    print(str(datetime.now()) + ' -- ' + target_place_name_and_type)
    model.fit(batches, epochs=epochs, verbose=0)

    # The HDF5 format is inferred from the `.h5` suffix; the explicit `save_format`
    # argument is deprecated in Keras 3 and therefore omitted.
    model.save(f'../dist/models/{target_place_name_and_type}.h5')
    return model

# Train one model per (place, TTE column suffix) combination.
# The loop variable is deliberately not called `type`: at notebook scope that would
# replace the builtin `type` for every cell executed afterwards.
for place in y_structure.index:
    for tte_kind in ['TTE M1', 'TTE M3']:
        train_model(place + ' ' + tte_kind)

In [None]:
def validate_model(target_place_name_and_type, machine):
    """Plot a saved model's predictions against the true target for one machine.

    Loads ``../dist/models/<target_place_name_and_type>.h5``, slides a window over the
    machine's rows of the notebook-level ``left`` frame, and writes the model output for
    the last 24 rows of each window into the 24 rows that follow the window. Rows that
    receive no prediction stay at 0. The prediction (column ``ПРОГНОЗ``) and the
    normalised true target are joined on the timestamp index, smoothed with a 72-row
    rolling mean and shown as a plotly line chart.

    Args:
        target_place_name_and_type: Target column in ``y_train_tte_1h`` and file stem of
            the saved model.
        machine: First-level index value (machine name) to validate on.

    Returns:
        None. The figure is displayed as a side effect.
    """
    window_len = 24 * 7  # rows fed to the model per prediction
    horizon = 24         # rows filled per prediction, and step between windows

    model = keras.models.load_model(f'../dist/models/{target_place_name_and_type}.h5')
    right = y_train_tte_1h.set_index(['ИМЯ МАШИНЫ', 'DT'])[[target_place_name_and_type]].astype('float64') / MAX_TTE

    # Same gap handling as in training (forward-fill, then zero-fill); without the
    # zero-fill any remaining NaN feature turns every prediction into NaN.
    input_df = left.loc[machine].astype('float64').ffill().fillna(0)
    features = input_df.to_numpy()

    # Float buffer aligned positionally with input_df.
    forecast = np.zeros(len(input_df), dtype='float64')

    # Same window starts as the original loop: 0, 24, 48, ... while start < len - 168 - 24,
    # so both the window and the rows it fills always lie inside the data.
    starts = range(0, len(input_df) - window_len - horizon, horizon)
    if len(starts) > 0:
        # One batched predict call instead of one call per window.
        windows = np.stack([features[start:start + window_len] for start in starts])
        outputs = model.predict(windows, verbose=0)
        for start, output in zip(starts, outputs):
            target_rows = slice(start + window_len, start + window_len + horizon)
            forecast[target_rows] = output[-horizon:].reshape(horizon)

    # Index the predictions by the timestamps they were computed from, so the join with
    # `right` aligns on timestamp instead of relying on both frames sharing row order.
    result = pd.DataFrame({'ПРОГНОЗ': forecast}, index=input_df.index)

    px.line(right.loc[machine].merge(result, left_index=True, right_index=True).rolling(72).mean()).show()

# Visual check of one trained model on one machine.
validate_model(
    target_place_name_and_type='РЕДУКТОР ГАЗ. ЗАДВИЖКИ TTE M3',
    machine='ЭКСГАУСТЕР А/М №5',
)