# Krafthack 2022 Baseline model

In [38]:
from ae_adx_client.ae_adx_client import AdxClient
import pandas as pd
import plotly.express as px
import numpy as np
from matplotlib import pyplot as plt
import os
from sklearn.linear_model import LinearRegression

In [None]:
%load_ext autoreload
%autoreload 2

In [39]:
from data_helpers import get_operating_signal_names, get_tensile_sensor_names
from datetime import datetime as dt

In [40]:
start_date = dt(year=1970,month=9,day=11)
end_date = dt(year=1971,month=1,day=25)

steady_data = pd.read_parquet('../data/steady_data.parquet')
steady_data_for_linear_fit = steady_data.loc[start_date:end_date,get_tensile_sensor_names()]
steady_data_for_linear_fit = steady_data_for_linear_fit.resample('1s').interpolate()

unsteady_data = pd.read_parquet('../data/unsteady_data.parquet')

In [41]:
px.line(steady_data.loc[:,'Turbine_Pressure Spiral Casing'].sample(10000).sort_index())

In [42]:
steady_data_for_linear_fit['elapsed_time'] = steady_data_for_linear_fit.index-start_date
steady_data_for_linear_fit['elapsed_time'] = steady_data_for_linear_fit.elapsed_time / pd.to_timedelta(1,'s')
steady_data_for_linear_fit.head()

Unnamed: 0_level_0,Bolt_1_Tensile,Bolt_2_Tensile,Bolt_3_Tensile,Bolt_4_Tensile,Bolt_5_Tensile,Bolt_6_Tensile,elapsed_time
timepoints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1970-09-11 00:00:00,1474.11034,1427.72196,1623.672827,1592.000401,1627.568274,1638.021667,0.0
1970-09-11 00:00:01,1474.178909,1427.745722,1623.634306,1591.981665,1627.573384,1638.096755,1.0
1970-09-11 00:00:02,1474.12579,1427.762865,1623.6548,1591.98549,1627.538896,1638.040184,2.0
1970-09-11 00:00:03,1474.042509,1427.678622,1623.572884,1591.971247,1627.57917,1638.019175,3.0
1970-09-11 00:00:04,1474.137001,1427.774177,1623.64933,1591.939368,1627.501739,1638.012018,4.0


In [43]:
baseline_models = {}
for tensile_sensor in get_tensile_sensor_names():
    linear_model = LinearRegression()
    x = steady_data_for_linear_fit["elapsed_time"].values
    x = x.reshape(-1,1)
    y = steady_data_for_linear_fit[tensile_sensor].values
    y = y.reshape(-1,1)
    baseline_models[tensile_sensor] = linear_model.fit(
        X=x,
        y=y
    )


In [44]:
import pickle

with open('baseline_models.pkl','wb') as f:
    pickle.dump(baseline_models,f)

In [66]:
for tensile_sensor_name in get_tensile_sensor_names():
    linear_model = baseline_models.get(tensile_sensor_name)
    x_data = steady_data_for_linear_fit['elapsed_time'].values.reshape(-1,1)
    linear_prediction = linear_model.predict(X=x_data)
    plot_df = steady_data_for_linear_fit.loc[:,[tensile_sensor_name]]
    plot_df['linear_prediction'] = linear_prediction
    fig = px.line(plot_df.sample(10000).sort_index(),title=f"Baseline model for {tensile_sensor_name}")
    fig.show()
    

In [46]:
def detrend_tensile_data(df: pd.DataFrame,baseline_models: dict, start_date: dt, end_date: dt):
    """Detrend the tensile sensor data with model

    Args:
        df (pd.DataFrame): data with trends
        baseline_models (dict): dictionary with trained baseline models
    """
    df = df.copy()

    if 'elapsed_time' not in df.columns:
        
        df['elapsed_time'] = df.index-start_date
        df['elapsed_time'] = df.elapsed_time / pd.to_timedelta(1,'s')

    for tensile_sensor_name in get_tensile_sensor_names():

        x_data = df['elapsed_time'].values.reshape(-1,1)
        linear_model = baseline_models.get(tensile_sensor_name)
        df['trend'] = linear_model.predict(X=x_data)
        df.loc[:,tensile_sensor_name] = df[tensile_sensor_name]-df['trend']
        df.drop(columns=['trend'])
        df = df.loc[start_date:end_date,:]

    return df

In [47]:
detrended_steady_data = detrend_tensile_data(
    df=steady_data, 
    baseline_models=baseline_models,
    start_date=start_date,
    end_date=end_date
)



In [48]:
detrended_unsteady_data = detrend_tensile_data(
    df = unsteady_data,
    baseline_models=baseline_models,
    start_date=start_date,
    end_date=end_date
)

In [62]:
detrended_steady_data.head()

Unnamed: 0_level_0,Unit_4_Power,Unit_4_Reactive Power,Turbine_Guide Vane Opening,Turbine_Rotational Speed,Turbine_Pressure Drafttube,Turbine_Pressure Spiral Casing,Bolt_1_Steel tmp,Bolt_1_Torsion,Bolt_1_Tensile,Bolt_2_Torsion,...,Bolt_4_Torsion,Bolt_4_Tensile,Bolt_5_Torsion,Bolt_5_Tensile,Bolt_6_Torsion,Bolt_6_Tensile,lower_bearing_vib_vrt,turbine_bearing_vib_vrt,elapsed_time,trend
timepoints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1970-09-11 00:00:00,216.010229,2.213785,72.92726,107.993752,152.625396,5204.692439,9.116564,161.624242,4.962772,132.720051,...,227.37423,-0.922153,286.24507,-5.521569,166.092997,-3.766545,,,0.0,1641.788212
1970-09-11 00:00:01,215.986387,2.430907,72.857113,107.993752,152.426614,5204.90702,9.116155,161.641194,5.031327,132.716088,...,227.351792,-0.940889,286.227038,-5.516459,166.113711,-3.69146,,,1.0,1641.788216
1970-09-11 00:00:02,215.962544,2.64803,72.786965,107.993752,152.221141,5205.337213,9.117806,161.646854,4.978194,132.706999,...,227.370264,-0.937066,286.243309,-5.550948,166.106216,-3.748036,,,2.0,1641.78822
1970-09-11 00:00:03,215.938701,2.865152,72.716817,107.993752,152.015668,5205.767405,9.117833,161.632999,4.894897,132.720516,...,227.3756,-0.95131,286.247199,-5.510674,166.093641,-3.769048,,,3.0,1641.788224
1970-09-11 00:00:04,215.914859,3.082275,72.64667,107.993752,151.810195,5206.197597,9.117341,161.632617,4.989375,132.704046,...,227.359891,-0.98319,286.232739,-5.588106,166.10296,-3.776209,,,4.0,1641.788227


In [51]:
training_columns = get_operating_signal_names() + get_tensile_sensor_names()
print(training_columns)
detrended_steady_data.loc[:,training_columns].dropna().head()

['Unit_4_Power', 'Unit_4_Reactive Power', 'Turbine_Guide Vane Opening', 'Turbine_Pressure Drafttube', 'Turbine_Pressure Spiral Casing', 'Turbine_Rotational Speed', 'Bolt_1_Tensile', 'Bolt_2_Tensile', 'Bolt_3_Tensile', 'Bolt_4_Tensile', 'Bolt_5_Tensile', 'Bolt_6_Tensile']


Unnamed: 0_level_0,Unit_4_Power,Unit_4_Reactive Power,Turbine_Guide Vane Opening,Turbine_Pressure Drafttube,Turbine_Pressure Spiral Casing,Turbine_Rotational Speed,Bolt_1_Tensile,Bolt_2_Tensile,Bolt_3_Tensile,Bolt_4_Tensile,Bolt_5_Tensile,Bolt_6_Tensile
timepoints,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1970-09-11 00:00:00,216.010229,2.213785,72.92726,152.625396,5204.692439,107.993752,4.962772,-7.615786,-2.839481,-0.922153,-5.521569,-3.766545
1970-09-11 00:00:01,215.986387,2.430907,72.857113,152.426614,5204.90702,107.993752,5.031327,-7.59203,-2.878009,-0.940889,-5.516459,-3.69146
1970-09-11 00:00:02,215.962544,2.64803,72.786965,152.221141,5205.337213,107.993752,4.978194,-7.574893,-2.857521,-0.937066,-5.550948,-3.748036
1970-09-11 00:00:03,215.938701,2.865152,72.716817,152.015668,5205.767405,107.993752,4.894897,-7.659141,-2.939444,-0.95131,-5.510674,-3.769048
1970-09-11 00:00:04,215.914859,3.082275,72.64667,151.810195,5206.197597,107.993752,4.989375,-7.563591,-2.863004,-0.98319,-5.588106,-3.776209


In [52]:
detrended_steady_data.loc[:,training_columns].dropna().to_parquet('../data/detrended_steady_data.parquet')
detrended_unsteady_data.loc[:,training_columns].dropna().to_parquet('../data/detrended_unsteady_data.parquet')

In [65]:
col = get_tensile_sensor_names()[0]

px.scatter(
    detrended_steady_data.sample(20000),
    x="Unit_4_Power",
    y=col,
    title=f"De-trended {col} values plottet against Active Power",
    height=800,
    width=800
)
