# Prediction Timer Demo

# Prediction Timer Demo

In [1]:
import time

# Start the clock before the heavy imports so the printed figure covers them.
# perf_counter() is monotonic, so the measured interval cannot be skewed by
# system clock adjustments the way time.time() can.
start = time.perf_counter()
import sys

import numpy as np
import pandas as pd

# Make the parent directory importable; the `model` package imported below is
# expected to be found there. Guarded so re-running this cell does not keep
# appending duplicate entries to sys.path.
if ".." not in sys.path:
    sys.path.append("..")

import model.clean as cl
import model.model_trainer as mt
import model.model_predict as mp
from model.influx_interact import influx_class

from tensorflow.random import set_seed

# Fix TensorFlow's global random seed so repeated runs are comparable.
set_seed(2)
print("time taken to load packages : ", time.perf_counter() - start)

time taken to load packages :  6.486962080001831


In [17]:
# Bounds of the data window queried from InfluxDB.
# The end time of the training set becomes the start time for predicting.
# NOTE(review): values look like Unix epoch seconds - confirm against
# influx_class.make_query.
START_TIME = 1613109600
# The window is exactly 86400 units long (one day, if the unit is seconds).
END_TIME = START_TIME + 86400

In [21]:
# Sensor this demo is configured for; used as the key of both lookup tables.
_HW_MAIN_METER_FLOW = "Campus Energy Centre Campus HW Main Meter Flow"

# InfluxDB measurement that the predictions are written to.
measurement_name = "test_timer"

# Sequence length (number of time steps) used when windowing each sensor's readings.
TIME_STEP_SIZES = {_HW_MAIN_METER_FLOW: 15}

# Multiplier applied to each sensor's stored training-loss percentile to get
# the anomaly threshold.
THRESHOLD_RATIOS = {_HW_MAIN_METER_FLOW: 1.72}

In [22]:
import os

# Time the whole connect-and-query step with a monotonic clock.
start = time.perf_counter()

# Directories holding the saved models, the fitted standardizers and the
# training-loss percentiles that the prediction cell loads from.
model_path = "./test_env_models/"
scaler_path = "./test_env_standardizers/"
percentile_path = "./test_env_loss_percentiles/"

# setup InfluxDB client
# The token is read from the INFLUX_TOKEN environment variable so a real
# credential never has to be committed with the notebook; the placeholder
# value is kept as a fallback so the local test environment keeps working.
token = os.environ.get("INFLUX_TOKEN", "mytoken")
org = "UBC"
bucket = "MDS2021"
url = "http://localhost:8086"

influx_conn = influx_class(
    org=org,
    url=url,
    bucket=bucket,
    token=token,
)

# Fetch the readings of the configured sensor between START_TIME and END_TIME.
influx_read_df = influx_conn.make_query(
    location="Campus Energy Centre",
    measurement="READINGS",
    id=["Campus Energy Centre Campus HW Main Meter Flow"],
    start=START_TIME,
    end=END_TIME,
)

print("time taken to read from influx ", time.perf_counter() - start)

time taken to read from influx  0.15479683876037598


In [23]:
import importlib

# Pick up any edits made to model_predict since it was first imported.
# Done once, before the loop, so the reload cost is not counted as part of
# each sensor's prediction time.
importlib.reload(mp)

# One DataFrame per sensor, keyed by the same names used in TIME_STEP_SIZES
# and THRESHOLD_RATIOS.
dfs_for_test = cl.split_sensors(influx_read_df)

try:
    for key, df in dfs_for_test.items():
        prediction_start_time = time.perf_counter()

        # Take the sensor ID from the first row. The previous `.any()` call is
        # a boolean reduction and is not guaranteed to return an element of
        # the column.
        # NOTE(review): assumes every row of a per-sensor frame carries the
        # same ID - confirm against cl.split_sensors.
        sensor_id = df["ID"].iloc[0]
        df["Stand_Val"] = cl.std_val_predict(
            df[["Value"]],
            sensor_id,
            scaler_path,
        )

        # sets up sequencing
        # (names kept from the training helper; y_train is not used below)
        time_steps = TIME_STEP_SIZES[key]
        window_size = 1
        x_train, y_train = mt.create_sequences(
            df["Stand_Val"], df["Stand_Val"], time_steps, window_size
        )
        # add a trailing axis of size 1 to the sequence array
        x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

        # set up lists for passing to predict: the trailing rows of the frame,
        # with the count derived from the sequence length
        n_rows = len(df) - x_train.shape[1] + 1
        timestamps = df["DateTime"].tail(n_rows).values
        val_nums = df["Value"].tail(n_rows).values

        # gets training loss percentile for threshold setting
        loss_percentile = cl.load_loss_percentile(key, file_path=percentile_path)
        threshold = THRESHOLD_RATIOS[key] * loss_percentile

        # predicting and prediction formatting
        pred_df = mp.make_prediction(
            key,
            x_train,
            timestamps,
            threshold,
            val_nums,
            model_path,
            anomaly_type="realtime_anomaly",
        )
        # keep only the columns that are written back to InfluxDB
        pred_df = pred_df[["uniqueID", "val_num", "realtime_anomaly"]]
        print("time taken to make predictions ", time.perf_counter() - prediction_start_time)

        write_start_time = time.perf_counter()
        influx_conn.write_data(pred_df, measurement_name, tags=["uniqueID", "realtime_anomaly"])
        print("time taken to write predictions ", time.perf_counter() - write_start_time)
finally:
    # Always release the client, even if a sensor fails part-way through.
    influx_conn.client.close()

time taken to make predictions  1.2060463428497314
time taken to write predictions  0.05940604209899902
