In [1]:
from influx_interact import *
from clean import *
import pandas as pd
import random
import numpy as np
import os
from model_trainer import *
from tensorflow import keras


# Example of InfluxDB download.

In [2]:
# Fallback value for the SKYSPARK token: UDL's public READ-only token for the
# InfluxDB instance. setdefault keeps any token already exported in the
# environment instead of silently overwriting it with this hardcoded one.
os.environ.setdefault('SKYSPARK', "omUybYZ3QkGvuXXy0VwT-7hoO2SEFzhckXJ5k32K_GvG47yHQAi9JzZ1bii6r1HD5NKux3ZhHlKAyUfj6i61bA==")

In [3]:
# Connection settings for the InfluxDB 2.0 instance: bucket, organization, token and URL.
bucket, org = "SKYSPARK", "UBC"
# UDL gives public users READ access to the instance through this token,
# which is read from the SKYSPARK environment variable (None if it is unset).
token = os.environ.get('SKYSPARK')
url = "http://206.12.92.81:8086"

In [4]:
# Client object built from the org / url / bucket / token settings.
test = influx_class(org, url, bucket, token)

# Sensor IDs to download, one per line: Power, Leaving Water Temperature,
# Energy and Flow of the Campus HW Main Meter.
# NOTE(review): `id` shadows the Python builtin; the name is kept because the
# query cell below refers to it.
id = [
    "r:p:ubcv:r:205b0392-31f31280 Campus Energy Centre Campus HW Main Meter Power",
    "r:p:ubcv:r:205b1697-84986d73 Campus Energy Centre Campus HW Main Meter Leaving Water Temperature",
    "r:p:ubcv:r:205b0343-70d7c00c Campus Energy Centre Campus HW Main Meter Energy",
    "r:p:ubcv:r:205b03d6-b9859d31 Campus Energy Centre Campus HW Main Meter Flow",
]

In [5]:
# Download the data for the sensor IDs listed in `id`.
# NOTE(review): the first argument is presumably a building/site filter —
# confirm against influx_class.make_query.
testdf = test.make_query("Campus Energy Centre", id)

In [6]:
# Split the query result per sensor; the cells below index main_bucket by
# sensor ID string to get one DataFrame per sensor.
main_bucket = split_sensors(testdf)

In [7]:
# Preview the Energy meter frame, looked up by the same full ID string that was queried.
main_bucket['r:p:ubcv:r:205b0343-70d7c00c Campus Energy Centre Campus HW Main Meter Energy'].head()

Unnamed: 0,DateTime,Value,navName,ID,unit
0,2020-06-23 01:30:00+00:00,0.5,Energy,r:p:ubcv:r:205b0343-70d7c00c Campus Energy Cen...,MWh
1,2020-06-23 01:45:00+00:00,0.5,Energy,r:p:ubcv:r:205b0343-70d7c00c Campus Energy Cen...,MWh
2,2020-06-23 02:00:00+00:00,0.59375,Energy,r:p:ubcv:r:205b0343-70d7c00c Campus Energy Cen...,MWh
3,2020-06-23 02:15:00+00:00,0.40625,Energy,r:p:ubcv:r:205b0343-70d7c00c Campus Energy Cen...,MWh
4,2020-06-23 02:30:00+00:00,0.59375,Energy,r:p:ubcv:r:205b0343-70d7c00c Campus Energy Cen...,MWh


# Using CSV Data

Since we don't have labelled training data for the dataframe above, we will use a modified CSV file as an example.

### Setting up Training Data
This would be done through influx_interact

In [8]:
# Load the example training set and normalise its label columns in one chain:
#   'AH' - human labels (the CSV's 'Anomaly' column); True and False values
#          there are verified true or false.
#   'AM' - machine labels; set to False everywhere so the training data holds
#          only human-labelled anomalies and no machine-labelled ones.
training_data = (
    pd.read_csv('../../data/testing-data/trained_bucket.csv')
    .rename(columns={'Anomaly': 'AH'})
    .assign(AM=False)
)

### Setting up Main Bucket
This would be done through influx_interact

In [9]:
# Store the example CSV in main_bucket under `col`; later cells read it back via main_bucket[col].
# NOTE(review): this key says "... Main Meter Energy" while the ID column in the
# preview below reads "... Main Meter Power", and the key lacks the "r:p:ubcv:r:..."
# prefix used by the downloaded sensors — confirm which label is intended.
col = 'Campus Energy Centre Campus HW Main Meter Energy'
main_bucket[col] = pd.read_csv('../../data/testing-data/main_bucket.csv')



In [10]:
# Preview the CSV-backed frame stored under `col`.
main_bucket[col].head()

Unnamed: 0,Datetime,Value,ID
0,2020-01-01 7:58,9.6,Campus Energy Centre Campus HW Main Meter Power
1,2020-01-01 8:00,9.6,Campus Energy Centre Campus HW Main Meter Power
2,2020-01-01 8:01,10.3,Campus Energy Centre Campus HW Main Meter Power
3,2020-01-01 8:04,11.0,Campus Energy Centre Campus HW Main Meter Power
4,2020-01-01 8:07,10.3,Campus Energy Centre Campus HW Main Meter Power


### Standardize data

In [11]:
# Add a standardized copy of 'Value' as 'Stad_Val' to every sensor frame.
# The frames are updated in place, so main_bucket itself needs no reassignment.
for sensor_df in main_bucket.values():
    sensor_df['Stad_Val'] = standardize_values(sensor_df[['Value']])

In [12]:
# Check that the new 'Stad_Val' column is present next to 'Value'.
main_bucket[col].head()

Unnamed: 0,Datetime,Value,ID,Stad_Val
0,2020-01-01 7:58,9.6,Campus Energy Centre Campus HW Main Meter Power,-0.487452
1,2020-01-01 8:00,9.6,Campus Energy Centre Campus HW Main Meter Power,-0.487452
2,2020-01-01 8:01,10.3,Campus Energy Centre Campus HW Main Meter Power,-0.406726
3,2020-01-01 8:04,11.0,Campus Energy Centre Campus HW Main Meter Power,-0.326
4,2020-01-01 8:07,10.3,Campus Energy Centre Campus HW Main Meter Power,-0.406726


### Combining Training and Main Bucket

This would usually be done inside the for loop for every sensor, but since we only have one set of training data in this example, we do it once here.

In [13]:
# Preview the training data with its 'AH' (human) and 'AM' (machine) label columns.
training_data.head()

Unnamed: 0,Datetime,Value,ID,AH,AM
0,2020-01-01 7:58,9.6,Campus Energy Centre Campus HW Main Meter Power,,False
1,2020-01-01 8:00,9.6,Campus Energy Centre Campus HW Main Meter Power,,False
2,2020-01-01 8:01,10.3,Campus Energy Centre Campus HW Main Meter Power,,False
3,2020-01-01 8:04,11.0,Campus Energy Centre Campus HW Main Meter Power,,False
4,2020-01-01 8:07,10.3,Campus Energy Centre Campus HW Main Meter Power,,False


In [14]:
# Combine the main-bucket frame with the labelled training data.
# NOTE(review): running this cell emits the pandas SettingWithCopyWarning shown
# in the output below — presumably raised inside add_anomalies; worth fixing
# there by assigning through .loc on an explicit copy.
joined_df = add_anomalies(main_bucket[col],training_data)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  iloc._setitem_with_indexer(indexer, value)


In [15]:
# Preview the combined frame: label columns 'AH'/'AM' alongside 'Stad_Val' and 'Value'.
joined_df.head()

Unnamed: 0,AH,AM,Datetime,ID,Stad_Val,Value
0,,False,2020-01-01 07:58:00,Campus Energy Centre Campus HW Main Meter Power,-0.487452,9.6
1,,False,2020-01-01 08:00:00,Campus Energy Centre Campus HW Main Meter Power,-0.487452,9.6
2,,False,2020-01-01 08:01:00,Campus Energy Centre Campus HW Main Meter Power,-0.406726,10.3
3,,False,2020-01-01 08:04:00,Campus Energy Centre Campus HW Main Meter Power,-0.326,11.0
4,,False,2020-01-01 08:07:00,Campus Energy Centre Campus HW Main Meter Power,-0.406726,10.3


### Group Check

This checks whether the dataframe's sensor belongs to a group and returns the name of the group, or None if it does not. This can be modified for however we plan to set up model groups. Need to talk to Mitch about this.

In [16]:
# Look up the model group for this sensor's frame; the result is displayed as the cell output.
group_check(joined_df)

'Group A'

### Split Data
Splits the data into abnormal and normal data for training purposes.

In [17]:
# Unpack the split: normal rows first (norm_df), then abnormal rows (ab_df).
norm_df, ab_df = split_normal(joined_df)

In [18]:
# Preview the rows kept as normal training data.
norm_df.head()

Unnamed: 0,AH,AM,Datetime,ID,Stad_Val,Value
0,,False,2020-01-01 07:58:00,Campus Energy Centre Campus HW Main Meter Power,-0.487452,9.6
1,,False,2020-01-01 08:00:00,Campus Energy Centre Campus HW Main Meter Power,-0.487452,9.6
2,,False,2020-01-01 08:01:00,Campus Energy Centre Campus HW Main Meter Power,-0.406726,10.3
3,,False,2020-01-01 08:04:00,Campus Energy Centre Campus HW Main Meter Power,-0.326,11.0
4,,False,2020-01-01 08:07:00,Campus Energy Centre Campus HW Main Meter Power,-0.406726,10.3


In [19]:
# Preview the rows flagged as anomalies.
ab_df.head()

Unnamed: 0,AH,AM,Datetime,ID,Stad_Val,Value
0,True,True,2020-01-08 16:03:00,Campus Energy Centre Campus HW Main Meter Power,-1.594552,0.0
1,True,True,2020-01-08 16:04:00,Campus Energy Centre Campus HW Main Meter Power,-1.017937,5.0
2,True,True,2020-03-15 07:17:00,Campus Energy Centre Campus HW Main Meter Power,-1.594552,0.0
3,True,True,2020-03-15 07:19:00,Campus Energy Centre Campus HW Main Meter Power,-1.594552,0.0
4,True,True,2020-03-25 22:47:00,Campus Energy Centre Campus HW Main Meter Power,-1.594552,0.0


Looking at the dataframes produced, we still need to chat with Mitch about what he needs coming into his detection model.

In [20]:
# Build the training inputs/targets from the normal rows only.
# NOTE(review): this uses the raw 'Value' column for both inputs and targets,
# not the standardized 'Stad_Val' computed earlier — confirm that is intended.
x_train, y_train = create_sequences(norm_df[['Value']], norm_df['Value'])

In [21]:
# Bundle the training inputs for fit_models, keyed by this sensor's ID.
# - The ID is read from the first row explicitly. `.any()` is a boolean
#   reduction, and on a string column it may return True instead of the ID
#   string depending on the pandas version.
# - No test split exists yet, so the test slots are explicit None placeholders.
#   The previous `_` is IPython's "last output" variable, whose value depends
#   on which cell was displayed last and so changes with execution order.
norm_dict = {
    norm_df['ID'].iloc[0]: {
        "x_train": x_train,
        "y_train": y_train,
        "x_test": None,
        "y_test": None,
        "train": norm_df,
        "test": None,
    }
}

In [22]:
# Train on the bundled data (the output below shows 2 epochs).
# NOTE(review): './models' is presumably the directory the fitted models are
# saved to — confirm against model_trainer.fit_models.
fit_models(norm_dict, './models')

Epoch 1/2
Epoch 2/2
