In [1]:
import os
import plotly.express as px
import pandas as pd
import numpy as np
import json

from utils import *
from train_eval import *
import wandb
wandb.login()

import warnings
warnings.filterwarnings('ignore')

# Set seed
np.random.seed(42)

# Set working directory
os.chdir(r"..") # should be the git repo root directory, checking below:
print("Current working directory: " + os.getcwd())
assert os.getcwd()[-8:] == "WattCast"
dir_path = os.path.join(os.getcwd(), 'data', 'clean_data')
model_dir = os.path.join(os.getcwd(), 'models')



Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mnikolaushouben[0m ([33mwattcast[0m). Use [1m`wandb login --relogin`[0m to force relogin


Current working directory: c:\Users\nik\Desktop\Berkeley_Projects\WattCast


In [2]:
def train_eval_light():

    wandb.init(project="WattCast_tuning")
    wandb.config.update(config_run)
    config = wandb.config

    print("Getting data...")

    pipeline, ts_train_piped, ts_val_piped, ts_test_piped, ts_train_weather_piped, ts_val_weather_piped, ts_test_weather_piped, trg_train_inversed, trg_val_inversed, trg_test_inversed = data_pipeline(config)

    print("Getting model instance...")
    model = get_model(config)
    model, runtime = train_models([model], ts_train_piped, ts_train_weather_piped, ts_val_piped, ts_val_weather_piped)

    print("Evaluating model...")
    predictions, score = predict_testset(model[0], 
                                  ts_test_piped[config.longest_ts_test_idx], 
                                  ts_test_weather_piped[config.longest_ts_test_idx],
                                  config.n_lags, config.n_ahead, config.eval_stride, pipeline,
                                  )


    print("Plotting predictions...")
    df_compare = pd.concat([trg_test_inversed.pd_dataframe(), predictions], axis=1).dropna()
    df_compare.columns = ['target', 'prediction']
    fig = px.line(df_compare, title='Predictions vs. Test Set')

    wandb.log({'eval_loss': score})
    wandb.log({'predictions': fig})
    wandb.finish()



In [3]:
# See what keys are in the h5py data file
get_hdf_keys(dir_path)

({'1_county.h5': ['Los_Angeles', 'New_York', 'Sacramento'],
  '2_town.h5': ['town_0', 'town_1', 'town_2'],
  '3_village.h5': [],
  '4_neighborhood.h5': ['germany'],
  '5_household.h5': ['household_0', 'household_1', 'household_2'],
  '6_apartment.h5': ['apartment_0', 'apartment_1', 'apartment_2']},
 {'1_county.h5': ['60min'],
  '2_town.h5': ['15min', '60min'],
  '4_neighborhood.h5': ['15min', '60min'],
  '5_household.h5': ['15min', '60min'],
  '6_apartment.h5': ['15min', '5min', '60min']})

In [4]:
# run parameters

sweeps = 20

scale_location_pairs = (
    # ('1_county', 'Sacramento'),
    # ('1_county', 'New_York'),
    # ('2_town', 'town_1'),
    # ('2_town', 'town_2'),
    ('3_village', 'village_1'),
    ('3_village', 'village_2'),
    #('2_town', 'town_0'),
    #('3_village', 'village_0'),
    #('4_neighborhood', 'germany'),
    #('5_household', 'household_0'),
      )




models = [
        'rf',
        'xgb', 
        'gru', 
        'lgbm',  
        'nbeats',
        #'transformer',
        #'tft'
        ]

for scale, location in scale_location_pairs:

    for model in models:
        # place holder initialization of config file (will be updated in train_eval_light()
        config_run = {
            'spatial_scale': scale,
            'temp_resolution': 60,
            'location': location,
            'model': model,
            'horizon_in_hours': 24,
            'lookback_in_hours': 24,
            'boxcox': True,
            'liklihood': None,
            'weather': True,
            'holiday': True,
            'datetime_encodings': False,
        }

        with open(f'sweep_configurations/config_sweep_{model}.json', 'r') as fp:
            sweep_config = json.load(fp)                  

        sweep_config['name'] = model + 'sweep' + config_run['spatial_scale'] + '_' + config_run['location'] + '_' + str(config_run['temp_resolution'])

        sweep_id = wandb.sweep(sweep_config, project="WattCast_tuning")
        wandb.agent(sweep_id, train_eval_light, count=sweeps)


Create sweep with ID: yuvw2w3x
Sweep URL: https://wandb.ai/wattcast/Wattcast_tuning/sweeps/yuvw2w3x


[34m[1mwandb[0m: Agent Starting Run: td3bykr6 with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	min_samples_leaf: 1
[34m[1mwandb[0m: 	min_samples_split: 2
[34m[1mwandb[0m: 	n_estimators: 200
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run td3bykr6 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run td3bykr6 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: 1mgz8865 with config:
[34m[1mwandb[0m: 	datetime_encodings: 0
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	min_samples_leaf: 5
[34m[1mwandb[0m: 	min_samples_split: 5
[34m[1mwandb[0m: 	n_estimators: 500
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 1mgz8865 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 1mgz8865 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: hih07cap with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	max_depth: 15
[34m[1mwandb[0m: 	min_samples_leaf: 1
[34m[1mwandb[0m: 	min_samples_split: 15
[34m[1mwandb[0m: 	n_estimators: 1000
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run hih07cap errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run hih07cap errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: zdm3elus with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	max_depth: 10
[34m[1mwandb[0m: 	min_samples_leaf: 1
[34m[1mwandb[0m: 	min_samples_split: 15
[34m[1mwandb[0m: 	n_estimators: 50
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666883975, max=1.0)…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run zdm3elus errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run zdm3elus errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: spm5ufag with config:
[34m[1mwandb[0m: 	datetime_encodings: 0
[34m[1mwandb[0m: 	max_depth: 5
[34m[1mwandb[0m: 	min_samples_leaf: 2
[34m[1mwandb[0m: 	min_samples_split: 5
[34m[1mwandb[0m: 	n_estimators: 10
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666495924, max=1.0)…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run spm5ufag errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run spm5ufag errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: rqz9av94 with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	max_depth: 2
[34m[1mwandb[0m: 	min_samples_leaf: 2
[34m[1mwandb[0m: 	min_samples_split: 10
[34m[1mwandb[0m: 	n_estimators: 500
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run rqz9av94 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run rqz9av94 errored: KeyError('No object named village_1/60min/train_target in the file')
Detected 5 failed runs in a row at start, killing sweep.
[34m[1mwandb[0m: [32m[41mERROR[0m Detected 5 failed runs in a row at start, killing sweep.
[34m[1mwandb[0m: To change this value set WANDB_AGENT_MAX_INITIAL_FAILURES=val


Create sweep with ID: elze0278
Sweep URL: https://wandb.ai/wattcast/Wattcast_tuning/sweeps/elze0278


[34m[1mwandb[0m: Agent Starting Run: l2ggjss5 with config:
[34m[1mwandb[0m: 	datetime_encodings: 0
[34m[1mwandb[0m: 	learning_rate: 0.1
[34m[1mwandb[0m: 	max_depth: 3
[34m[1mwandb[0m: 	min_child_weight: 1
[34m[1mwandb[0m: 	n_estimators: 1000
[34m[1mwandb[0m: 	objective: reg:squarederror
[34m[1mwandb[0m: 	reg_lambda: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

Run l2ggjss5 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run l2ggjss5 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: tddot65n with config:
[34m[1mwandb[0m: 	datetime_encodings: 0
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	min_child_weight: 10
[34m[1mwandb[0m: 	n_estimators: 500
[34m[1mwandb[0m: 	objective: reg:pseudohubererror
[34m[1mwandb[0m: 	reg_lambda: 0.3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run tddot65n errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run tddot65n errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: ej22zey8 with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	learning_rate: 0.05
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	min_child_weight: 10
[34m[1mwandb[0m: 	n_estimators: 1000
[34m[1mwandb[0m: 	objective: reg:pseudohubererror
[34m[1mwandb[0m: 	reg_lambda: 0.3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666883975, max=1.0)…



Getting data...


VBox(children=(Label(value='0.000 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.0, max…

Run ej22zey8 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run ej22zey8 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: 2j399o91 with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	min_child_weight: 10
[34m[1mwandb[0m: 	n_estimators: 500
[34m[1mwandb[0m: 	objective: reg:squarederror
[34m[1mwandb[0m: 	reg_lambda: 0.5
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333330477276, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 2j399o91 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 2j399o91 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: 74xgrdz0 with config:
[34m[1mwandb[0m: 	datetime_encodings: 0
[34m[1mwandb[0m: 	learning_rate: 0.3
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	min_child_weight: 1
[34m[1mwandb[0m: 	n_estimators: 100
[34m[1mwandb[0m: 	objective: reg:pseudohubererror
[34m[1mwandb[0m: 	reg_lambda: 0.3
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016933333330477276, max=1.0…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

Run 74xgrdz0 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: [32m[41mERROR[0m Run 74xgrdz0 errored: KeyError('No object named village_1/60min/train_target in the file')
[34m[1mwandb[0m: Agent Starting Run: 8n4rd050 with config:
[34m[1mwandb[0m: 	datetime_encodings: 1
[34m[1mwandb[0m: 	learning_rate: 0.2
[34m[1mwandb[0m: 	max_depth: 12
[34m[1mwandb[0m: 	min_child_weight: 5
[34m[1mwandb[0m: 	n_estimators: 1000
[34m[1mwandb[0m: 	objective: reg:squarederror
[34m[1mwandb[0m: 	reg_lambda: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01691666666883975, max=1.0)…



Getting data...


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

[34m[1mwandb[0m: Ctrl + C detected. Stopping sweep.


Create sweep with ID: nmsmq4jn
Sweep URL: https://wandb.ai/wattcast/Wattcast_tuning/sweeps/nmsmq4jn


[34m[1mwandb[0m: Agent Starting Run: crx1oeeh with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	datetime_encodings: 0
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	hidden_dim: 128
[34m[1mwandb[0m: 	lr: 0.1
[34m[1mwandb[0m: 	n_rnn_layers: 1
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.016666666666666666, max=1.0…