# Daily training of the PSCO short-term load forecast model

## Setup

In [77]:
import pandas as pd
import requests
from datetime import datetime
import pickle

import eeweather
import pytz

from tensorflow.keras.models import load_model

## Hyperparameters

In [78]:
# --- EIA series request ---------------------------------------------------
# The request URL is assembled elsewhere in this notebook as
# URL_EIA_01 + API_KEY + URL_EIA_02 + SERIES_ID.
SERIES_ID = 'EBA.PSCO-ALL.D.H'
API_KEY = 'API_KEY'  # placeholder text; presumably replaced by a real key at run time
URL_EIA_01 = 'http://api.eia.gov/series/?api_key='
URL_EIA_02 = '&series_id='
NAME = 'load'  # column name given to the series values

# --- Weather station lookup: Denver International Airport -----------------
LAT, LON = 39.8328, -104.6575

# --- Persisted artifacts (Keras model file and pickled scaler) ------------
PATH_SCALER = '/home/ubuntu/github/airflow-automation/03_models/scaler.pkl'
PATH_MODEL = '/home/ubuntu/github/airflow-automation/03_models/nn.h5'

## Model

In [79]:
# Load the saved Keras model from PATH_MODEL; it is trained further and
# written back to the same path later in this notebook.
model = load_model(PATH_MODEL)

## Scaler

In [80]:
# Load the pickled scaler object from PATH_SCALER (the same path the
# "Save scaler" cell writes to). The `with` block closes the file handle
# as soon as the object has been read instead of leaving it open.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# PATH_SCALER must only ever point at a trusted file.
with open(PATH_SCALER, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

## Load data

In [81]:
# Build the request URL from the configured pieces.
url = URL_EIA_01 + API_KEY + URL_EIA_02 + SERIES_ID
# Print the URL with the key masked so the secret is never written into
# saved notebook output or scheduler logs.
print(URL_EIA_01 + '***' + URL_EIA_02 + SERIES_ID)

# Bound the wait so a stalled connection cannot hang the daily run.
response = requests.get(url, timeout = 60)
print('status_code', response.status_code)
# Fail here on an HTTP error rather than later on a missing 'series' key.
response.raise_for_status()

# The payload's first series carries rows that are read as (period, value).
data = response.json()['series'][0]['data']
df = pd.DataFrame(data, columns = ['period', 'value'])
df.index = pd.to_datetime(df['period'])
df = df.sort_index()

# Keep a copy on the original index; the temperature cell uses its first
# and last timestamps to choose the date range.
# NOTE(review): the name and the tz_convert call below imply the parsed
# index is timezone-aware UTC - confirm against the API's period format.
df_utc = df.copy()

# Move to Denver wall-clock time, then drop the timezone information.
df = df.tz_convert('America/Denver')
df = df.tz_localize(None)
# Naive local timestamps can repeat (presumably the repeated hour at the
# autumn DST change); keep only the first row for each timestamp.
df = df.loc[~df.index.duplicated(keep = 'first')]
df = df.rename(columns = {'value': NAME})
df = df.drop('period', axis = 1)
df = df.sort_index()

df_load = df.copy()

http://api.eia.gov/series/?api_key=API_KEY&series_id=EBA.PSCO-ALL.D.H
status_code 200


In [82]:
# Show the first two rows as a quick check of the parsed load series.
df_load.head(2)

Unnamed: 0_level_0,load
period,Unnamed: 1_level_1
2015-07-01 01:00:00,4875.0
2015-07-01 02:00:00,4618.0


In [83]:
# Show the last two rows to see how recent the load data is.
df_load.tail(2)

Unnamed: 0_level_0,load
period,Unnamed: 1_level_1
2020-07-22 17:00:00,8719.0
2020-07-22 18:00:00,8364.0


## Temperature data

In [84]:
def _utc_midnight(ts):
    """Return 00:00 UTC on the calendar day given by ts.year/month/day."""
    return datetime(ts.year, ts.month, ts.day, tzinfo = pytz.UTC)


# Rank stations for the configured coordinates and let eeweather choose one.
ranked_stations = eeweather.rank_stations(LAT, LON)
station, warnings = eeweather.select_station(ranked_stations)
print('station.name', station.name)
print('warnings', warnings)

# Request temperatures between the days of the first and last load rows.
# Both bounds are cut to midnight, so the hours of the last day are not
# part of the requested range.
start_date = _utc_midnight(df_utc.index[0])
end_date = _utc_midnight(df_utc.index[-1])
print(start_date, end_date)

# Element 0 of the returned value is used as the hourly temperature
# series; convert it from Celsius to Fahrenheit.
tempC = station.load_isd_hourly_temp_data(start_date, end_date)
tempF = (tempC[0] * 1.8 + 32).to_frame(name = 'temp_f')

# Same index treatment as the load frame: Denver wall-clock time with the
# timezone dropped, first row kept for repeated timestamps, then hours
# without a reading removed.
tempF = tempF.tz_convert('America/Denver').tz_localize(None)
tempF = tempF.loc[~tempF.index.duplicated(keep = 'first')].dropna()

df_temp = tempF.copy()

station.name DENVER INTERNATIONAL AIRPORT
2015-07-01 00:00:00+00:00 2020-07-23 00:00:00+00:00


In [85]:
# Show the first two rows as a quick check of the temperature series.
df_temp.head(2)

Unnamed: 0,temp_f
2015-06-30 18:00:00,82.23764
2015-06-30 19:00:00,81.21632


In [86]:
# Show the last two rows to see how recent the temperature data is.
df_temp.tail(2)

Unnamed: 0,temp_f
2020-07-21 23:00:00,70.62998
2020-07-22 00:00:00,66.76844


## Make model data

In [87]:
# Start from the load series and attach temperature by timestamp; load rows
# with no matching temperature row get NaN in temp_f.
df_model = df_load.copy()
df_model['temp_f'] = df_temp['temp_f']

# Add calendar features read from the timestamp index.
df_model = df_model.assign(
    year = lambda frame: frame.index.year,
    month = lambda frame: frame.index.month,
    day = lambda frame: frame.index.day,
    hour = lambda frame: frame.index.hour,
    dow = lambda frame: frame.index.dayofweek,
)

## Update scaler

In [88]:
# Refit the scaler on the feature columns of the whole history. Rows with a
# missing temp_f are still present here; they are only dropped in the next
# cell.
# NOTE(review): refitting changes the scaling the loaded model was
# previously trained with - confirm this is intended.
feature_cols = ['temp_f', 'year', 'month', 'day', 'hour', 'dow']
scaler.fit(df_model[feature_cols])

StandardScaler()

## Training data

In [89]:
# Keep only complete rows, then take the last 24 of them for this update.
df_model = df_model.dropna().tail(24)

# Split off the target. X is the same frame object, which now holds only
# the feature columns.
y = df_model.pop('load')
X = df_model

print(y.shape, X.shape, sep = '\n')

## Normalize

In [91]:
# Scale the training features with the scaler refit earlier in this
# notebook. X holds the same six columns, in the same order, as the list
# passed to scaler.fit.
X_scaled = scaler.transform(X)

## Train model

In [92]:
# Train the loaded model for one epoch on the scaled rows selected above,
# with per-epoch progress output (verbose = 1).
model.fit(X_scaled, y, epochs = 1, verbose = 1)



<tensorflow.python.keras.callbacks.History at 0x7f1e04625a90>

## Save model

In [None]:
# Overwrite the model file at PATH_MODEL with the updated model.
# NOTE(review): the scaler is refit and saved in this notebook too, so the
# two files should be written together to stay consistent.
model.save(PATH_MODEL)

## Save scaler

In [93]:
# Write the refit scaler back to PATH_SCALER. The `with` block flushes and
# closes the file once the dump completes, so the pickle on disk is
# complete when the cell finishes.
with open(PATH_SCALER, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

## Automation timestamp

In [37]:
# Print the time this cell ran. datetime.now() with no tz argument is a
# naive timestamp in the machine's local time.
print(datetime.now())

2020-07-23 00:58:53.255738
