In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor, plot_tree

from weather_electric.aggregate import load_aggregate_csv
from weather_electric.models import (
    model_mse,
    create_mean_model,
    create_hourly_model,
    create_temp_model,
    create_scikit_model,
    usage_per_temp,
    usage_per_hour,
    zero_model,
)

In [None]:
# Load the aggregate CSV once; every later cell in this notebook reads from
# `dataset`. The path is relative, not absolute.
aggregate_csv = '../../data/aggregate.csv'
dataset = load_aggregate_csv(aggregate_csv)

In [None]:
# Reference score: build the mean model from the dataset and evaluate it on
# that same dataset (there is no separate hold-out set in this cell).
mean_model = create_mean_model(dataset)
mean_mse = model_mse(dataset, mean_model)
print('MSE of mean prediction:', mean_mse)

In [None]:
# NOTE(review): the hourly-model MSE below is disabled in the notebook; the
# reason is not recorded here. Left as-is so it can be re-enabled.
# hourly_mse = model_mse(dataset, create_hourly_model(dataset))
# print('MSE of hourly prediction:', hourly_mse)

# Bar chart of the per-hour averages returned by usage_per_hour
# (a mapping: its keys are the x positions, its values the bar heights).
hourly_avgs = usage_per_hour(dataset)

hourly_fig = plt.figure('Average Hourly Usage')
hourly_ax = hourly_fig.gca()
hourly_ax.bar(hourly_avgs.keys(), hourly_avgs.values())
hourly_ax.set_xlabel('Hour')
hourly_ax.set_ylabel('Mean usage (kW)')
plt.show()

In [None]:
# Score the temperature model on the dataset it was built from.
temp_usages = usage_per_temp(dataset)
temp_mse = model_mse(dataset, create_temp_model(dataset))
print('MSE of temperature prediction:', temp_mse)

# Sort the (key, value) pairs of `temp_usages` by key so the SVR curve below
# is drawn left to right. Keys are presumably temperatures in Fahrenheit and
# values mean usage in kW (per the axis labels) -- confirm in usage_per_temp.
temps, mean_usages = zip(*sorted(temp_usages.items()))
temps = np.array(temps)
mean_usages = np.array(mean_usages)

# This figure needs its own label: pyplot re-activates an existing figure
# when the same label is requested, and the hourly chart already uses
# 'Average Hourly Usage'.
usage_fig, usage_ax = plt.subplots(num='Average Usage by Temperature')
usage_ax.bar(temps, mean_usages)
usage_ax.set_xlabel('Temperature (F)')
usage_ax.set_ylabel('Mean usage (kW)')
plt.show()

# Support-vector regression on temperature alone.
# NOTE(review): gamma and C are hand-picked values; no search is shown here.
svr = SVR(gamma=0.1, C=0.2)
svr_model = create_scikit_model(dataset, svr, ['temp'])
print('MSE of temp SVR:', model_mse(dataset, svr_model))

# `svr.predict` is called directly, so this relies on create_scikit_model
# having fitted `svr` in place. `temps[:, None]` turns the 1-D temperature
# array into a single-column 2-D array for predict.
svr_fig, svr_ax = plt.subplots(num='Hourly Usage SVR Fit')
svr_ax.plot(temps, svr.predict(temps[:, None]))
svr_ax.set_xlabel('Temperature (F)')
svr_ax.set_ylabel('Predicted usage (kW)')
plt.show()

In [None]:
# Dataset fields tried as regression inputs in the cells below.
# The order matters: fitted coefficients are later paired with this list.
weather_fields = [
    'temp', 'cloudcover', 'wind_speed', 'precip_inches',
    'humidity', 'pressure', 'uv_index',
]

# Built once and reused as the `residual=` argument of create_scikit_model.
hourly_model = create_hourly_model(dataset)

def mse_for_field(name, residual):
    """Return the MSE of a one-feature linear regression on ``dataset``.

    A fresh ``LinearRegression`` is wrapped by ``create_scikit_model`` using
    only the field ``name``, and the resulting model is scored with
    ``model_mse`` on the same ``dataset``.

    Args:
        name: Dataset field used as the single regression feature.
        residual: If truthy, ``hourly_model`` is passed as the ``residual``
            argument of ``create_scikit_model``; otherwise ``zero_model`` is.
            (Presumably the regression then models what the residual model
            leaves unexplained -- confirm in ``weather_electric.models``.)

    Returns:
        Whatever ``model_mse`` returns for the fitted model.
    """
    if residual:
        residual_model = hourly_model
    else:
        residual_model = zero_model
    regressor = LinearRegression()
    predictor = create_scikit_model(dataset, regressor, [name], residual=residual_model)
    return model_mse(dataset, predictor)

# Score one single-feature linear regression per weather field, twice:
# once with zero_model and once with hourly_model as the residual model
# (see mse_for_field).
linear_mses = [mse_for_field(field, False) for field in weather_fields]
residual_mses = [mse_for_field(field, True) for field in weather_fields]

# Order the fields by base-model MSE, lowest first, and fetch each field's
# residual MSE by name so both bar series share the same x order.
residual_by_field = dict(zip(weather_fields, residual_mses))
sorted_mses, sorted_fields = zip(*sorted(zip(linear_mses, weather_fields)))
sorted_residual_mses = [residual_by_field[field] for field in sorted_fields]
baseline_mse = model_mse(dataset, create_mean_model(dataset))

# Both series are drawn at the same x positions, so the second call paints
# over the first: a base-model bar is only visible where it is taller than
# the residual bar for that field.
mse_fig, mse_ax = plt.subplots()
mse_ax.bar(sorted_fields, sorted_mses, label='Base model')
mse_ax.bar(sorted_fields, sorted_residual_mses, label='Hourly residual')
mse_ax.tick_params(axis='x', labelrotation=90)
# The y axis is truncated on purpose: it starts just under the best residual
# MSE and ends just above the mean-model MSE, so small gaps stay readable.
mse_ax.set_ylim(min(sorted_residual_mses) * 0.9, baseline_mse * 1.01)
mse_ax.set_xlabel('Variable')
mse_ax.set_ylabel('Linear regression MSE')
mse_ax.legend()
plt.show()

In [None]:
# Linear regression on temperature alone, with hourly_model passed as the
# residual model. `coef_` is read from the estimator handed to
# create_scikit_model, which therefore must have been fitted in place.
model = LinearRegression()
residual_temp_model = create_scikit_model(
    dataset,
    model,
    ['temp'],
    residual=hourly_model,
)
temp_coeffs = model.coef_
print('residual temp coeffs', temp_coeffs)
residual_temp_mse = model_mse(dataset, residual_temp_model)
print('residual temp MSE:', residual_temp_mse)

# Same setup with every weather field as a feature. A new estimator is
# created so this fit starts from scratch.
model = LinearRegression()
residual_weather_model = create_scikit_model(
    dataset,
    model,
    weather_fields,
    residual=hourly_model,
)
# Pairs each coefficient with a field name by position -- this assumes the
# estimator saw the features in `weather_fields` order.
weather_coeffs = [(field, coeff) for field, coeff in zip(weather_fields, model.coef_)]
print('residual weather coeffs', weather_coeffs)
residual_weather_mse = model_mse(dataset, residual_weather_model)
print('residual weather MSE:', residual_weather_mse)

In [None]:
# Depth-3 regression tree on temperature and hour, kept shallow so the
# rendered diagram stays readable.
tree_features = ['temp', 'hour']
# A fixed seed keeps the fitted tree (and the saved diagram) identical across
# runs even when two candidate splits score the same.
tree = DecisionTreeRegressor(max_depth=3, random_state=0)
tree_fn = create_scikit_model(dataset, tree, tree_features)
print('tree MSE:', model_mse(dataset, tree_fn))

# Draw on a dedicated figure rather than whichever axes happens to be
# current, and save that same figure.
# NOTE(review): feature_names assumes create_scikit_model passes the columns
# to the estimator in the order listed in `tree_features` -- confirm.
tree_fig, tree_ax = plt.subplots()
plot_tree(tree, feature_names=tree_features, ax=tree_ax)
tree_fig.savefig('tree.svg')