# David Pugh way

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model, metrics, model_selection

In [None]:
%matplotlib inline

## Prepare datasets

In [None]:
# Read the EGEDA CSV with an (Economy, Year) row index, then list its columns.
df = pd.read_csv(
    "../data/final/EGEDA_2019_09_25_tidy.csv",
    index_col=["Economy", "Year"],
)
df.columns

In [None]:
# Keep only the fuel-code label and the residential column, then preview.
df = df[["Fuel Code", "15.1.2 Residential"]]
df.head()

In [None]:
# Reshape so each fuel code becomes its own column of residential values,
# indexed by (Economy, Year); pivot_table's default aggregation applies.
df = pd.pivot_table(
    df,
    index=["Economy", "Year"],
    columns=["Fuel Code"],
    values="15.1.2 Residential",
)
df.head()

In [None]:
# List the fuel codes that became columns after the pivot.
df.columns

In [None]:
# Narrow the table to the fuel columns of interest, in this explicit order.
df = df[["Tot", "Coal", "Heat", "Elec", "Gas", "Oil", "PetPD", "RenB"]]
df.head()

In [None]:
# Macro inputs are read from a CSV for now; they are meant to come from the
# macro model output later.
df_macro = pd.read_csv(
    "../data/raw/Residential/AUS_Data.csv",
    index_col=["Economy", "Year"],
)
df_macro.head()

In [None]:
# Only GDP and POP are used as model inputs; show the last rows as a check.
df_macro = df_macro[["GDP", "POP"]]
df_macro.tail()

In [None]:
# Economy labels must match the energy table: rewrite "01_AUS" as "AUS".
# The index is flattened first so the replacement reaches the Economy values,
# then the (Economy, Year) index is rebuilt.
df_macro = (
    df_macro.reset_index()
            .replace({"01_AUS": "AUS"})
            .set_index(["Economy", "Year"])
)
df_macro.head()

In [None]:
# Attach the macro columns on the shared (Economy, Year) index and keep only
# the AUS rows (list selector, so the Economy index level is retained).
df = df.join(df_macro).loc[["AUS"]]

In [None]:
# Preview the joined table after restricting it to AUS.
df.head()

In [None]:
# List the columns now available: the fuel columns plus the joined GDP and POP.
df.columns

In [None]:
# Read the forward-looking macro assumptions, indexed by Economy only.
df_future = pd.read_csv(
    "../data/raw/Residential/MacroAssumptions_datetime.csv",
    index_col=["Economy"],
)
df_future.head()

In [None]:
# Placeholder input: to be replaced with the macro model output.
# Discard the original "Year" column, promote "Year2" to "Year", rename
# "Population" to "POP", and index the result by (Economy, Year).
df2 = (
    df_future.drop(columns=["Year"])
             .rename(columns={"Year2": "Year", "Population": "POP"})
             .reset_index()
             .set_index(["Economy", "Year"])
)
df2.head()

In [None]:
# Economy labels must match the energy table: rewrite "01_AUS" as "AUS",
# rebuilding the (Economy, Year) index afterwards, then print a summary.
df2 = (
    df2.reset_index()
       .replace({"01_AUS": "AUS"})
       .set_index(["Economy", "Year"])
)
df2.info()

In [None]:
# List the columns of the reindexed future-assumptions frame.
df2.columns

## Fit models

In [None]:
# Target columns to model and the explanatory variables shared by every model.
# (An alternative target list kept for reference:
#  'RETT','RECL','RECP','REPP','REKR','REGD','RELP')
targets = ['Tot', 'Coal', 'Gas', 'Elec']
features = ['GDP', 'POP']

# One fitted multi-output regressor per economy, keyed by economy label.
demand_models = {}
for economy, _df in df.groupby(level="Economy"):

    # Discard columns with no data at all, then rows with any gap.
    _processed_data = _df.dropna(axis=1, how="all").dropna(axis=0, how="any")

    # Boolean mask over the surviving columns: the selected targets keep the
    # frame's own column order, not the order of the ``targets`` list.
    _targets = _processed_data.columns.isin(targets)
    _processed_target = _processed_data.loc[:, _targets]
    _processed_features = _processed_data[features]

    # Plain least-squares fit with no regularization. A regularized alternative
    # (linear_model.MultiTaskElasticNetCV with a
    # model_selection.TimeSeriesSplit cross-validator) is left disabled here.
    _regressor = linear_model.LinearRegression()
    _regressor.fit(_processed_features, _processed_target)
    demand_models[economy] = _regressor

## Training Errors

In [None]:
# In-sample predictions and their RMSE, both keyed by economy label.
predictions = {}
training_error = {}

for economy, _df in df.groupby(level="Economy"):

    # Same cleaning as the fitting step: empty columns out, incomplete rows out.
    _processed_data = _df.dropna(axis=1, how="all").dropna(axis=0, how="any")

    _targets = _processed_data.columns.isin(targets)
    _processed_target = _processed_data.loc[:, _targets]
    _processed_features = _processed_data[features]

    # Predict on the training features and label the output like the targets
    # so the two frames can be compared directly.
    _predictions = demand_models[economy].predict(_processed_features)
    _predictions_df = pd.DataFrame(
        _predictions,
        index=_processed_target.index,
        columns=_processed_target.columns,
    )
    predictions[economy] = _predictions_df

    # Root of the mean squared error; noted as ktoe by the original author.
    _rmse = metrics.mean_squared_error(_processed_target, _predictions_df) ** 0.5
    training_error[economy] = _rmse

In [None]:
# Show the training RMSE computed for AUS.
training_error['AUS']

In [None]:
def plot_training_errors(Economy):
    """Plot historical targets against in-sample predictions for one economy.

    Reads the module-level ``df``, ``targets`` and ``predictions``.

    Parameters
    ----------
    Economy : str
        Economy label; used as the first index level of ``df`` and as the key
        into ``predictions``.

    Returns
    -------
    fig, axes
        The figure and axes returned by ``plt.subplots``, one panel per
        target column.
    """
    # Same cleaning as the fitting step: empty columns out, incomplete rows out.
    _processed_data = (df.loc[Economy, :]
                           .dropna(how="all", axis=1)
                           .dropna(how="any", axis=0))

    _targets = _processed_data.columns.isin(targets)
    _processed_target = _processed_data.loc[:, _targets]
    # Key on the ``Economy`` argument, not on a module-level loop variable,
    # so the function always plots the economy it was asked for.
    _predictions = predictions[Economy].loc[Economy]

    _, n_targets = _processed_target.shape
    fig, axes = plt.subplots(1, n_targets, figsize=(20, 6))
    for n in range(n_targets):
        # Targets and predictions share column order, so pair them by position.
        axes[n].plot(_processed_target.iloc[:, n], label="Target")
        axes[n].plot(_predictions.iloc[:, n], label="predictions")
        axes[n].set_ylabel("KTOE")
        axes[n].set_xlabel("Year")
        axes[n].set_title(_processed_target.columns[n])

    fig.tight_layout()
    # One legend on the first panel is enough; every panel uses the same labels.
    axes[0].legend()

    return fig, axes

In [None]:
# Draw the target-vs-prediction panels for AUS; binding the returned
# (fig, axes) to _ keeps the cell from echoing it.
_ = plot_training_errors("AUS")


## Compute the forecasts from 2017 to 2050

In [None]:
# Preview the future macro assumptions used as forecast inputs below.
df2.head()

In [None]:
# Restrict the forecast inputs to AUS; the list selector keeps the Economy
# index level so the frame can still be grouped by economy.
prepared_df2 = df2.loc[["AUS"], :]
prepared_df2.head()

In [None]:
# Show the list of target names.
targets

In [None]:
# Forecast frame per economy, keyed by economy label.
forecasts = {}
for economy, _df in prepared_df2.groupby(level="Economy"):

    # Drop empty columns, then any row with a missing value.
    _processed_data = (_df.dropna(how="all", axis=1)
                         .dropna(how="any", axis=0))
    _processed_features = _processed_data.loc[:, features]

    # Recover the target labels in the order the model was fitted on. The
    # fitting step picks targets from ``df`` with a boolean mask after dropping
    # all-NaN columns, so the model's outputs follow df's column order rather
    # than the order of the ``targets`` list. Labelling with ``targets``
    # directly would attach the wrong names wherever the two orders differ.
    _history = df.loc[[economy]].dropna(how="all", axis=1)
    _target_columns = _history.columns[_history.columns.isin(targets)]

    _predictions = demand_models[economy].predict(_processed_features)
    _predictions_df = pd.DataFrame(data=_predictions,
                                   index=_processed_data.index,
                                   columns=_target_columns)
    forecasts[economy] = _predictions_df

In [None]:
def plot_forecasts(economy):
    """Plot observed targets, the in-sample fit and the forecast for one economy.

    Reads the module-level ``df``, ``targets``, ``predictions`` and
    ``forecasts``. Series are matched across the three frames by column
    position; panel titles come from the observed target columns.

    Parameters
    ----------
    economy : str
        Economy label; used as the first index level of ``df`` and as the key
        into ``predictions`` and ``forecasts``.

    Returns
    -------
    fig, axes
        The figure and axes returned by ``plt.subplots``, one panel per
        target column.
    """
    # Same cleaning as the fitting step: empty columns out, incomplete rows out.
    history = df.loc[economy, :].dropna(axis=1, how="all").dropna(axis=0, how="any")
    observed = history.loc[:, history.columns.isin(targets)]

    fitted = predictions[economy].loc[economy]
    projected = forecasts[economy].loc[economy]

    n_targets = observed.shape[1]
    fig, axes = plt.subplots(1, n_targets, figsize=(20, 6))
    for n, name in enumerate(observed.columns):
        panel = axes[n]
        panel.plot(observed.iloc[:, n], 'o', label="Target")
        panel.plot(fitted.iloc[:, n], label="historical")
        panel.plot(projected.iloc[:, n], label="forecast")
        panel.set_ylabel("KTOE")
        panel.set_xlabel("Year")
        panel.set_title(name)

    fig.tight_layout()
    # One legend on the first panel; every panel uses the same three labels.
    axes[0].legend()

    return fig, axes

In [None]:
# Draw the observed / fitted / forecast panels for AUS; binding the returned
# (fig, axes) to _ keeps the cell from echoing it.
_ = plot_forecasts("AUS")

In [None]:
# Show the last rows of the AUS forecast frame.
forecasts['AUS'].tail()