# Residential demand estimation

In [None]:
%matplotlib inline

In [None]:
# import packages

import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_data.csv",
                index_col=['Economy','Year'])

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.info()

In [None]:
# split into training and test sets

feature_list = ["GDP","CSGDP","POP","POPU","GDPC"]

train = (df.loc['01_AUS',:]
           .loc[1980:2000])
train.head()

In [None]:
test = (df.loc['01_AUS',:]
          .loc[2001:2016])
test.tail()

In [None]:
# training
y = train.loc[:,"RETT"]
X_train = train.loc[:,feature_list]

# test
X_test = test.loc[:,feature_list]

In [None]:
X_train.head()

In [None]:
X_test.tail()

In [None]:
# NOTE(review): statsmodels' OLS does not add an intercept on its own, so this
# regresses y on the raw feature columns with no constant term. If an
# intercept is wanted, sm.add_constant has to be applied to X_train and to the
# frame later passed to predict() — confirm which is intended.
model = sm.OLS(y,X_train)

In [None]:
fit_model = model.fit()

In [None]:
fit_model.summary()

In [None]:
# test model

results = fit_model.predict(X_test)

In [None]:
results

In [None]:
fig, ax = plt.subplots()
ax.plot(train.loc[:,'RETT'], 'o', label="Train")
ax.plot(results, 'o', label="Test")
ax.plot(df.loc['01_AUS',"RETT"], '-g', label="Actual")
ax.legend(loc="best")

#### next steps

- add lags?

## ARIMA

In [None]:
# NOTE(review): statsmodels deprecated the `arima_model` module in 0.12 and
# removed it in 0.13 in favour of `statsmodels.tsa.arima.model.ARIMA`, whose
# constructor / fit / forecast signatures differ from the calls used in this
# section — confirm the statsmodels version this notebook is pinned to.
from statsmodels.tsa.arima_model import ARIMA

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_simple.csv",
                index_col=['Economy','Date'])

In [None]:
df.head()

In [None]:
df.info()

In [None]:
feature_list = ["GDP","CSGDP","POP","POPU","GDPC"]

train = (df.loc['01_AUS',:]
           .iloc[1:21]
           .drop(columns="Year"))
train.tail()

In [None]:
# Target: keep RETT as a one-column DataFrame (hence the double brackets).
y = train[['RETT']]
# Regressors: every other column of `train`. Drop by label instead of passing
# the target DataFrame itself as the `columns` argument, which only worked
# because iterating a DataFrame happens to yield its column names.
X_train = train.drop(columns="RETT")
X_train.tail()

In [None]:
model = ARIMA(y, (0,1,0),X_train, freq='A')

In [None]:
model_fit = model.fit(disp=0)

In [None]:
print(model_fit.summary())

In [None]:
test = (df.loc['01_AUS',:]
           .iloc[21:37]
           .drop(columns="Year"))
test.tail()

In [None]:
X_test = test.drop(columns="RETT")
X_test.head()

In [None]:
forecast = model_fit.forecast(steps=16,exog=X_test)

In [None]:
forecast = model_fit.forecast?

In [None]:
forecast = model_fit.forecast

In [None]:
forecast = model_fit.predict

In [None]:
ARIMA?

# Prophet

In [None]:
from fbprophet import Prophet

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_data.csv")

In [None]:
df.head()

In [None]:
df['ds'] = pd.to_datetime(df.Year,format='%Y')

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
simple_df = df[['RETT','GDP','POP','ds']]

In [None]:
simple_df.head()

In [None]:
simple_df = simple_df.rename(columns={"RETT":"y"})
simple_df.head()

In [None]:
m = Prophet(daily_seasonality=False,
            weekly_seasonality=False,
            yearly_seasonality=False,
            seasonality_mode='additive',
            growth='linear')

m.add_regressor('GDP')
m.add_regressor('POP')
m.fit(simple_df)

In [None]:
# add future GDP

df_future_macro = pd.read_csv('../data/raw/Industry/MacroAssumptions_datetime.csv',
                             index_col=['Economy'])

df_future_macro = df_future_macro.loc['AUS',:]
df_future_macro = df_future_macro.reset_index(drop=True)

df_future_macro.head()

In [None]:
df_future_macro = df_future_macro.rename(columns={"Population":"POP"})
df_future_macro.head()

In [None]:
# The history timestamps were built with pd.to_datetime(..., format='%Y'),
# i.e. they fall on 1 January of each year. Use the year-start alias 'YS' so
# the 34 appended periods also land on 1 January; 'Y' is the year-end alias
# and would place the future rows on 31 December instead.
future = m.make_future_dataframe(periods=34,
                                 freq='YS',
                                 include_history=True)
future.head()

In [None]:
future.tail()

In [None]:
df1 = simple_df.drop(columns=['y','ds'])
df1.head()

In [None]:
df2 = df_future_macro.drop(columns=['Year'])
df2.head()

In [None]:
df_future = pd.concat([df1,df2],
                      ignore_index=True)

df_future.head()

In [None]:
df_future.tail()

In [None]:
future = future.join(df_future)

In [None]:
future.head()

In [None]:
future.tail()

In [None]:
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

In [None]:
fig1 = m.plot(forecast)

In [None]:
future.iloc[20:40,:]

# SciKit-Learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
model = LinearRegression()

In [None]:
# Regress the demand column 'y' (RETT renamed for Prophet) on GDP and POP.
# The target must be 'y'; 'ds' is the timestamp column, not a quantity to fit.
model.fit(simple_df[["GDP","POP"]],simple_df[['y']])

In [None]:
model.fit(X_train,y)

In [None]:
predictions = model.predict(X_test)

In [None]:
predictions

In [None]:
model2 = LinearRegression()

In [None]:
model2.fit(simple_df[["GDP","POP"]],simple_df[['y']])

In [None]:
predictions2 = model2.predict(future[["GDP","POP"]])
predictions2

In [None]:
fig, ax = plt.subplots()
ax.plot(future[['ds']],predictions2, 'o', label="Predictions")
ax.plot(simple_df[['ds']],simple_df[["y"]], 'o', label="Actual")
ax.legend(loc="best")

# Using own functions

In [None]:
def run_regression(models, economies, df, x, y):
    """
    Fit one regression model per economy.

    models    = dictionary of economy-model pairs, e.g.
                {economy: LinearRegression() for economy in economies}
    economies = list of economies (not read here; the keys of `models`
                decide which economies are fitted)
    df        = dataframe whose rows are selected with df.loc[economy, ...]
    x, y      = column labels of the regressors and of the target

    Each model is fitted in place on the rows of its own economy and the
    same `models` dictionary is returned, so every economy ends up with
    its own set of coefficients.
    """
    for economy in models:
        features = df.loc[economy, x]
        target = df.loc[economy, y]
        models[economy].fit(features, target)
    return models

In [None]:
def run_prediction(models, economies, df, ResultsColumn):
    """
    Use the fitted models from run_regression to generate predictions.

    models        = dictionary of economy-model pairs; every model must
                    provide predict()
    economies     = list of economies (kept for interface compatibility;
                    the keys of `models` decide which economies are used)
    df            = dataframe of regressors, selected per economy with
                    df.loc[economy, :]; the remaining index must be named
                    'Year'
    ResultsColumn = name of prediction results (kept for interface
                    compatibility; the predictions are always stored in a
                    column called "Results")

    Returns one dataframe indexed by ('Economy', 'Year') that holds the
    input columns plus the "Results" column for every economy.
    """
    frames = []
    for economy, model in models.items():
        features = df.loc[economy, :]
        prediction = model.predict(features)

        # Work on a copy so the new columns never touch the caller's frame.
        economy_results = features.copy()
        economy_results.insert(loc=0, column="Results", value=prediction)
        economy_results.insert(loc=0, column='Economy', value=economy)
        economy_results = (economy_results.reset_index()
                                          .set_index(['Economy', 'Year']))
        frames.append(economy_results)

    # combine individual economy dataframes to one dataframe
    return pd.concat(frames, sort=True)

In [None]:
# create plotting function

def plot2(economies, df, figurename, Plotylabel, share_x, share_y):
    """
    Line plot for up to 21 economies on a fixed 3 x 7 grid of subplots.

    economies = economies to plot, one subplot each (anything beyond the
                21 available axes is ignored)
    df = dataframe of data to plot with 'Economy' and 'Year' columns.
         Note: each line must be a column.
    figurename = path the figure is saved to
    Plotylabel = y label for graph
    share_x = share the x axis (True or False)
    share_y = share the y axis (True or False)
    """
    print('Preparing plots...')
    # Create the 'figure'
    plt.style.use('tableau-colorblind10')

    # multiple line plot
    fig, axes = plt.subplots(nrows=3, ncols=7, sharex=share_x, sharey=share_y, figsize=(16,12))
    for ax, economy in zip(axes.flatten(), economies):
        print(f'Creating plot for {economy}...')
        economy_data = df[df['Economy'] == economy]

        # one line per data column, labelled with the column it shows
        for column in economy_data.drop(['Economy','Year'], axis=1):
            ax.plot(economy_data['Year'], economy_data[column], marker='', linewidth=1.5, label=column)

        # title and axis label are per subplot, so set them once
        ax.set_title(economy)
        ax.set_ylabel(Plotylabel)
        ax.label_outer()

    # shared legend: one entry per data column
    fig.legend(list(df.drop(['Economy','Year'], axis=1)), bbox_to_anchor=(0,0,1,0.25), loc='lower center', ncol=9)
    fig.savefig(figurename,dpi=200)
    print(f'Figure saved as {figurename}')
    print('Preparing to show the figure...')
    plt.show()

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_data.csv",
                index_col=['Economy','Year'])

In [None]:
df.head()

In [None]:
economies = df.index.unique(level=0)
economies

In [None]:
# get list of economies and create economy-model pairs

models = {economy: LinearRegression() for economy in economies}

In [None]:
x = ['GDP','POP']
y = ['RETT']
df.head()

In [None]:
run_regression(models,economies,df,x,y)

In [None]:
# make predictions

df_future = pd.read_csv("../data/raw/Residential/MacroAssumptions_datetime-AUS.csv",
                        index_col=['Economy'])
df_future.head()

In [None]:
# Name of the prediction column handed to run_prediction.
ResultsColumn = ["y_prediction"]

# Swap the original Year column for Year2, rename Population to POP (the
# feature name used when fitting) and index the rows by (Economy, Year).
df2 = (df_future.drop(columns=["Year"])
                .rename(columns={"Year2":"Year","Population":"POP"})
                .reset_index(drop=False)
                .set_index(['Economy','Year']))
df2.head()

In [None]:
results = run_prediction(models, economies, df2, ResultsColumn)
results = results.rename(columns={"Results":"Predicted RETT"})

In [None]:
results.tail()

In [None]:
x = results.loc['01_AUS']
x.head()

In [None]:
fig, ax = plt.subplots()
ax.plot(x.index,x[['Predicted RETT']], 'o', label="Predictions")
#ax.plot(simple_df[['ds']],simple_df[["y"]], 'o', label="Actual")
#ax.legend(loc="best")

# Cleaned up using own functions

In [None]:
%matplotlib inline

In [None]:
# import packages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [None]:
def run_regression(models, economies, df, x, y):
    """
    Perform linear regression for one or multiple economies.

    models    = {economy: LinearRegression() for economy in economies}
    economies = list of economies (unused; `models` drives the loop)
    df        = dataframe indexed so that df.loc[economy, columns] works
    x         = regressor column labels
    y         = target column labels

    Returns the `models` dictionary after fitting each estimator on its
    own economy, i.e. each economy has its own set of coefficients.
    """
    for economy, estimator in models.items():
        estimator.fit(df.loc[economy, x], df.loc[economy, y])
    return models

In [None]:
def run_prediction(models, economies, df, ResultsColumn):
    """
    Generate predictions with the models fitted by run_regression.

    models        = dictionary of economy-model pairs
    economies     = list of economies (not read; `models` drives the loop)
    df            = dataframe of regressors, selected per economy with
                    df.loc[economy, :]
    ResultsColumn = name of prediction results (not read; predictions are
                    stored in a column called "Results")

    Returns a single dataframe indexed by ('Economy', 'Year') containing
    the input columns plus "Results" for every economy in `models`.
    """
    per_economy = []
    for economy in models:
        # predict from this economy's rows
        predicted = models[economy].predict(df.loc[economy, :])

        # attach the prediction and the economy label to those rows
        subset = df.loc[economy, :]
        subset.insert(loc=0, column="Results", value=predicted)
        subset.insert(loc=0, column='Economy', value=economy)
        per_economy.append(subset.reset_index().set_index(['Economy', 'Year']))

    # stack the per-economy frames into one dataframe
    return pd.concat(per_economy, sort=True)

In [None]:
# NOTE(review): the earlier sections of this notebook read "AUS_data.csv"
# (lower-case "d"); on a case-sensitive filesystem only one spelling can
# match the file on disk — confirm which name is correct.
df = pd.read_csv("../data/raw/Residential/AUS_Data.csv",
                index_col=['Economy','Year'])

In [None]:
economies = df.index.unique(level=0)
economies

In [None]:
# get list of economies and create economy-model pairs

models = {economy: LinearRegression() for economy in economies}

In [None]:
x = ['GDP','POP']
y = ['RETT']
df.tail()

In [None]:
run_regression(models,economies,df,x,y)

In [None]:
# make predictions

df_future = pd.read_csv("../data/raw/Residential/MacroAssumptions_datetime.csv",
                        index_col=['Economy'])
df_future.tail()

In [None]:
# Label for the prediction column passed on to run_prediction.
ResultsColumn = ["y_prediction"]

# Build the prediction input: discard the original Year column, let Year2
# take its place, rename Population to POP, then index by (Economy, Year).
column_names = {"Year2":"Year","Population":"POP"}
df2 = (
    df_future
    .drop(columns=["Year"])
    .rename(columns=column_names)
    .reset_index(drop=False)
    .set_index(['Economy','Year'])
)
df2.tail()

In [None]:
results = run_prediction(models, economies, df2, ResultsColumn)
results = results.rename(columns={"Results":"Predicted RETT"})
results.tail()

In [None]:
x = results.loc['01_AUS']
x.head()

In [None]:
fig, ax = plt.subplots()
ax.plot(x.index,x[['Predicted RETT']], 'o', label="Predictions")