# Residential demand estimation

In [None]:
%matplotlib inline

In [1]:
# import packages

import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_data.csv",
                index_col=['Economy','Year'])

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.info()

In [None]:
# split in to training and test sets

feature_list = ["GDP","CSGDP","POP","POPU","GDPC"]

train = (df.loc['01_AUS',:]
           .loc[1980:2000])
train.head()

In [None]:
test = (df.loc['01_AUS',:]
          .loc[2001:2016])
test.tail()

In [None]:
# training
y = train.loc[:,"RETT"]
X_train = train.loc[:,feature_list]

# test
X_test = test.loc[:,feature_list]

In [None]:
X_train.head()

In [None]:
X_test.tail()

In [None]:
model = sm.OLS(y,X_train)

In [None]:
fit_model = model.fit()

In [None]:
fit_model.summary()

In [None]:
# test model

results = fit_model.predict(X_test)

In [None]:
results

In [None]:
fig, ax = plt.subplots()
ax.plot(train.loc[:,'RETT'], 'o', label="Train")
ax.plot(results, 'o', label="Test")
ax.plot(df.loc['01_AUS',"RETT"], '-g', label="Actual")
ax.legend(loc="best")

#### next steps

- add lags?

## ARIMA

In [None]:
from statsmodels.tsa.arima_model import ARIMA

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_simple.csv",
                index_col=['Economy','Date'])

In [None]:
df.head()

In [None]:
df.info()

In [None]:
feature_list = ["GDP","CSGDP","POP","POPU","GDPC"]

train = (df.loc['01_AUS',:]
           .iloc[1:21]
           .drop(columns="Year"))
train.tail()

In [None]:
y = train[['RETT']]
X_train = train.drop(columns=y)
X_train.tail()

In [None]:
model = ARIMA(y, (0,1,0),X_train, freq='A')

In [None]:
model_fit = model.fit(disp=0)

In [None]:
print(model_fit.summary())

In [None]:
test = (df.loc['01_AUS',:]
           .iloc[21:37]
           .drop(columns="Year"))
test.tail()

In [None]:
X_test = test.drop(columns="RETT")
X_test.head()

In [None]:
forecast = model_fit.forecast(steps=16,exog=X_test)

In [None]:
forecast = model_fit.forecast?

In [None]:
forecast = model_fit.forecast

In [None]:
forecast = model_fit.predict

In [None]:
ARIMA?

# Prophet

In [2]:
from fbprophet import Prophet

In [3]:
df = pd.read_csv("../data/raw/Residential/AUS_data.csv")

In [4]:
df.head()

Unnamed: 0,Economy,Year,RETT,RECL,RECP,REPP,REKR,REGD,RELP,RENG,...,CSLP,CSNG,CSGL,CSOT,CSEL,GDP,CSGDP,POP,POPU,GDPC
0,01_AUS,1980,6040,19,39,785,574,91,120,1103,...,95,387,0,22,1332,439.4,251.3,14.69,12.57,29907.53
1,01_AUS,1981,6098,18,31,586,385,81,120,1184,...,92,419,0,24,1447,454.07,263.59,14.93,12.79,30419.25
2,01_AUS,1982,6305,17,22,541,340,76,125,1318,...,92,486,0,24,1509,469.18,272.66,15.18,13.0,30912.0
3,01_AUS,1983,6309,15,16,473,280,68,125,1367,...,95,522,0,23,1565,458.76,271.43,15.37,13.16,29849.91
4,01_AUS,1984,6411,14,14,441,246,64,131,1418,...,103,552,0,19,1591,479.78,281.19,15.54,13.31,30865.99


In [5]:
df['ds'] = pd.to_datetime(df.Year,format='%Y')

In [6]:
df.head()

Unnamed: 0,Economy,Year,RETT,RECL,RECP,REPP,REKR,REGD,RELP,RENG,...,CSNG,CSGL,CSOT,CSEL,GDP,CSGDP,POP,POPU,GDPC,ds
0,01_AUS,1980,6040,19,39,785,574,91,120,1103,...,387,0,22,1332,439.4,251.3,14.69,12.57,29907.53,1980-01-01
1,01_AUS,1981,6098,18,31,586,385,81,120,1184,...,419,0,24,1447,454.07,263.59,14.93,12.79,30419.25,1981-01-01
2,01_AUS,1982,6305,17,22,541,340,76,125,1318,...,486,0,24,1509,469.18,272.66,15.18,13.0,30912.0,1982-01-01
3,01_AUS,1983,6309,15,16,473,280,68,125,1367,...,522,0,23,1565,458.76,271.43,15.37,13.16,29849.91,1983-01-01
4,01_AUS,1984,6411,14,14,441,246,64,131,1418,...,552,0,19,1591,479.78,281.19,15.54,13.31,30865.99,1984-01-01


In [7]:
df.tail()

Unnamed: 0,Economy,Year,RETT,RECL,RECP,REPP,REKR,REGD,RELP,RENG,...,CSNG,CSGL,CSOT,CSEL,GDP,CSGDP,POP,POPU,GDPC,ds
32,01_AUS,2012,10454,0,0,377,0,24,353,3244,...,1053,10,29,5243,1220.18,801.23,22.73,19.41,53673.1,2012-01-01
33,01_AUS,2013,10485,0,0,385,0,23,362,3329,...,1048,9,46,5417,1252.09,821.19,23.13,19.78,54137.19,2013-01-01
34,01_AUS,2014,10329,0,0,379,0,24,355,3416,...,1130,9,50,5444,1284.25,839.55,23.48,20.1,54705.73,2014-01-01
35,01_AUS,2015,10491,0,1,369,0,26,343,3488,...,1214,10,47,5772,1314.26,863.02,23.82,20.41,55183.74,2015-01-01
36,01_AUS,2016,10518,0,1,342,0,0,342,3564,...,1256,10,58,5789,1351.67,894.55,24.19,20.76,55875.1,2016-01-01


In [8]:
simple_df = df[['RETT','GDP','POP','ds']]

In [9]:
simple_df.head()

Unnamed: 0,RETT,GDP,POP,ds
0,6040,439.4,14.69,1980-01-01
1,6098,454.07,14.93,1981-01-01
2,6305,469.18,15.18,1982-01-01
3,6309,458.76,15.37,1983-01-01
4,6411,479.78,15.54,1984-01-01


In [10]:
simple_df = simple_df.rename(columns={"RETT":"y"})
simple_df.head()

Unnamed: 0,y,GDP,POP,ds
0,6040,439.4,14.69,1980-01-01
1,6098,454.07,14.93,1981-01-01
2,6305,469.18,15.18,1982-01-01
3,6309,458.76,15.37,1983-01-01
4,6411,479.78,15.54,1984-01-01


In [11]:
m = Prophet(daily_seasonality=False,
            weekly_seasonality=False,
            yearly_seasonality=False,
            seasonality_mode='additive',
            growth='linear')

m.add_regressor('GDP')
m.add_regressor('POP')
m.fit(simple_df)

<fbprophet.forecaster.Prophet at 0x21b983bc088>

In [12]:
# add future GDP

df_future_macro = pd.read_csv('../data/raw/Industry/MacroAssumptions_datetime.csv',
                             index_col=['Economy'])

df_future_macro = df_future_macro.loc['AUS',:]
df_future_macro = df_future_macro.reset_index(drop=True)

df_future_macro.head()

Unnamed: 0,Year,GDP,Population
0,12/31/2017,1157.283926,24.451
1,12/31/2018,1201.142573,24.772
2,12/31/2019,1245.039044,25.089
3,12/31/2020,1289.224526,25.398
4,12/31/2021,1333.639686,25.7


In [13]:
df_future_macro = df_future_macro.rename(columns={"Population":"POP"})
df_future_macro.head()

Unnamed: 0,Year,GDP,POP
0,12/31/2017,1157.283926,24.451
1,12/31/2018,1201.142573,24.772
2,12/31/2019,1245.039044,25.089
3,12/31/2020,1289.224526,25.398
4,12/31/2021,1333.639686,25.7


In [14]:
future = m.make_future_dataframe(periods=34,
                                 freq = 'Y',
                                 include_history=True)
future.head()

Unnamed: 0,ds
0,1980-01-01
1,1981-01-01
2,1982-01-01
3,1983-01-01
4,1984-01-01


In [15]:
future.tail()

Unnamed: 0,ds
66,2045-12-31
67,2046-12-31
68,2047-12-31
69,2048-12-31
70,2049-12-31


In [16]:
df1 = simple_df.drop(columns=['y','ds'])
df1.head()

Unnamed: 0,GDP,POP
0,439.4,14.69
1,454.07,14.93
2,469.18,15.18
3,458.76,15.37
4,479.78,15.54


In [17]:
df2 = df_future_macro.drop(columns=['Year'])
df2.head()

Unnamed: 0,GDP,POP
0,1157.283926,24.451
1,1201.142573,24.772
2,1245.039044,25.089
3,1289.224526,25.398
4,1333.639686,25.7


In [18]:
df_future = pd.concat([df1,df2],
                      ignore_index=True)

df_future.head()

Unnamed: 0,GDP,POP
0,439.4,14.69
1,454.07,14.93
2,469.18,15.18
3,458.76,15.37
4,479.78,15.54


In [19]:
df_future.tail()

Unnamed: 0,GDP,POP
66,2670.775153,32.225
67,2731.887132,32.468
68,2794.170024,32.71
69,2857.579294,32.95
70,2922.072575,33.187


In [20]:
future = future.join(df_future)

In [21]:
future.head()

Unnamed: 0,ds,GDP,POP
0,1980-01-01,439.4,14.69
1,1981-01-01,454.07,14.93
2,1982-01-01,469.18,15.18
3,1983-01-01,458.76,15.37
4,1984-01-01,479.78,15.54


In [22]:
future.tail()

Unnamed: 0,ds,GDP,POP
66,2045-12-31,2670.775153,32.225
67,2046-12-31,2731.887132,32.468
68,2047-12-31,2794.170024,32.71
69,2048-12-31,2857.579294,32.95
70,2049-12-31,2922.072575,33.187


In [None]:
forecast = m.predict(future)
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

In [None]:
fig1 = m.plot(forecast)

In [None]:
future.iloc[20:40,:]

# SciKit-Learn

In [None]:
from sklearn.linear_model import LinearRegression

In [None]:
model = LinearRegression()

In [None]:
model.fit(simple_df[["GDP","POP"]],simple_df[['ds']])

In [None]:
model.fit(X_train,y)

In [None]:
predictions = model.predict(X_test)

In [None]:
predictions

In [None]:
model2 = LinearRegression()

In [None]:
model2.fit(simple_df[["GDP","POP"]],simple_df[['y']])

In [None]:
predictions2 = model2.predict(future[["GDP","POP"]])
predictions2

In [None]:
fig, ax = plt.subplots()
ax.plot(future[['ds']],predictions2, 'o', label="Predictions")
ax.plot(simple_df[['ds']],simple_df[["y"]], 'o', label="Actual")
ax.legend(loc="best")

# Using own functions

In [None]:
def run_regression(models, economies, df, x, y):
    """
    Perform linear regression for one or multiple economies.
    economy = list of economies
    models = {economy: LinearRegression() for economy in economies}
    The function returns a dictionary of economy-model pairs. That is,
    each economy will have its own set of coefficients.
    """
    for economy, model in models.items():
            (model.fit(df.loc[economy, x],
                df.loc[economy, y]))
    return models  

In [None]:
def run_prediction(models, economies, df, ResultsColumn):
    """
    Use coefficients from run_regression to generate predictions.
    Pass a dataframe df with the X and Y data. 
    ResultsColumn = name of prediction results
    """
    df_list =[]
    # run predictions
    for economy, model in models.items():
            #years = df['Year']
            #years.reset_index(drop=True, inplace=True)
            prediction = model.predict(df.loc[economy,:])
            df_name = pd.DataFrame(prediction, columns=ResultsColumn)
            #df_name.insert(loc=0,column='Year',value=years)
            df_name.insert(loc=0,column='Economy',value=economy)
            #df_list.append(df_name)
            
            # adding to the input df
            df2 = df.loc[economy,:]
            df2.insert(loc=0,column="Results",value=prediction)
            df2.insert(loc=0,column='Economy',value=economy)
            df2 = df2.reset_index()
            df2 = df2.set_index(['Economy','Year'])
            df_list.append(df2)

    # combine individual economy dataframes to one dataframe
    dfResults = pd.concat(df_list, sort=True)
    return dfResults
#    return df2

In [None]:
# create plotting function

def plot2(economies, df, figurename, Plotylabel, share_x, share_y):
    """
    Line plot for 21 economies. 
    Economies = economies to plot
    df = dataframe of data to plot. Note: each line must be a column.
    Plotylabel = y label for graph
    share_x = share the x axis (true or False)
    share_y = share the y axis (True or False)
    """
    print('Preparing plots...')
    # Create the 'figure'
    plt.style.use('tableau-colorblind10')
    
    # multiple line plot
    fig, axes = plt.subplots(nrows=3, ncols=7, sharex=share_x, sharey=share_y, figsize=(16,12))
    for ax, economy,num in zip(axes.flatten(), economies, range(1,22)):
        print('Creating plot for %s...' %economy)
        df11=df[df['Economy']==economy]
    
        for column in df11.drop(['Economy','Year'], axis=1):
            ax.plot(df11['Year'], df11[column], marker='', linewidth=1.5, label=economy)
            ax.set_title(economy)
            ax.set_ylabel(Plotylabel)
        ax.label_outer()
    
    #plt.tight_layout()
    fig.legend( list(df.drop(['Economy','Year'], axis=1)), bbox_to_anchor=(0,0,1,0.25), loc='lower center', ncol=9)
    fig.savefig(figurename,dpi=200)
    print('Figure saved as %s' % figurename)
    print('Preparing to show the figure...')
    plt.show()

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_data.csv",
                index_col=['Economy','Year'])

In [None]:
df.head()

In [None]:
economies = df.index.unique(level=0)
economies

In [None]:
# get list of economies and create economy-model pairs

models = {economy: LinearRegression() for economy in economies}

In [None]:
x = ['GDP','POP']
y = ['RETT']
df.head()

In [None]:
run_regression(models,economies,df,x,y)

In [None]:
# make predictions

df_future = pd.read_csv("../data/raw/Residential/MacroAssumptions_datetime-AUS.csv",
                        index_col=['Economy'])
df_future.head()

In [None]:
df2 = df_future.drop(columns=["Year"],axis=1)
df2 = df2.rename(columns={"Year2":"Year","Population":"POP"})
df2 = df2.reset_index(drop=False).set_index(['Economy','Year'])

#df2 = df2.drop(columns=["Year"],axis=1)
ResultsColumn = ["y_prediction"]
df2.head()

In [None]:
results = run_prediction(models, economies, df2, ResultsColumn)
results = results.rename(columns={"Results":"Predicted RETT"})

In [None]:
results.tail()

In [None]:
x = results.loc['01_AUS']
x.head()

In [None]:
fig, ax = plt.subplots()
ax.plot(x.index,x[['Predicted RETT']], 'o', label="Predictions")
#ax.plot(simple_df[['ds']],simple_df[["y"]], 'o', label="Actual")
#ax.legend(loc="best")

# Cleaned up using own functions

In [None]:
%matplotlib inline

In [None]:
# import packages

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [None]:
def run_regression(models, economies, df, x, y):
    """
    Perform linear regression for one or multiple economies.
    economy = list of economies
    models = {economy: LinearRegression() for economy in economies}
    The function returns a dictionary of economy-model pairs. That is,
    each economy will have its own set of coefficients.
    """
    for economy, model in models.items():
            (model.fit(df.loc[economy, x],
                df.loc[economy, y]))
    return models  

In [None]:
def run_prediction(models, economies, df, ResultsColumn):
    """
    Use coefficients from run_regression to generate predictions.
    Pass a dataframe df with the X and Y data. 
    ResultsColumn = name of prediction results
    """
    df_list =[]
    # run predictions
    for economy, model in models.items():
            # make prediction
            prediction = model.predict(df.loc[economy,:])
            
            # adding to the input df
            df2 = df.loc[economy,:]
            df2.insert(loc=0,column="Results",value=prediction)
            df2.insert(loc=0,column='Economy',value=economy)
            df2 = df2.reset_index()
            df2 = df2.set_index(['Economy','Year'])
            df_list.append(df2)

    # combine individual economy dataframes to one dataframe
    dfResults = pd.concat(df_list, sort=True)
    return dfResults

In [None]:
df = pd.read_csv("../data/raw/Residential/AUS_Data.csv",
                index_col=['Economy','Year'])

In [None]:
economies = df.index.unique(level=0)
economies

In [None]:
# get list of economies and create economy-model pairs

models = {economy: LinearRegression() for economy in economies}

In [None]:
x = ['GDP','POP']
y = ['RETT']
df.tail()

In [None]:
run_regression(models,economies,df,x,y)

In [None]:
# make predictions

df_future = pd.read_csv("../data/raw/Residential/MacroAssumptions_datetime.csv",
                        index_col=['Economy'])
df_future.tail()

In [None]:
df2 = df_future.drop(columns=["Year"],axis=1)
df2 = df2.rename(columns={"Year2":"Year","Population":"POP"})
df2 = df2.reset_index(drop=False).set_index(['Economy','Year'])

#df2 = df2.drop(columns=["Year"],axis=1)
ResultsColumn = ["y_prediction"]
df2.tail()

In [None]:
results = run_prediction(models, economies, df2, ResultsColumn)
results = results.rename(columns={"Results":"Predicted RETT"})
results.tail()

In [None]:
x = results.loc['01_AUS']
x.head()

In [None]:
fig, ax = plt.subplots()
ax.plot(x.index,x[['Predicted RETT']], 'o', label="Predictions")