# Agriculture demand model

In [1]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from pandas.tseries.offsets import MonthEnd

In [2]:
# Load the intermediate agriculture table; the 'Economy' column becomes the
# row index, so each economy's rows can later be selected with `.loc`.
ag_historical_path = "../data/intermediate/ag_df.csv"
ag_historical = pd.read_csv(ag_historical_path, index_col=['Economy'])

In [5]:
# Display the last five rows of the loaded table as a quick sanity check.
ag_historical.tail()

Unnamed: 0_level_0,Year,Coal,Elec,Gas,Heat,Nuc,Oil,Oth,PetP,RenB,RenG,RenH,RenMSW,RenNRE,RenO,RenS,RenW,Tot
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
VN,2013,20.72,133.558,0.0,0.0,0.0,0.0,0.0,449.031,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,603.309
VN,2014,20.16,162.798,0.0,0.0,0.0,0.0,0.0,431.7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,614.658
VN,2015,25.2,200.208,0.0,0.0,0.0,0.0,311.0,446.676692,311.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,983.084692
VN,2016,12.717267,311.234,0.0,0.0,0.0,0.0,318.76459,409.945392,318.76459,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1052.661248
VN,2017,12.717267,374.702,0.0,0.0,0.0,0.0,315.0,444.988927,315.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1147.408194


In [4]:
# Print column dtypes and non-null counts to confirm the load has no gaps.
ag_historical.info()

<class 'pandas.core.frame.DataFrame'>
Index: 988 entries, APEC to VN
Data columns (total 18 columns):
Year      988 non-null int64
Coal      988 non-null float64
Elec      988 non-null float64
Gas       988 non-null float64
Heat      988 non-null float64
Nuc       988 non-null float64
Oil       988 non-null float64
Oth       988 non-null float64
PetP      988 non-null float64
RenB      988 non-null float64
RenG      988 non-null float64
RenH      988 non-null float64
RenMSW    988 non-null float64
RenNRE    988 non-null float64
RenO      988 non-null float64
RenS      988 non-null float64
RenW      988 non-null float64
Tot       988 non-null float64
dtypes: float64(17), int64(1)
memory usage: 146.7+ KB


In [6]:
# Every column except 'Year' is a fuel (or total) series; collect those names.
fuel_list = ag_historical.columns.tolist()
fuel_list.remove('Year')  # raises ValueError if 'Year' is absent
fuel_list

['Coal',
 'Elec',
 'Gas',
 'Heat',
 'Nuc',
 'Oil',
 'Oth',
 'PetP',
 'RenB',
 'RenG',
 'RenH',
 'RenMSW',
 'RenNRE',
 'RenO',
 'RenS',
 'RenW',
 'Tot']

### Add a year-end `ds` date column (YYYY-MM-DD) for Prophet

In [7]:
# Build the `ds` timestamp column from the integer year. Parsing with "%Y"
# yields 1 January; adding MonthEnd(12) moves it to 31 December of the same
# year (e.g. 1980 -> 1980-12-31).
ag_year_start = pd.to_datetime(ag_historical['Year'], format="%Y")
ag_historical['ds'] = ag_year_start + MonthEnd(12)
ag_historical.head()

Unnamed: 0_level_0,Year,Coal,Elec,Gas,Heat,Nuc,Oil,Oth,PetP,RenB,RenG,RenH,RenMSW,RenNRE,RenO,RenS,RenW,Tot,ds
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
APEC,1980,0.0,1277.0,274.0,0.0,0.0,0.0,0.0,22262.835542,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23813.835542,1980-12-31
APEC,1981,0.0,1315.0,294.0,0.0,0.0,0.0,0.0,21370.625349,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22979.625349,1981-12-31
APEC,1982,0.235337,1556.464825,335.0,0.0,0.0,0.0,0.0,24505.103516,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,26396.803679,1982-12-31
APEC,1983,0.24368,1537.304501,321.0,0.0,0.0,0.0,0.0,25470.589141,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,27329.137323,1983-12-31
APEC,1984,0.0,1619.306698,379.0,0.0,0.0,0.0,0.0,26670.624279,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,28668.930977,1984-12-31


In [8]:
# Load historical macro data (Year, GDP, Population), indexed by economy, and
# give it the same year-end `ds` column used for the agriculture table so the
# two frames can be joined on it.
macro_historical_path = '../data/raw/Industry/MacroHistorical.csv'
df_macro = pd.read_csv(macro_historical_path, index_col='Economy')
macro_year_start = pd.to_datetime(df_macro['Year'], format="%Y")
df_macro['ds'] = macro_year_start + MonthEnd(12)
df_macro.head()

Unnamed: 0_level_0,Year,GDP,Population,ds
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AUS,1990,515.317626,17.042,1990-12-31
AUS,1991,513.320531,17.272,1991-12-31
AUS,1992,515.592569,17.486,1992-12-31
AUS,1993,536.417759,17.688,1993-12-31
AUS,1994,557.759322,17.883,1994-12-31


In [9]:
# Left-join macro data onto the agriculture table. 'Economy' is the index name
# of both frames, while 'ds' and 'Year' are ordinary columns. A left join keeps
# every agriculture row; rows with no macro match get NaN for GDP/Population.
merge_keys = ['Economy', 'ds', 'Year']
df = ag_historical.merge(df_macro, how='left', on=merge_keys)

In [11]:
# Inspect the end of the merged frame; unmatched years show NaN GDP/Population.
df.tail()

Unnamed: 0_level_0,Year,Coal,Elec,Gas,Heat,Nuc,Oil,Oth,PetP,RenB,...,RenH,RenMSW,RenNRE,RenO,RenS,RenW,Tot,ds,GDP,Population
Economy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
VN,2013,20.72,133.558,0.0,0.0,0.0,0.0,0.0,449.031,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,603.309,2013-12-31,484.409445,91.497
VN,2014,20.16,162.798,0.0,0.0,0.0,0.0,0.0,431.7,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,614.658,2014-12-31,513.394833,92.545
VN,2015,25.2,200.208,0.0,0.0,0.0,0.0,311.0,446.676692,311.0,...,0.0,0.0,0.0,0.0,0.0,0.0,983.084692,2015-12-31,547.685956,93.572
VN,2016,12.717267,311.234,0.0,0.0,0.0,0.0,318.76459,409.945392,318.76459,...,0.0,0.0,0.0,0.0,0.0,0.0,1052.661248,2016-12-31,581.7017,94.569
VN,2017,12.717267,374.702,0.0,0.0,0.0,0.0,315.0,444.988927,315.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1147.408194,2017-12-31,,


In [None]:
# Step 1: discard columns that contain no data at all.
df_without_empty_columns = df.dropna(axis=1, how="all")
# Step 2: discard any row with a missing value in the remaining columns,
# which removes the years that found no macro match in the left join.
df = df_without_empty_columns.dropna(axis=0, how="any")

In [None]:
# Preview the cleaned frame after the NaN filtering above.
df.head()

In [None]:
# Rename to the column names used by the model cells below: the electricity
# series becomes the target `y`, and Population becomes the `POP` regressor.
prophet_column_names = {"Population": "POP", "Elec": "y"}
df_elec = df.rename(columns=prophet_column_names)
df_elec.head()

In [None]:
# Distinct economy codes left in the index after cleaning; one model is built
# per entry in the cells that follow.
economies = df_elec.index.unique()
economies

In [None]:
# One unfitted Prophet model per economy, all configured identically:
# linear trend, additive mode, every seasonal component switched off, with
# GDP and POP registered as extra regressors.
prophet_options = dict(
    growth='linear',
    seasonality_mode='additive',
    yearly_seasonality=False,
    weekly_seasonality=False,
    daily_seasonality=False,
)
regressor_names = ('GDP', 'POP')

models = {}
for economy in economies:
    forecaster = Prophet(**prophet_options)
    for regressor_name in regressor_names:
        forecaster.add_regressor(regressor_name)
    models[economy] = forecaster

In [None]:
# Show the economy -> Prophet model mapping built above.
models

### fit models

In [None]:
# Fit each economy's model on that economy's own historical rows only.
for economy in models:
    economy_history = df_elec.loc[economy]
    models[economy].fit(economy_history)

### read in future macro data

In [None]:
# Load the macro assumptions for the projection period, indexed by economy,
# and add the same year-end `ds` timestamp used for the historical frames.
future_macro_path = '../data/raw/Industry/MacroAssumptions.csv'
df_future_macro = pd.read_csv(future_macro_path, index_col='Economy')
future_year_start = pd.to_datetime(df_future_macro['Year'], format="%Y")
df_future_macro['ds'] = future_year_start + MonthEnd(12)
df_future_macro.head()

In [None]:
# Check the last rows of the future macro assumptions.
df_future_macro.tail()

### create regressors for 1980-2050

In [None]:
# Historical regressor frame: everything in df_elec except the target `y`.
regressors_hist = df_elec.drop(columns='y')
regressors_hist.head()

In [None]:
# Check the last rows of the historical regressors.
regressors_hist.tail()

In [None]:
# Future regressor frame: drop 'Year' (the `ds` column already carries the
# date) and rename Population to POP to match the regressor name the models
# were configured with.
regressors_fut = (
    df_future_macro
    .drop(columns=['Year'])
    .rename(columns={"Population": "POP"})
)

In [None]:
# Preview the first rows of the future regressors.
regressors_fut.head()

In [None]:
# Check the last rows of the future regressors.
regressors_fut.tail()

In [None]:
# For each economy, stack its historical rows on top of its future rows
# (keeping the Economy index), then stack all economies into one frame.
# Columns present only in the historical frame are NaN on the future rows.
_regressors_list = [
    pd.concat(
        [regressors_hist.loc[economy], regressors_fut.loc[economy]],
        ignore_index=False,
        sort=False,
    )
    for economy in economies
]
regressors = pd.concat(_regressors_list)

### run model (make prediction)

In [None]:
# Run each fitted model over its economy's full regressor timeline, tag the
# forecast with the economy code as its index, and combine all forecasts.
pred_list = [
    models[economy]
    .predict(regressors.loc[economy])
    .assign(Economy=economy)
    .set_index(['Economy'])
    for economy in models
]
results = pd.concat(pred_list, sort=False)

In [None]:
# Recover the calendar year from the `ds` timestamps for the export.
results['Year'] = results['ds'].dt.year

# Floor negative point forecasts at zero BEFORE writing the file. Previously
# the CSV was written first and the flooring happened in a later cell, so the
# exported file could contain negative demand values that the notebook itself
# treats as invalid. The flooring is idempotent, so repeating it later is
# harmless.
# NOTE(review): yhat_lower / yhat_upper are exported unclipped, as before —
# confirm whether downstream consumers expect them floored as well.
results['yhat'] = np.where(results['yhat'] < 0, 0, results['yhat'])

export_columns = ['Year', 'yhat', 'yhat_lower', 'yhat_upper']
results[export_columns].to_csv('../data/final/ag-results.csv', header=True)

In [None]:
# Replace negative point forecasts with zero; non-negative values are kept.
results['yhat'] = results['yhat'].clip(lower=0)

In [None]:
# Preview the combined forecast table.
results.head()

### plot results

In [None]:
# Draw one forecast figure per economy using Prophet's built-in plot.
# Each figure is titled with its economy code; without a title the figures
# are visually indistinguishable from one another.
for economy, model in models.items():
    fig1 = model.plot(results.loc[economy])
    fig1.axes[0].set_title(str(economy))