# Steel Regression Model

In [None]:
import pandas as pd
import numpy as np
from fbprophet import Prophet
from pandas.tseries.offsets import MonthEnd

### read in historical data

In [None]:
# Load the historical steel data. The path is relative, so it resolves against
# the notebook's working directory. Later cells read the 'Economy', 'Year' and
# 'SteelConsumption' columns from this frame.
df_prod = pd.read_csv('../data/raw/Industry/SteelHistorical.csv')
# Preview the first rows to check that the file parsed as expected.
df_prod.head()

In [None]:
# Print column dtypes and non-null counts for the loaded steel data.
df_prod.info()

### Index by economy and convert the Year column to a YYYY-MM-DD date column (`ds`) for Prophet

In [None]:
# Index by economy so later cells can select one economy's rows with
# .loc[economy]. The index is not unique: it repeats for every year.
df_prod = df_prod.set_index(['Economy'])
df_prod.head()

In [None]:
# Prophet reads its timestamps from a datetime column named 'ds'.
# Parsing 'Year' with "%Y" gives 1 January of that year; adding MonthEnd(12)
# rolls it forward to 31 December of the same year.
df_prod['ds'] = pd.to_datetime(df_prod['Year'], format="%Y") + MonthEnd(12)
df_prod.head()

### read in historical macro data

In [None]:
# Load the historical macro data. Later cells read the 'Economy', 'Year',
# 'GDP' and 'Population' columns from this frame.
df_macro = pd.read_csv('../data/raw/Industry/MacroHistorical.csv')
# Preview the first rows to check that the file parsed as expected.
df_macro.head()

In [None]:
# Build the Prophet 'ds' column with the same conversion used for the steel
# data: parse 'Year' as 1 January, then roll forward to 31 December of that
# year with MonthEnd(12). A single conversion is enough; re-parsing the
# already-datetime column with a "%Y%m" format had no effect.
df_macro['ds'] = pd.to_datetime(df_macro['Year'], format="%Y") + MonthEnd(12)
# Index by economy so the frame lines up with df_prod for the merge.
df_macro = df_macro.set_index(['Economy'])
df_macro.head()

In [None]:
# GDP per head of population, computed row by row.
df_macro['GDP_per_capita'] = df_macro['GDP'] / df_macro['Population']

# Attach the macro columns to every steel row. 'Economy' is an index level on
# both frames while 'ds' and 'Year' are ordinary columns; a left join keeps
# every steel observation even when no macro row matches.
df = df_prod.merge(
    df_macro,
    how='left',
    on=['Economy', 'ds', 'Year'],
)

In [None]:
# Preview the merged steel + macro frame.
df.head()

In [None]:
# Prophet's target column must be named 'y': here, the natural log of steel
# consumption per capita. It is written straight to 'y' rather than to a
# temporary column that is renamed afterwards, so re-running this cell
# overwrites 'y' instead of adding a second column with the same name.
df['y'] = np.log(df['SteelConsumption'].div(df['Population']))

# Extra regressor for the model: natural log of GDP per capita.
df['ln_GDP_per_cap'] = np.log(df['GDP_per_capita'])

df.head()

In [None]:
# Distinct economy labels taken from the frame's index; later cells loop over
# them to handle one economy at a time.
economies = df.index.unique()
economies

In [None]:
# Display the economy labels again (same value as the previous cell).
economies

In [None]:
# Settings shared by every model: linear trend, additive mode, and all built-in
# seasonalities switched off.
prophet_options = {
    'daily_seasonality': False,
    'weekly_seasonality': False,
    'yearly_seasonality': False,
    'seasonality_mode': 'additive',
    'growth': 'linear',
}

# One independent, not-yet-fitted Prophet model per economy, each with
# log GDP per capita registered as an extra regressor.
models = {}
for economy in economies:
    economy_model = Prophet(**prophet_options)
    economy_model.add_regressor('ln_GDP_per_cap')
    models[economy] = economy_model

In [None]:
# Display the economy -> Prophet model mapping.
models

### fit models

In [None]:
# Fit each economy's model on that economy's rows only. Those rows carry the
# 'ds', 'y' and 'ln_GDP_per_cap' columns built in the earlier cells.
for economy in models:
    models[economy].fit(df.loc[economy])

### add future macro data

In [None]:
# Load the macro assumptions, indexed by economy from the start.
df_future_macro = pd.read_csv(
    '../data/raw/Industry/MacroAssumptions.csv',
    index_col=['Economy'],
)

# Derive the same regressor the models were fitted on: log GDP per capita.
df_future_macro['GDP_per_capita'] = (
    df_future_macro['GDP'] / df_future_macro['Population']
)
df_future_macro['ln_GDP_per_cap'] = np.log(df_future_macro['GDP_per_capita'])

df_future_macro.head()

In [None]:
# Same date convention as the historical frames: parse 'Year' as 1 January,
# then roll forward to 31 December of that year with MonthEnd(12).
df_future_macro['ds'] = pd.to_datetime(df_future_macro['Year'], format="%Y") + MonthEnd(12)
df_future_macro.head()

In [None]:
# Preview the last rows of the macro assumptions.
df_future_macro.tail()

### create regressors for 1990-2050

In [None]:
# The historical and future frames are used as they are. These are aliases,
# not copies, and no columns are dropped, so the combined frame also carries
# 'Year', 'GDP' and 'Population' for the output cells further down.
regressors_hist = df
regressors_fut = df_future_macro

# For each economy, stack its historical rows on top of its future rows, then
# stack all economies together. The economy index is preserved throughout.
regressors = pd.concat(
    [
        pd.concat(
            [regressors_hist.loc[economy], regressors_fut.loc[economy]],
            ignore_index=False,
            sort=False,
        )
        for economy in economies
    ]
)

### run model (make prediction)

In [None]:
# Run each fitted model over its own economy's regressor rows, label the
# forecast with the economy as its index, and stack all forecasts into a
# single frame.
results = pd.concat(
    [
        model.predict(regressors.loc[economy])
        .assign(Economy=economy)
        .set_index('Economy')
        for economy, model in models.items()
    ],
    sort=False,
)

In [None]:
# Show the forecast date, the point forecast and its lower/upper bounds.
results[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]

In [None]:
# Recover the calendar year from the forecast timestamp.
results = results.assign(Year=results['ds'].dt.year)
#results[['Year', 'yhat', 'yhat_lower', 'yhat_upper']].to_csv ('../data/final/steel_results.csv', header=True)

### plot results

In [None]:
# One Prophet forecast plot per economy, drawn from that economy's forecast rows.
for economy in models:
    fig1 = models[economy].plot(results.loc[economy])

In [None]:
# Print column dtypes and non-null counts for the stacked forecasts.
results.info()

In [None]:
# Print column dtypes and non-null counts for the combined regressor frame.
regressors.info()

In [None]:
# Forecast side: the date plus the point forecast and its bounds.
_a = results.loc[:, ['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
# Input side: the date, the year and the macro inputs.
_b = regressors.loc[:, ['ds', 'Year', 'GDP', 'Population']]

_b.head()

In [None]:
# Join each forecast row to the macro inputs for the SAME economy and date.
# 'Economy' is the (non-unique) index level on both frames and 'ds' is a
# column. Matching on the index alone would pair every forecast year with
# every regressor year of that economy, and matching on 'ds' alone would mix
# economies. A left join keeps exactly the forecast rows, and the 'Economy'
# index is preserved in the result.
final_results = pd.merge(_a, _b, how='left', on=['Economy', 'ds'])

In [None]:
# Display the merged forecast + macro frame.
final_results

In [None]:
# Undo the log transform on the forecast, then scale down by 1000.
per_capita_estimate = np.exp(final_results['yhat']) / 1000
final_results['estimated production - thousand tons per capita'] = per_capita_estimate

In [None]:
# Total estimate: the per-capita figure times the population in the same row.
final_results['estimated production - tons'] = (
    final_results['estimated production - thousand tons per capita']
    * final_results['Population']
)

In [None]:
# Write the final table, including its index and a header row, to the
# project's final-data folder. The path is relative to the working directory.
final_results.to_csv ('../data/final/steel_results.csv', header=True)