In [96]:
import requests
import pandas as pd
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from helpers import *

#### Download time series

In [97]:
# Series to fetch and the country to fetch it for.
# NOTE(review): "NY.GDP.MKTP.CN" looks like a World Bank indicator id (GDP in
# current local currency) - confirm against helpers.download_time_series_data.
indicator_code, country_code = "NY.GDP.MKTP.CN", "afg"

# Raw payload as returned by the helper; its structure is defined in helpers.py.
data = download_time_series_data(indicator_code, country_code)

#### Convert to dataframe

In [98]:
# Turn the downloaded payload into a DataFrame. `data` is rebound here, so from
# this cell on it refers to the DataFrame rather than the raw download.
# NOTE(review): later cells expect a 'value' column and a date-like index that
# can be sliced with year strings - confirm that convert_to_dataframe provides both.
data = convert_to_dataframe(data)

In [99]:
# The most recent observation (year 2022) was reported as zero, so discard it.
# Caution: this mutates `data` in place and is not idempotent - every re-run of
# this cell removes one more trailing row.
trailing_labels = data.index[-1:]
data.drop(index=trailing_labels, inplace=True)

In [100]:
# Rename the observation column to 'gdp'.
data = data.rename(columns={'value': 'gdp'})

# Fill missing observations by interpolation (pandas' default linear method).
# The result is assigned back to the column instead of calling
# `data['gdp'].interpolate(inplace=True)`: an in-place call on a column
# selection operates on a temporary under pandas Copy-on-Write and would leave
# `data` unchanged.
data['gdp'] = data['gdp'].interpolate()

In [101]:
# First-difference the series (year-over-year change). The first row has no
# predecessor, so its difference is NaN and is dropped.
data_diff = data.diff().dropna()

# Declare the index as yearly, anchored on 1 January, so the time-series models
# can extend it when forecasting. 'YS-JAN' is the same offset as the older
# 'AS-JAN' alias, which pandas deprecated in 2.2.
data_diff.index.freq = 'YS-JAN'

#### Create the models and finally the ensemble

In [102]:
# Forecast horizon: one year-start timestamp per year, 2022 through 2030.
# ('YS-JAN' is the same offset as the deprecated 'AS-JAN' alias.)
forecast_index = pd.date_range(start='2022', end='2030', freq='YS-JAN')
# The horizon used to be referenced under the name `test_df`, which was never
# defined on a fresh kernel (NameError). Keep that name as an alias so any cell
# that still refers to it keeps working.
test_df = forecast_index
n_steps = len(forecast_index)

# Training data: every differenced observation up to and including 2021.
train_df = data_diff.loc[:'2021']

# Fit an AR(1) model on the differenced series.
arima_model = ARIMA(train_df['gdp'], order=(1, 0, 0))
arima_model_fit = arima_model.fit()

# Fit a seasonal AR(1) model on the same series.
# NOTE(review): the seasonal period is 12 although the series is yearly, i.e. a
# 12-year cycle - confirm this is intended rather than carried over from
# monthly data.
sarima_model = SARIMAX(train_df['gdp'], order=(1, 0, 0), seasonal_order=(1, 0, 0, 12))
sarima_model_fit = sarima_model.fit()

# Out-of-sample forecasts of the *differences*, one per horizon year.
arima_forecast = arima_model_fit.forecast(steps=n_steps)
sarima_forecast = sarima_model_fit.forecast(steps=n_steps)

# Undo the differencing: accumulate the forecast changes and add them to the
# last observed GDP level.
last_observed_gdp = data['gdp'].iloc[-1]
arima_forecast_reverted = arima_forecast.cumsum() + last_observed_gdp
sarima_forecast_reverted = sarima_forecast.cumsum() + last_observed_gdp

# Ensemble: simple average of the two level forecasts.
ensemble_forecast = (arima_forecast_reverted + sarima_forecast_reverted) / 2

# Store the ensemble forecast, indexed by the forecast horizon.
forecast_df = pd.DataFrame({'ensemble_forecast': ensemble_forecast}, index=forecast_index)

#### Concatenate the results to the original data

In [103]:
# Put the ensemble forecast into a one-column frame named 'gdp' so it lines up
# with the column of the historical data. The forecast's own date index is used
# directly; the previous code indexed by `test_df`, a name that is never defined
# on a fresh kernel.
forecast_df = ensemble_forecast.to_frame(name='gdp')

# Stack the historical observations and the forecast rows vertically.
result_df = pd.concat([data, forecast_df], axis=0)

In [104]:
# Label each row by origin: rows dated after the cutoff are forecasts, all
# others are observations. A single cutoff replaces the earlier fixed bin edges
# (1972-12-31 / 2030-12-31), which left any row outside that window unlabelled.
forecast_cutoff = pd.Timestamp('2021-12-31')
is_forecast = pd.Series(result_df.index > forecast_cutoff)
data_type = pd.Categorical(
    is_forecast.map({False: 'Original', True: 'Forecast'}),
    categories=['Original', 'Forecast'],
    ordered=True,
)

# Build a (data_type, date) MultiIndex without mutating the frame in place.
result_df = (
    result_df
    .assign(data_type=data_type)
    .set_index('data_type', append=True)
    .rename_axis(index=['date', 'data_type'])
    .swaplevel()
)

#### Save the results to disk

In [105]:
# Save the dataframe to a CSV file. The index is written on purpose: it holds
# the data_type and date levels, and with index=False the file would contain
# only the bare GDP values.
result_df.to_csv('forecast_data.csv')

print("Forecast data saved to forecast_data.csv")

Forecast data saved to forecast_data.csv
