# Prophet Model


In [10]:
!pip install convertdate



In [11]:
!pip install fbprophet



In [12]:
import pickle

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
pd.set_option('display.max_rows', None)
import datetime
from plotly.subplots import make_subplots

import requests
from bs4 import BeautifulSoup

from tqdm.auto import tqdm

import seaborn as sns
import plotly.express as px

from fbprophet import Prophet
from fbprophet.diagnostics import cross_validation, performance_metrics
from fbprophet.plot import plot_cross_validation_metric

In [13]:
df = pd.read_csv('data/processed_data/covid-19-state-level.csv', index_col=0)

df['date'] = pd.to_datetime(df['date'])

In [21]:
extra_features = set(df.columns) - {'cases', 'date', 'deaths', 'fips', 'state'}

In [22]:
def extract_value(data, date, col):
    values = data[data['ds'] == date]
    if len(values) == 1:
        return values.iloc[0][col]
    else:
        raise ValueError('Invalid date value: {}'.format(date))

In [23]:
help(Prophet.add_regressor)

Help on function add_regressor in module fbprophet.forecaster:

add_regressor(self, name, prior_scale=None, standardize='auto', mode=None)
    Add an additional regressor to be used for fitting and predicting.
    
    The dataframe passed to `fit` and `predict` will have a column with the
    specified name to be used as a regressor. When standardize='auto', the
    regressor will be standardized unless it is binary. The regression
    coefficient is given a prior with the specified scale parameter.
    Decreasing the prior scale will add additional regularization. If no
    prior scale is provided, self.holidays_prior_scale will be used.
    Mode can be specified as either 'additive' or 'multiplicative'. If not
    specified, self.seasonality_mode will be used. 'additive' means the
    effect of the regressor will be added to the trend, 'multiplicative'
    means it will multiply the trend.
    
    Parameters
    ----------
    name: string name of the regressor.
    prior_scale: op

In [24]:
def prediction(state, data, periods, target_col, feature_cols=None, test_size=None):
    # Function that takes in the data frame, storeID, and number of future period forecast
    # The function then generates date/columns columns in Prophet format
    # The function then makes time series predictions
    data = data.reset_index()  
    data = data[data['state'] == state]
    if feature_cols is None:
        feature_cols = []
    else:
        assert isinstance(feature_cols, list) and len(feature_cols) > 0, 'features should be non empty list'
        data[feature_cols] = data[feature_cols].shift(1)
        data = data.dropna()
    # select data belonging to a state   
    # create data features time series.
    data = data[['date', target_col] + feature_cols] \
        .rename(columns={'date': 'ds', target_col : 'y'}) \
        .sort_values('ds')
    if test_size is not None:
        data, future = data[:-test_size], data[-test_size:]
    # Init Prophet
    model = Prophet()
    # add additional feature column names
    for fc in feature_cols:
        model.add_regressor(fc)
    model.fit(data)
    # Predict future with available future data
    if test_size is not None:
        # Predict new cases        
        forecast = model.predict(future)
        f_str = '_'.join(feature_cols)
        s_str = '_'.join(state.lower().split(' '))
        y_label = state + '_' + target_col
        # Plot
        fig1 = model.plot(forecast, xlabel='COVID-19 Cases', ylabel=y_label)
        fig1.savefig('output/images/{}-{}-f_{}-prophetplot.png'.format(target_col, s_str, f_str))
        # 
        fig2 = model.plot_components(forecast)
        fig2.savefig('output/images/{}_trend-{}-f_{}-prophetplot.png'.format(target_col, s_str, f_str))
    return model

In [25]:
top_states = ['California', 'New York', 'Florida', 'New Jersey']

feature_search_space = [list(extra_features)] + [list(extra_features - {fcol}) for fcol in extra_features]

lst_res = []

pbar = tqdm(feature_search_space)

pbar.set_description('Exploring Feature Space')

for feature_cols in pbar:
    for state in top_states:
        model = prediction(state, df, 10, 'cases', feature_cols=feature_cols)
        ## Forecasts are made over a certain horizon, which we denote H.
        df_cv = cross_validation(model, initial='60 days', period='1 days', horizon = '7 days')
        df_p = performance_metrics(df_cv)
        ## Plot the cross validation metric
        # fig = plot_cross_validation_metric(df_cv, metric='mae')
        # plt.show()
        ## Add the results
        lst_res.append((state, feature_cols, df_cv, df_p))

HBox(children=(FloatProgress(value=0.0, max=7.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00





HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00





HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00





HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00





HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.





INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 94 forecasts with cutoffs between 2020-04-16 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=94.0), HTML(value='')))




INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 79 forecasts with cutoffs between 2020-05-01 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=79.0), HTML(value='')))

INFO:fbprophet:Disabling yearly seasonality. Run prophet with yearly_seasonality=True to override this.
INFO:fbprophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.





INFO:fbprophet:Making 76 forecasts with cutoffs between 2020-05-04 00:00:00 and 2020-07-18 00:00:00


HBox(children=(FloatProgress(value=0.0, max=76.0), HTML(value='')))





In [26]:
with open('./output/model_outputs/ablation_results_full_v0.2.pickle', 'wb') as fp:
    pickle.dump(lst_res, fp)

In [27]:
with open('./output/model_outputs/ablation_results_full_v0.2.pickle', 'rb') as fp:
    ablation_results = pickle.load(fp)

In [28]:
ablation_results

[('California',
  ['parks',
   'retail_and_recreation',
   'grocery_and_pharmacy',
   'transit_stations',
   'workplaces',
   'residential'],
              ds           yhat     yhat_lower     yhat_upper       y     cutoff
  0   2020-04-17   29323.727743   29211.160724   29443.778167   29398 2020-04-16
  1   2020-04-18   30582.581765   30442.668752   30717.865386   30829 2020-04-16
  2   2020-04-19   31622.928094   31435.367097   31809.617326   31544 2020-04-16
  3   2020-04-20   32771.310394   32492.502975   33044.539872   33862 2020-04-16
  4   2020-04-21   34014.199236   33622.353623   34390.789272   35844 2020-04-16
  5   2020-04-22   35303.838212   34769.740129   35810.063292   37573 2020-04-16
  6   2020-04-23   36540.908479   35844.274844   37152.254001   39534 2020-04-16
  7   2020-04-18   30544.045254   30458.142767   30634.602490   30829 2020-04-17
  8   2020-04-19   31606.818650   31497.769741   31716.805067   31544 2020-04-17
  9   2020-04-20   32725.259809   32565.740923  