In [1]:
# Colab-only: mount Google Drive at /content/drive so the data files read
# further down (under /content/drive/MyDrive/...) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import numpy as np
import pandas as pd
import pdb

COUNTY-LEVEL DATASETS

In [3]:
# Folder on the mounted Drive that holds the project's CSV inputs.
data_path = '/content/drive/MyDrive/STATS307/Data/'
# Case table; its Combined_Key column ("<county>, <state>, US") is used below
# to identify the counties.
cases_df = pd.read_csv(data_path + 'covid-us-filtered.csv')
# Mobility table with one row per county and date
# (file name suggests MICE-imputed values -- TODO confirm).
mobility_df = pd.read_csv(data_path + 'mobilitycountymice.csv')

Mobility

In [4]:
# Tidy the raw mobility table. The cell is written so that it can be re-run on
# an already-processed frame: the drop tolerates columns that are already gone
# and the rename is a no-op once the short names are in place.
mobility_df = mobility_df.drop(columns=['country_region_code', 'country_region'], errors='ignore')

# sub_region_2 looks like "<name> County". Strip exactly that suffix rather
# than chopping the last 7 characters, so a value without the suffix is kept
# intact instead of being truncated.
mobility_df['county'] = mobility_df['sub_region_2'].str.replace(r' County$', '', regex=True)
# Key in the same "<county>, <state>, US" format as cases_df.Combined_Key.
mobility_df['county-state'] = mobility_df['county'] + ', ' + mobility_df['sub_region_1'] + ', US'

# Shorten "<category>_percent_change_from_baseline" to "<category>".
mobility_categories = ['retail_and_recreation', 'grocery_and_pharmacy', 'parks',
                       'transit_stations', 'workplaces', 'residential']
mobility_df = mobility_df.rename(
    columns={category + '_percent_change_from_baseline': category
             for category in mobility_categories})
mobility_df['date'] = pd.to_datetime(mobility_df['date'])

In [5]:
mobility_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9730 entries, 0 to 9729
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   sub_region_1           9730 non-null   object        
 1   sub_region_2           9730 non-null   object        
 2   date                   9730 non-null   datetime64[ns]
 3   retail_and_recreation  9730 non-null   int64         
 4   grocery_and_pharmacy   9730 non-null   int64         
 5   parks                  9730 non-null   int64         
 6   transit_stations       9730 non-null   int64         
 7   workplaces             9730 non-null   int64         
 8   residential            9730 non-null   int64         
 9   county                 9730 non-null   object        
 10  county-state           9730 non-null   object        
dtypes: datetime64[ns](1), int64(6), object(4)
memory usage: 836.3+ KB


In [6]:
mobility_df['county-state'].unique()

array(['Los Angeles, California, US', 'New York, New York, US',
       'Suffolk, Massachusetts, US', 'Miami-Dade, Florida, US',
       'Cook, Illinois, US', 'Santa Barbara, California, US',
       'Orange, New York, US', 'Franklin, Massachusetts, US',
       'Highlands, Florida, US', 'Whiteside, Illinois, US'], dtype=object)

Filter to common dates for all counties

In [7]:
# Counties are identified by the Combined_Key strings of the cases table.
locs = cases_df.Combined_Key.values
print(locs)

# One set of available mobility dates per county ...
dates = [
    set(mobility_df.loc[mobility_df['county-state'] == loc, 'date'])
    for loc in locs
]
# ... reduced to the dates every county has, in chronological order.
dates = sorted(set.intersection(*dates))

['Los Angeles, California, US' 'Santa Barbara, California, US'
 'Highlands, Florida, US' 'Miami-Dade, Florida, US' 'Cook, Illinois, US'
 'Whiteside, Illinois, US' 'Franklin, Massachusetts, US'
 'Suffolk, Massachusetts, US' 'New York, New York, US'
 'Orange, New York, US']


In [8]:
dates

[Timestamp('2020-02-15 00:00:00'),
 Timestamp('2020-02-16 00:00:00'),
 Timestamp('2020-02-17 00:00:00'),
 Timestamp('2020-02-18 00:00:00'),
 Timestamp('2020-02-19 00:00:00'),
 Timestamp('2020-02-20 00:00:00'),
 Timestamp('2020-02-21 00:00:00'),
 Timestamp('2020-02-22 00:00:00'),
 Timestamp('2020-02-23 00:00:00'),
 Timestamp('2020-02-24 00:00:00'),
 Timestamp('2020-02-25 00:00:00'),
 Timestamp('2020-02-26 00:00:00'),
 Timestamp('2020-02-27 00:00:00'),
 Timestamp('2020-02-28 00:00:00'),
 Timestamp('2020-02-29 00:00:00'),
 Timestamp('2020-03-01 00:00:00'),
 Timestamp('2020-03-02 00:00:00'),
 Timestamp('2020-03-03 00:00:00'),
 Timestamp('2020-03-04 00:00:00'),
 Timestamp('2020-03-05 00:00:00'),
 Timestamp('2020-03-06 00:00:00'),
 Timestamp('2020-03-07 00:00:00'),
 Timestamp('2020-03-08 00:00:00'),
 Timestamp('2020-03-09 00:00:00'),
 Timestamp('2020-03-10 00:00:00'),
 Timestamp('2020-03-11 00:00:00'),
 Timestamp('2020-03-12 00:00:00'),
 Timestamp('2020-03-13 00:00:00'),
 Timestamp('2020-03-

In [9]:
# Positions into the sorted `dates` list that delimit the modelling window.
TRAIN_START = 107              # first position of the window
TRAIN_DUR = 466 - TRAIN_START  # 359 positions
TEST_END = -288                # negative: counted back from the end of `dates`

In [10]:
# Keep only mobility rows whose date lies in the modelling window. The window
# starts one position before TRAIN_START so that one leading row per county can
# be sacrificed later (to a .diff(), or to the row drop that stands in for it).
print('before: {}'.format(mobility_df.shape))
window_dates = dates[TRAIN_START-1: TEST_END]
mobility_df = mobility_df[mobility_df['date'].isin(window_dates)]
print('after: {}'.format(mobility_df.shape))

before: (9730, 11)
after: (5740, 11)


In [11]:
mobility_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5740 entries, 106 to 9441
Data columns (total 11 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   sub_region_1           5740 non-null   object        
 1   sub_region_2           5740 non-null   object        
 2   date                   5740 non-null   datetime64[ns]
 3   retail_and_recreation  5740 non-null   int64         
 4   grocery_and_pharmacy   5740 non-null   int64         
 5   parks                  5740 non-null   int64         
 6   transit_stations       5740 non-null   int64         
 7   workplaces             5740 non-null   int64         
 8   residential            5740 non-null   int64         
 9   county                 5740 non-null   object        
 10  county-state           5740 non-null   object        
dtypes: datetime64[ns](1), int64(6), object(4)
memory usage: 538.1+ KB


In [12]:
locs = cases_df.Combined_Key.values
urbans = ['Los Angeles, California, US', 'New York, New York, US', 
          'Suffolk, Massachusetts, US', 'Miami-Dade, Florida, US', 'Cook, Illinois, US']
states = list(mobility_df['sub_region_1'].unique())

# The six mobility categories used as model features, in table order.
feature_cols = ['retail_and_recreation', 'grocery_and_pharmacy', 'parks',
                'transit_stations', 'workplaces', 'residential']

# Build one feature frame per county: a 'date' column followed by the six
# standardized mobility features.
#
# Columns are selected by name and each county frame is an explicit copy, so
# nothing is written into a slice of mobility_df and the result does not depend
# on column positions.
#
# Differencing, urban/state indicator columns and calendar features were tried
# in earlier versions of this cell and are intentionally not applied here.
X_dict = {}
for key in locs:
  county_rows = mobility_df.loc[mobility_df['county-state'] == key, ['date'] + feature_cols]
  # Drop the first row: the date window was extended by one leading day to
  # leave room for differencing, so the frame still has to start at TRAIN_START.
  county_frame = county_rows.iloc[1:].copy()

  # Standardize each feature to zero mean / unit (sample) standard deviation.
  # The raw columns are integers, hence the explicit cast before assignment.
  # NOTE(review): the mean/std are computed over the whole window, i.e. they
  # include the dates later used for testing -- confirm this is intended.
  features = county_frame[feature_cols].astype(float)
  county_frame[feature_cols] = (features - features.mean()) / features.std()

  X_dict[key] = county_frame

In [13]:
# Print every county key followed by the summary of its feature frame.
for county_key, county_frame in X_dict.items():
  print(county_key)
  county_frame.info()

Los Angeles, California, US
<class 'pandas.core.frame.DataFrame'>
Int64Index: 573 entries, 107 to 685
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   date                   573 non-null    datetime64[ns]
 1   retail_and_recreation  573 non-null    float64       
 2   grocery_and_pharmacy   573 non-null    float64       
 3   parks                  573 non-null    float64       
 4   transit_stations       573 non-null    float64       
 5   workplaces             573 non-null    float64       
 6   residential            573 non-null    float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 35.8 KB
Santa Barbara, California, US
<class 'pandas.core.frame.DataFrame'>
Int64Index: 573 entries, 4977 to 5555
Data columns (total 7 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   date          

In [14]:
# X = np.dstack([v.iloc[:,1:] for v in X_dict.values()])
# X.shape

Y (day-over-day case differences): keep only the dates that are also available in the mobility data

In [15]:
# Build the target table from the cases table: one row per date, one column
# per county (columns are still labelled by cases_df row position here).
Y = (
    cases_df
    .drop(columns=['State', 'Country_Region', 'County', 'Combined_Key'])
    .iloc[:, 24:-28]   # positional window over the remaining columns
    .diff(axis=1)      # change between consecutive columns
    .T                 # rows become the former column labels
    .iloc[1:, :]       # the first row has no predecessor to difference against
    .reset_index()     # former column labels end up in the 'index' column
)
Y['index'] = pd.to_datetime(Y['index'])
# Same date window as the features, without the extra leading day.
Y = Y[Y['index'].isin(dates[TRAIN_START: TEST_END])]

In [16]:
Y['index']

106   2020-06-01
107   2020-06-02
108   2020-06-03
109   2020-06-04
110   2020-06-05
         ...    
680   2021-12-27
681   2021-12-28
682   2021-12-29
683   2021-12-30
684   2021-12-31
Name: index, Length: 573, dtype: datetime64[ns]

In [17]:
# After the transpose + reset_index, Y's county columns are labelled by the row
# positions of cases_df. Map every position back to its Combined_Key; the
# mapping is derived from the data rather than assuming exactly 10 counties.
columns = cases_df['Combined_Key'].values
rename_cols = dict(enumerate(columns))
Y = Y.rename(columns=rename_cols)
#Y = Y.drop('index', axis=1).to_numpy() # Drop index(=date) variable

In [18]:
Y

Unnamed: 0,index,"Los Angeles, California, US","Santa Barbara, California, US","Highlands, Florida, US","Miami-Dade, Florida, US","Cook, Illinois, US","Whiteside, Illinois, US","Franklin, Massachusetts, US","Suffolk, Massachusetts, US","New York, New York, US","Orange, New York, US"
106,2020-06-01,1010.0,10.0,1.0,139.0,570.0,2.0,0.0,69.0,74.0,16.0
107,2020-06-02,1208.0,33.0,2.0,85.0,1178.0,0.0,0.0,68.0,40.0,27.0
108,2020-06-03,1042.0,19.0,2.0,232.0,531.0,2.0,0.0,42.0,26.0,11.0
109,2020-06-04,1431.0,29.0,4.0,323.0,509.0,3.0,3.0,78.0,79.0,11.0
110,2020-06-05,1376.0,46.0,3.0,277.0,631.0,2.0,0.0,38.0,72.0,13.0
...,...,...,...,...,...,...,...,...,...,...,...
680,2021-12-27,7409.0,686.0,0.0,0.0,29718.0,79.0,92.0,2082.0,3201.0,317.0
681,2021-12-28,9451.0,304.0,0.0,0.0,9443.0,64.0,103.0,1242.0,5372.0,461.0
682,2021-12-29,16483.0,530.0,0.0,0.0,9766.0,59.0,77.0,2354.0,8086.0,1512.0
683,2021-12-30,20169.0,577.0,0.0,0.0,12931.0,116.0,100.0,3231.0,8112.0,1598.0


In [19]:
# delete FL and MA 



In [20]:
X_dict.keys()

dict_keys(['Los Angeles, California, US', 'Santa Barbara, California, US', 'Highlands, Florida, US', 'Miami-Dade, Florida, US', 'Cook, Illinois, US', 'Whiteside, Illinois, US', 'Franklin, Massachusetts, US', 'Suffolk, Massachusetts, US', 'New York, New York, US', 'Orange, New York, US'])

SET UP SARIMA

In [21]:
!pip install pmdarima
import pmdarima as pm
import sklearn
from sklearn.metrics import mean_squared_error as mse
from statsmodels.tsa.stattools import adfuller
from statsmodels.tools.eval_measures import rmse, aic
from statsmodels.tsa.stattools import kpss
from statsmodels.tsa.stattools import grangercausalitytests

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pmdarima
  Downloading pmdarima-2.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl (1.9 MB)
[K     |████████████████████████████████| 1.9 MB 5.1 MB/s 
Collecting statsmodels>=0.13.2
  Downloading statsmodels-0.13.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 31.9 MB/s 
Installing collected packages: statsmodels, pmdarima
  Attempting uninstall: statsmodels
    Found existing installation: statsmodels 0.12.2
    Uninstalling statsmodels-0.12.2:
      Successfully uninstalled statsmodels-0.12.2
Successfully installed pmdarima-2.0.2 statsmodels-0.13.5


In [None]:
maxlag=15
test = 'ssr_chi2test'

def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False, max_lag=None):
    """Pairwise Granger-causality p-value matrix.

    For every ordered pair of variables, runs ``grangercausalitytests`` on
    ``data[[r, c]]`` for lags 1..max_lag and stores the smallest p-value
    (each rounded to 4 decimals) of the chosen test.

    Parameters
    ----------
    data : DataFrame containing one column per entry of ``variables``.
    variables : list of column names to test against each other.
    test : key of the statistic to read from the statsmodels result,
        e.g. 'ssr_chi2test'.
    verbose : if True, print the per-lag p-values of every pair.
    max_lag : largest lag to test. When None (default) the module-level
        ``maxlag`` is used, looked up at call time as before.

    Returns
    -------
    DataFrame whose rows are the response series (suffix ``_y``) and whose
    columns are the candidate predictors (suffix ``_x``).
    """
    lags = maxlag if max_lag is None else max_lag
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            # Column order matters: the test asks whether the second column
            # (c) helps predict the first (r). verbose=False is passed
            # explicitly so statsmodels itself prints nothing.
            test_result = grangercausalitytests(data[[r, c]], maxlag=lags, verbose=False)
            # test_result[lag][0][test] is a tuple whose second entry is the p-value.
            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, lags + 1)]
            if verbose: print(f'Y = {r}, X = {c}, P Values = {p_values}')
            df.loc[r, c] = np.min(p_values)
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df

In [None]:
X_dict

{'Los Angeles, California, US':           date  retail_and_recreation  grocery_and_pharmacy     parks  \
 107 2020-06-01              -2.627705             -2.163466 -1.495582   
 108 2020-06-02              -2.274177             -0.107635 -0.903169   
 109 2020-06-03              -1.567121              1.200621  0.112398   
 110 2020-06-04              -1.449279              0.826834  0.281659   
 111 2020-06-05              -1.567121              0.639940 -0.141494   
 ..         ...                    ...                   ...       ...   
 681 2021-12-27               0.554045             -0.855210 -2.257257   
 682 2021-12-28               1.261101              0.826834 -0.649277   
 683 2021-12-29              -0.035168             -0.855210 -2.934301   
 684 2021-12-30               0.082675             -0.107635 -3.611346   
 685 2021-12-31               0.082675              3.630240 -0.903169   
 
      transit_stations  workplaces  residential  
 107         -1.394513   -0.7

In [None]:
# accumulate SARIMA results
# Each dict maps a county key to a list with one entry per spec in MODEL_SPECS
# (same order): the fitted model, its AIC, and its hold-out MSE on the log scale.
county_model = {}
county_model_AIC = {}
county_test_MSE = {}

# Half-open [start, stop) date windows for fitting and for the hold-out forecast.
TRAIN_WINDOW = ('2020-06-01', '2021-06-01')
TEST_WINDOW = ('2021-06-01', '2021-06-15')

# Search settings shared by every auto_arima call.
AUTO_ARIMA_COMMON = dict(start_p=0, d=None, start_q=0, max_p=5, max_d=2, max_q=5,
                         test='adf', error_action='ignore', suppress_warnings=True,
                         stepwise=True, trace=True)

# Per-model settings. `m` is the number of observations per seasonal cycle.
# NOTE(review): these runs were originally labelled weekly (52), yearly (1),
# monthly (12) and quarterly (4). Those labels describe what the values mean
# for weekly/annual/monthly/quarterly-sampled series; this series is daily, so
# they are 52-, 12- and 4-day cycles, and weekly seasonality would be m=7.
# The values are kept unchanged so that stored results stay comparable.
MODEL_SPECS = [
  dict(max_order=5, m=1, seasonal=False),                   # plain ARIMA
  dict(start_P=1, D=None, start_Q=1, m=52, seasonal=True),
  # m=1 has no seasonal cycle; per the pmdarima docs the search is then
  # non-seasonal, which makes this entry a repeat of the first one.
  dict(start_P=1, D=None, start_Q=1, m=1, seasonal=True),
  dict(start_P=1, D=None, start_Q=1, m=12, seasonal=True),
  dict(start_P=1, D=None, start_Q=1, m=4, seasonal=True),
]


def _rows_in_window(frame, date_col, window):
  """Rows of `frame` with window[0] <= frame[date_col] < window[1], re-indexed from 0."""
  start, stop = window
  keep = (frame[date_col] >= start) & (frame[date_col] < stop)
  return frame[keep].reset_index(drop=True)


def _log_cases(cases):
  """Log-transform a case-count series without producing -inf or NaN.

  Zeros and negative values are floored to 1e-5 before taking the log; any NaN
  left afterwards is replaced by 0.
  NOTE(review): floored days end up at log(1e-5) ~= -11.5, not 0, which
  dominates the MSE for counties with many zero-count days -- confirm that
  this is the intended treatment.
  """
  floored = cases.replace(0, 1e-5)
  floored[floored < 0] = 1e-5
  return np.log(floored).replace(np.nan, 0)


# loop through each county + associated Y
for key, X_county in X_dict.items():
  print(key)
  # Exogenous features (all columns except the leading date column).
  X_county_train = _rows_in_window(X_county, 'date', TRAIN_WINDOW).iloc[:, 1:]
  X_county_test = _rows_in_window(X_county, 'date', TEST_WINDOW).iloc[:, 1:]
  # Log-scale targets for the same windows.
  Y_county_train = _log_cases(_rows_in_window(Y, 'index', TRAIN_WINDOW)[key])
  Y_county_test = _log_cases(_rows_in_window(Y, 'index', TEST_WINDOW)[key])
  # Sanity check: both counts are expected to be 0.
  print(Y_county_test.isna().sum())
  print(Y_county_train.isna().sum())

  county_model[key] = []
  county_model_AIC[key] = []
  county_test_MSE[key] = []
  for spec in MODEL_SPECS:
    # auto_arima returns the best model already fitted on the training data,
    # so it is used for forecasting directly (no second fit).
    arima = pm.auto_arima(Y_county_train, X=X_county_train, **AUTO_ARIMA_COMMON, **spec)
    y_pred = arima.predict(n_periods=len(Y_county_test), X=X_county_test,
                           return_conf_int=False, alpha=0.05)
    county_model[key].append(arima)
    county_model_AIC[key].append(arima.aic())
    county_test_MSE[key].append(mse(Y_county_test, y_pred))






Los Angeles, California, US
0
0
Performing stepwise search to minimize aic
 ARIMA(0,0,0)(0,0,0)[0]             : AIC=2231.262, Time=0.25 sec
 ARIMA(1,0,0)(0,0,0)[0]             : AIC=1808.366, Time=0.29 sec
 ARIMA(0,0,1)(0,0,0)[0]             : AIC=2065.348, Time=0.73 sec
 ARIMA(2,0,0)(0,0,0)[0]             : AIC=1735.949, Time=1.31 sec
 ARIMA(3,0,0)(0,0,0)[0]             : AIC=1701.535, Time=1.15 sec
 ARIMA(4,0,0)(0,0,0)[0]             : AIC=inf, Time=1.26 sec
 ARIMA(3,0,1)(0,0,0)[0]             : AIC=1641.546, Time=1.34 sec
 ARIMA(2,0,1)(0,0,0)[0]             : AIC=1638.247, Time=1.13 sec
 ARIMA(1,0,1)(0,0,0)[0]             : AIC=1637.926, Time=0.96 sec
 ARIMA(1,0,2)(0,0,0)[0]             : AIC=1638.230, Time=1.18 sec
 ARIMA(0,0,2)(0,0,0)[0]             : AIC=2002.048, Time=0.94 sec
 ARIMA(2,0,2)(0,0,0)[0]             : AIC=inf, Time=1.31 sec
 ARIMA(1,0,1)(0,0,0)[0] intercept   : AIC=1636.398, Time=1.12 sec
 ARIMA(0,0,1)(0,0,0)[0] intercept   : AIC=1644.628, Time=0.58 sec
 ARIMA(1,0,

ValueError: ignored

In [None]:
county_model

  ARIMA(order=(1, 0, 1), scoring_args={}, seasonal_order=(0, 0, 0, 52),
  ARIMA(order=(1, 0, 2), scoring_args={}, seasonal_order=(0, 0, 1, 12),
  ARIMA(order=(1, 0, 1), scoring_args={}, seasonal_order=(0, 0, 0, 4),
        with_intercept=False),
  ARIMA(order=(3, 0, 1), scoring_args={}, seasonal_order=(0, 0, 0, 52),
        with_intercept=False),
  ARIMA(order=(3, 0, 1), scoring_args={}, seasonal_order=(0, 0, 0, 12),
  ARIMA(order=(3, 0, 1), scoring_args={}, seasonal_order=(0, 0, 0, 4),
        with_intercept=False),
  ARIMA(order=(0, 1, 1), scoring_args={}, seasonal_order=(0, 0, 0, 52),
        with_intercept=False),
  ARIMA(order=(0, 1, 1), scoring_args={}, seasonal_order=(0, 0, 0, 12),
  ARIMA(order=(0, 1, 1), scoring_args={}, seasonal_order=(0, 0, 0, 4),
        with_intercept=False),
  ARIMA(order=(1, 1, 1), scoring_args={}, seasonal_order=(0, 0, 0, 52),
        with_intercept=False),
  ARIMA(order=(1, 1, 1), scoring_args={}, seasonal_order=(0, 0, 0, 12),
  ARIMA(order=(1, 1, 1), 

In [None]:
county_model_AIC 

{'Los Angeles, California, US': [1636.3980168683966,
  1636.3980168683966,
  1636.3980168683966,
  1629.989382333932,
  1636.3980168683966],
 'Santa Barbara, California, US': [1372.8777595855454,
  1372.8777595855454,
  1372.8777595855454,
  1372.8777595855454,
  1372.8777595855454],
 'Highlands, Florida, US': [1481.3351778397923,
  1481.3351778397923,
  1481.3351778397923,
  1481.3351778397923,
  1481.3351778397923],
 'Miami-Dade, Florida, US': [1476.2602781029923,
  1476.2602781029923,
  1476.2602781029923,
  1476.2602781029923,
  1476.2602781029923],
 'New York, New York, US': [1143.5569590160967,
  1193.6698013695755,
  1193.6698013695755,
  1193.6698013695755,
  1150.9044378112167],
 'Orange, New York, US': [1580.1757103401271,
  1580.1757103401271,
  1580.1757103401271,
  1580.1757103401271,
  1612.0764607468236]}

In [None]:
county_test_MSE

{'Los Angeles, California, US': [2.5944578270515,
  2.5944578270515,
  2.5944578270515,
  1.5694645705393033,
  2.5944578270515],
 'Santa Barbara, California, US': [25.5968936120649,
  25.5968936120649,
  25.5968936120649,
  25.5968936120649,
  25.5968936120649],
 'Highlands, Florida, US': [51.2339604502673,
  51.2339604502673,
  51.2339604502673,
  51.2339604502673,
  51.2339604502673],
 'Miami-Dade, Florida, US': [82.09033877134002,
  82.09033877134002,
  82.09033877134002,
  82.09033877134002,
  82.09033877134002],
 'New York, New York, US': [1.1181346751805776,
  2.1505503828524963,
  2.1505503828524963,
  2.1505503828524963,
  2.6705655573357174],
 'Orange, New York, US': [0.2832360956616919,
  0.2832360956616919,
  0.2832360956616919,
  0.2832360956616919,
  0.24618478985039238]}

# PROPHET MODEL