In [1]:
import sklearn
import pandas as pd 
import seaborn as sns
import numpy as np 
import matplotlib.pyplot as plt
from pandas_profiling import ProfileReport
from functools import partial
from pathlib import Path
import os

# Initial load and drop irrelevant files.

Creates a function to go from location_key to the real county name

In [10]:
df = pd.read_csv("../../data/covid/bq-results-20201121-185824-hysp7stoqt78.csv")
# df = pd.read_csv("../../data/usa_covid_gt20.csv")
#date
df['date'] = pd.to_datetime(df['date'])

#remove unrelated features
df = df.drop(columns=['aggregation_level','latitude','longitude','openstreetmap_id','location_geometry','datacommons_id','wikidata_id','iso_3166_1_alpha_2','iso_3166_1_alpha_3'])

#drop country and per state rows
df = df.dropna(axis='index', subset=['subregion2_code'])

#remove location names and make map
df = df.astype({'subregion2_code':int})
names = df[['subregion1_name','subregion2_name','subregion2_code']].drop_duplicates(subset=['subregion2_code'])
def codeNameConverter(code, names):
    if isinstance(code, str):
        code = int(code.split("_")[2])
    res = names[names['subregion2_code'] == code]
    return f"{res.iloc[0]['subregion2_name']}, {res.iloc[0]['subregion1_name']}"
codeToName = partial(codeNameConverter, names=names)
df = df.drop(columns=['country_code', 'country_name','subregion1_code','subregion1_name','subregion2_code','subregion2_name'])



# Clean data and interpolate columns where required

Make sure no values are missing

In [11]:
#imputations!
#assume nan for rain and snow means 0
df['rainfall_mm'].fillna(0,inplace=True)
df['snowfall_mm'].fillna(0,inplace=True)

#interpolate missing temperatures, can't remove as there are some days with covid cases
df['average_temperature_celsius'] = df['average_temperature_celsius'].interpolate()
df['minimum_temperature_celsius'] = df['minimum_temperature_celsius'].interpolate()
df['maximum_temperature_celsius'] = df['maximum_temperature_celsius'].interpolate()

#fill mobility reports with 0 for na since its a relative % to baseline
df['mobility_transit_stations'] = df['mobility_transit_stations'].interpolate()
df['mobility_retail_and_recreation'] = df['mobility_retail_and_recreation'].interpolate()
df['mobility_grocery_and_pharmacy'] = df['mobility_grocery_and_pharmacy'].interpolate()
df['mobility_residential'] = df['mobility_residential'].interpolate()
df['mobility_parks'] = df['mobility_parks'].interpolate()
df['mobility_workplaces'] = df['mobility_workplaces'].interpolate()

#drop columns with too many nan (that we couldnt impute)
df = df.dropna(thresh=len(df)*.3,axis=1)

df = df.fillna(0)

In [4]:
df.describe()

Unnamed: 0,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,snowfall_mm
count,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0,1054575.0
mean,11.3087,0.235108,1010.509,31.41352,-9.421049,-10.39019,0.0311552,5.456814,5.670982,-20.46673,14.94737,9.369258,21.52437,2.186722,7.187438
std,67.68114,2.349883,5888.651,213.1499,20.51549,19.00053,13.28718,41.11924,4.680226,13.94443,9.825837,9.897392,10.16033,5.629447,42.2619
min,-2321.0,-512.0,0.0,0.0,-92.0,-100.0,-96.0,-95.0,-46.0,-92.0,-41.56944,-44.92778,-38.02778,0.0,0.0
25%,0.0,0.0,0.0,0.0,-21.0,-21.0,-7.0,-18.20769,2.646154,-29.0,7.805556,1.95,14.63333,0.0,0.0
50%,0.0,0.0,37.0,0.0,-6.5,-8.0,-0.4901961,1.202376,4.988685,-20.0,16.41111,10.11111,23.67222,0.0762,0.0
75%,4.0,0.0,358.0,7.0,3.0,1.010787,6.0,18.94041,7.797657,-11.0,23.21111,17.56667,29.73333,1.6764,0.0
max,14129.0,455.0,357541.0,7396.0,252.0,261.0,225.0,709.0,38.0,66.0,39.83333,32.55,47.73611,196.342,1750.06


In [5]:
df.count()

date                              1054575
location_key                      1054575
new_confirmed                     1054575
new_deceased                      1054575
cumulative_confirmed              1054575
cumulative_deceased               1054575
mobility_transit_stations         1054575
mobility_retail_and_recreation    1054575
mobility_grocery_and_pharmacy     1054575
mobility_parks                    1054575
mobility_residential              1054575
mobility_workplaces               1054575
average_temperature_celsius       1054575
minimum_temperature_celsius       1054575
maximum_temperature_celsius       1054575
rainfall_mm                       1054575
snowfall_mm                       1054575
dtype: int64

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1054575 entries, 0 to 1074194
Data columns (total 17 columns):
 #   Column                          Non-Null Count    Dtype         
---  ------                          --------------    -----         
 0   date                            1054575 non-null  datetime64[ns]
 1   location_key                    1054575 non-null  object        
 2   new_confirmed                   1054575 non-null  float64       
 3   new_deceased                    1054575 non-null  float64       
 4   cumulative_confirmed            1054575 non-null  float64       
 5   cumulative_deceased             1054575 non-null  float64       
 6   mobility_transit_stations       1054575 non-null  float64       
 7   mobility_retail_and_recreation  1054575 non-null  float64       
 8   mobility_grocery_and_pharmacy   1054575 non-null  float64       
 9   mobility_parks                  1054575 non-null  float64       
 10  mobility_residential            1054575 no

In [12]:
df['County_ID'] = df['location_key'].apply(lambda x : x.split("_")[2])
df['County_ID']

0          54093
1          54093
2          54093
3          54093
4          54093
           ...  
1074190    48339
1074191    48339
1074192    48339
1074193    48339
1074194    48339
Name: County_ID, Length: 1054575, dtype: object

In [13]:
df_dens = pd.read_csv("../../data/covid/USA_Population_Density.csv")
df_dens = df_dens.astype({"ID":str})
df = pd.merge(df, df_dens, how="inner", left_on=['County_ID'], right_on=['ID'])
df = df[df.columns.drop(["OBJECTID","ID","NAME","ST_ABBREV","LANDAREA"])]
df

Unnamed: 0,date,location_key,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,snowfall_mm,County_ID,TOTPOP_CY,POPDENS_CY
0,2020-10-28,US_WV_54093,0.0,0.0,63.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,13.631944,10.270833,17.104167,0.181429,0.0,54093,7168,17.1
1,2020-10-29,US_WV_54093,1.0,0.0,64.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,13.361111,9.520833,16.215278,13.933714,0.0,54093,7168,17.1
2,2020-10-30,US_WV_54093,1.0,0.0,65.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.256944,4.201389,12.361111,22.751143,0.0,54093,7168,17.1
3,2020-10-31,US_WV_54093,1.0,0.0,66.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,3.840278,-2.166667,11.145833,1.206500,0.0,54093,7168,17.1
4,2020-11-01,US_WV_54093,1.0,0.0,67.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,7.229167,0.208333,13.256944,0.254000,0.0,54093,7168,17.1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
923770,2020-11-18,US_TX_48337,14.0,1.0,447.0,14.0,7.491538,0.028881,-1.039711,2.157895,-0.709302,-21.549020,13.687500,5.555556,23.326389,0.000000,0.0,48337,19759,21.2
923771,2020-11-19,US_TX_48337,36.0,0.0,483.0,14.0,7.521707,0.108303,-0.898917,2.394737,-0.715116,-21.098039,15.534722,13.375000,17.493056,0.000000,0.0,48337,19759,21.2
923772,2020-11-20,US_TX_48337,19.0,0.0,502.0,14.0,7.551876,0.187726,-0.758123,2.631579,-0.720930,-20.647059,14.607986,12.204861,16.907292,0.000000,0.0,48337,19759,21.2
923773,2020-11-21,US_TX_48337,0.0,0.0,0.0,0.0,7.582046,0.267148,-0.617329,2.868421,-0.726744,-20.196078,13.681250,11.034722,16.321528,0.000000,0.0,48337,19759,21.2


In [14]:
# grouped = df_filt.groupby(['location_key']).filter(lambda x : x['new_confirmed'].count() > 200)
#num=200
#top=df[df['new_confirmed']>25].groupby('location_key').agg({'date' : np.size}).sort_values('date',ascending=False)[0:num]
#df_filt = df[df['location_key'].isin(top.index)].fillna(0)

# Urban here means non-rural, defined by the Census Bureau as areas with less than 500 people per square mile or less than 2500 people
# https://www.ers.usda.gov/topics/rural-economy-population/rural-classifications/what-is-rural/
df_filt = df[df["TOTPOP_CY"]>2500]
df_filt = df_filt[df_filt["POPDENS_CY"]>500]
df_filt = df_filt[df_filt["date"] < "2020-11-21"] # Remove last 2 days
df_filt = df_filt[df_filt.columns.drop(["TOTPOP_CY","POPDENS_CY"])] # We don't need these measures anymore
df_filt
# majorcounties = df_filt.pivot(index="date",columns="location_key", values="cumulative_confirmed")
# majorcounties = majorcounties.drop(majorcounties.tail(2).index)

# majorcounties.plot(subplots=True,layout=(num//2,2),figsize=(10,num))

Unnamed: 0,date,location_key,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,average_temperature_celsius,minimum_temperature_celsius,maximum_temperature_celsius,rainfall_mm,snowfall_mm,County_ID
14715,2020-01-01,US_VA_51087,0.0,0.0,0.0,0.0,10.352941,-4.176471,-2.882353,-6.151856,9.705882,-31.882353,6.155556,-0.750000,12.794444,0.0000,0.0,51087
14716,2020-01-02,US_VA_51087,0.0,0.0,0.0,0.0,10.078431,-4.039216,-2.862745,-6.148481,9.490196,-31.196078,6.122222,-1.488889,15.811111,0.0000,0.0,51087
14717,2020-01-03,US_VA_51087,0.0,0.0,0.0,0.0,9.803922,-3.901961,-2.843137,-6.145107,9.274510,-30.509804,10.583333,5.750000,14.694444,2.4638,0.0,51087
14718,2020-01-04,US_VA_51087,0.0,0.0,0.0,0.0,9.529412,-3.764706,-2.823529,-6.141732,9.058824,-29.823529,14.238889,9.772222,17.183333,4.3942,0.0,51087
14719,2020-01-05,US_VA_51087,0.0,0.0,0.0,0.0,9.254902,-3.627451,-2.803922,-6.138358,8.843137,-29.137255,7.333333,1.877778,14.183333,5.2832,0.0,51087
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
903821,2020-11-16,US_TX_48215,108.0,6.0,37521.0,2004.0,-23.000000,-22.000000,-16.000000,-38.000000,7.000000,-27.000000,22.761111,18.738889,27.516667,0.5588,0.0,48215
903822,2020-11-17,US_TX_48215,426.0,2.0,37947.0,2006.0,-21.000000,-24.000000,-14.000000,-34.000000,11.000000,-35.000000,20.600000,15.300000,27.000000,0.0000,0.0,48215
903823,2020-11-18,US_TX_48215,681.0,9.0,38628.0,2015.0,-20.431373,-23.490196,-13.607843,-34.080000,10.849057,-34.235294,20.077778,12.722222,28.072222,0.0000,0.0,48215
903824,2020-11-19,US_TX_48215,679.0,7.0,39307.0,2022.0,-19.862745,-22.980392,-13.215686,-34.160000,10.698113,-33.470588,18.238095,14.690476,21.960317,0.0000,0.0,48215


In [58]:
# majorcounties_smth = majorcounties.rolling(5).mean()
# majorcounties_smth.plot(subplots=True,layout=(num//2,2),figsize=(10,num))

In [34]:
for x in df_filt["location_key"].unique():
    print(codeToName(x))

Henrico County, Virginia
Loudoun County, Virginia
Prince William County, Virginia
York County, Virginia
Alexandria, Virginia
Bristol, Virginia
Buena Vista, Virginia
Charlottesville, Virginia
Chesapeake, Virginia
Colonial Heights, Virginia
Covington, Virginia
Danville, Virginia
Emporia, Virginia
Fairfax, Virginia
Falls Church, Virginia
Franklin, Virginia
Fredericksburg, Virginia
Galax, Virginia
Hampton, Virginia
Harrisonburg, Virginia
Hopewell, Virginia
Lexington, Virginia
Lynchburg, Virginia
Manassas, Virginia
Manassas Park, Virginia
Martinsville, Virginia
Newport News, Virginia
Norfolk, Virginia
Norton, Virginia
Petersburg, Virginia
Poquoson, Virginia
Portsmouth, Virginia
Radford, Virginia
Richmond, Virginia
Roanoke, Virginia
Salem, Virginia
Staunton, Virginia
Virginia Beach, Virginia
Waynesboro, Virginia
Williamsburg, Virginia
Winchester, Virginia
Clark County, Washington
King County, Washington
Kitsap County, Washington
Kenosha County, Wisconsin
Milwaukee County, Wisconsin
Racine Co

In [35]:
df_filt.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 74750 entries, 14715 to 903825
Data columns (total 18 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   date                            74750 non-null  datetime64[ns]
 1   location_key                    74750 non-null  object        
 2   new_confirmed                   74750 non-null  float64       
 3   new_deceased                    74750 non-null  float64       
 4   cumulative_confirmed            74750 non-null  float64       
 5   cumulative_deceased             74750 non-null  float64       
 6   mobility_transit_stations       74750 non-null  float64       
 7   mobility_retail_and_recreation  74750 non-null  float64       
 8   mobility_grocery_and_pharmacy   74750 non-null  float64       
 9   mobility_parks                  74750 non-null  float64       
 10  mobility_residential            74750 non-null  float64       
 1

Append population density to filtered covid data and save

In [19]:
res = df_filt[df_filt.columns.drop(["location_key"])]
res = res.reset_index(drop=True)
res = res.drop(labels=["rainfall_mm","snowfall_mm"], axis='columns')
res = res.rename(columns={"County_ID" : "county_id"})
res = res.drop(["average_temperature_celsius", "minimum_temperature_celsius", "maximum_temperature_celsius"], axis=1)
res = res.sort_values(by=["date","county_id"],ignore_index=True)
res["smoothed_cumul"] = 0.0
res["smoothed_d1"] = 0.0
res["smoothed_d2"] = 0.0
res

Unnamed: 0,date,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,county_id,smoothed_cumul,smoothed_d1,smoothed_d2
0,2020-01-01,0.0,0.0,0.0,0.0,-38.000000,0.235294,-0.882353,33.566038,7.058824,-20.941176,10003,0.0,0.0,0.0
1,2020-01-01,0.0,0.0,0.0,0.0,-21.700000,-7.793651,8.920635,9.888889,7.873016,-16.730159,11001,0.0,0.0,0.0
2,2020-01-01,0.0,0.0,0.0,0.0,12.404762,-9.235294,2.647059,4.809524,2.647059,-16.647059,12009,0.0,0.0,0.0
3,2020-01-01,0.0,0.0,0.0,0.0,-13.705882,-14.588235,-7.647059,-15.941176,6.411765,-22.294118,12011,0.0,0.0,0.0
4,2020-01-01,0.0,0.0,0.0,0.0,4.361702,-3.848375,-0.974729,4.166667,-0.761905,-13.352941,12031,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
74745,2020-11-20,24.0,1.0,2101.0,30.0,-42.294118,-19.941176,-12.000000,-8.882353,12.490566,-32.470588,53035,0.0,0.0,0.0
74746,2020-11-20,253.0,1.0,9140.0,123.0,-35.650794,-14.882353,-10.176471,-24.722222,9.944853,-28.882353,55059,0.0,0.0,0.0
74747,2020-11-20,1118.0,9.0,64259.0,698.0,-31.235294,-25.176471,-9.058824,-4.385714,12.547170,-32.058824,55079,0.0,0.0,0.0
74748,2020-11-20,337.0,2.0,13838.0,151.0,-24.809524,-14.941176,-4.888889,11.936508,9.912698,-26.176471,55101,0.0,0.0,0.0


In [39]:
res.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 74750 entries, 0 to 74749
Data columns (total 12 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   date                            74750 non-null  datetime64[ns]
 1   new_confirmed                   74750 non-null  float64       
 2   new_deceased                    74750 non-null  float64       
 3   cumulative_confirmed            74750 non-null  float64       
 4   cumulative_deceased             74750 non-null  float64       
 5   mobility_transit_stations       74750 non-null  float64       
 6   mobility_retail_and_recreation  74750 non-null  float64       
 7   mobility_grocery_and_pharmacy   74750 non-null  float64       
 8   mobility_parks                  74750 non-null  float64       
 9   mobility_residential            74750 non-null  float64       
 10  mobility_workplaces             74750 non-null  float64       
 11  co

In [8]:
def savitzky_smooth(arr:np.ndarray):
    arr = np.pad(arr,3,mode="edge")

    smoothedPeriod = []
    smoothedD1 = []
    smoothedD2 = []
    savI = range(3,len(arr)-3)
    for i in savI:
        smoothedPeriod.append(1/21*(7*arr[i]
            + 6*arr[i-1] + 6*arr[i+1]
            + 3*arr[i-2] + 3*arr[i+2]
            - 2*arr[i-3] - 2*arr[i+3]))
        smoothedD1.append(1/28*(0*arr[i]
            - 1*arr[i-1] + 1*arr[i+1]
            - 2*arr[i-2] + 2*arr[i+2]
            - 3*arr[i-3] + 3*arr[i+3]))
        smoothedD2.append(1/42*(-4*arr[i]
            - 3*arr[i-1] - 3*arr[i+1]
            + 0*arr[i-2] + 0*arr[i+2]
            + 5*arr[i-3] + 5*arr[i+3]))
    smoothedPeriod = np.array(smoothedPeriod)
    smoothedD1 = np.array(smoothedD1)
    smoothedD2 = np.array(smoothedD2)

    return smoothedPeriod,smoothedD1,smoothedD2

In [20]:
# Perform smoothing and select consecutive periods of 14+ days with at least 20 cases per day
df_smooth = pd.DataFrame()
for group,group_df in res.groupby("county_id"):
    smoothedTotal,smoothedD1,smoothedD2 = savitzky_smooth(group_df["cumulative_confirmed"].values)

    
    mobility = group_df[['mobility_transit_stations',
                    'mobility_retail_and_recreation',
                    'mobility_grocery_and_pharmacy',
                    'mobility_residential',
                    'mobility_parks',
                    'mobility_workplaces']].values
    
    mobility[10:] = mobility[:-10]
    mobility[:10] = 0.0

    group_df[['mobility_transit_stations',
                    'mobility_retail_and_recreation',
                    'mobility_grocery_and_pharmacy',
                    'mobility_residential',
                    'mobility_parks',
                    'mobility_workplaces']] = mobility


    group_df["smoothed_cumul"] = smoothedTotal
    group_df["smoothed_d1"] = smoothedD1
    group_df["smoothed_d2"] = smoothedD2
    select = (group_df.rolling(14)['smoothed_d1'].min() >= 20.0).values
     # Window is given true if all 13 days before + current day are above 20,
     # so we need to include the 13 previous days
    for i in range(len(select)-13):
        if select[i+13]:
            for j in range(13):
                select[i+j] = True
            i += 13
    group_df = group_df[select]
    df_smooth = pd.concat([df_smooth,group_df],ignore_index=True)

res = df_smooth
res

Unnamed: 0,date,new_confirmed,new_deceased,cumulative_confirmed,cumulative_deceased,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,county_id,smoothed_cumul,smoothed_d1,smoothed_d2
0,2020-03-29,11.0,1.0,141.0,3.0,-42.0,-28.0,-6.0,2.0,17.0,-39.0,10003,141.809524,21.892857,3.880952
1,2020-03-30,15.0,0.0,156.0,3.0,-39.0,-26.0,0.0,49.0,17.0,-40.0,10003,166.000000,23.750000,2.785714
2,2020-03-31,41.0,3.0,197.0,6.0,-40.0,-35.0,-9.0,78.0,11.0,-25.0,10003,191.000000,25.892857,2.642857
3,2020-04-01,29.0,0.0,226.0,6.0,-56.0,-38.0,-19.0,17.0,10.0,-30.0,10003,212.238095,38.250000,11.309524
4,2020-04-02,19.0,1.0,245.0,7.0,-52.0,-31.0,-12.0,-49.0,19.0,-45.0,10003,254.857143,46.392857,11.071429
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34516,2020-11-16,288.0,0.0,20617.0,167.0,-31.0,-23.0,-9.0,98.0,14.0,-35.0,55133,20912.666667,777.357143,245.380952
34517,2020-11-17,1141.0,2.0,21758.0,169.0,-34.5,-16.0,-1.0,144.0,4.0,-6.0,55133,21829.952381,893.464286,179.880952
34518,2020-11-18,1391.0,2.0,23149.0,171.0,-38.0,-17.0,-9.0,114.0,3.0,-10.0,55133,23075.476190,906.571429,-96.523810
34519,2020-11-19,1149.0,2.0,24298.0,173.0,-26.0,-17.0,-3.0,74.0,12.0,-32.0,55133,24195.761905,770.071429,-322.095238


In [21]:
res = res[res.columns.drop(["new_confirmed","new_deceased","cumulative_confirmed","cumulative_deceased"])]
res.to_csv("../../data/covid/usa_urban_no_density_smoothed.csv",index=False)
res

Unnamed: 0,date,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,county_id,smoothed_cumul,smoothed_d1,smoothed_d2
0,2020-03-29,-42.0,-28.0,-6.0,2.0,17.0,-39.0,10003,141.809524,21.892857,3.880952
1,2020-03-30,-39.0,-26.0,0.0,49.0,17.0,-40.0,10003,166.000000,23.750000,2.785714
2,2020-03-31,-40.0,-35.0,-9.0,78.0,11.0,-25.0,10003,191.000000,25.892857,2.642857
3,2020-04-01,-56.0,-38.0,-19.0,17.0,10.0,-30.0,10003,212.238095,38.250000,11.309524
4,2020-04-02,-52.0,-31.0,-12.0,-49.0,19.0,-45.0,10003,254.857143,46.392857,11.071429
...,...,...,...,...,...,...,...,...,...,...,...
34516,2020-11-16,-31.0,-23.0,-9.0,98.0,14.0,-35.0,55133,20912.666667,777.357143,245.380952
34517,2020-11-17,-34.5,-16.0,-1.0,144.0,4.0,-6.0,55133,21829.952381,893.464286,179.880952
34518,2020-11-18,-38.0,-17.0,-9.0,114.0,3.0,-10.0,55133,23075.476190,906.571429,-96.523810
34519,2020-11-19,-26.0,-17.0,-3.0,74.0,12.0,-32.0,55133,24195.761905,770.071429,-322.095238


In [79]:
res.iloc[200:250]

Unnamed: 0,date,mobility_transit_stations,mobility_retail_and_recreation,mobility_grocery_and_pharmacy,mobility_parks,mobility_residential,mobility_workplaces,county_id,smoothed_cumul,smoothed_d1,smoothed_d2
200,2020-10-15,-30.0,-14.0,-6.0,39.0,11.0,-37.0,10003,11864.904762,78.214286,13.47619
201,2020-10-16,-39.0,-22.0,-14.0,-34.0,14.0,-38.0,10003,11956.761905,82.107143,4.119048
202,2020-10-17,-21.0,-14.0,-2.0,83.0,2.0,-8.0,10003,12052.571429,84.392857,-5.928571
203,2020-10-18,-28.0,-16.0,-6.0,57.0,2.0,-21.0,10003,12143.142857,77.107143,-13.785714
204,2020-10-19,-37.0,-17.0,-8.0,12.0,10.0,-38.0,10003,12208.190476,69.892857,-7.02381
205,2020-10-20,-35.0,-15.0,-7.0,26.0,11.0,-38.0,10003,12265.095238,67.964286,2.595238
206,2020-10-21,-34.0,-15.0,-7.0,21.0,11.0,-37.0,10003,12326.380952,70.857143,9.095238
207,2020-10-22,-33.0,-15.0,-7.0,28.0,11.0,-37.0,10003,12404.52381,75.785714,5.238095
208,2020-10-23,-34.0,-16.0,-7.0,7.0,11.0,-37.0,10003,12480.095238,83.75,8.595238
209,2020-10-24,-24.0,-16.0,-5.0,84.0,2.0,-9.0,10003,12578.857143,86.214286,-2.428571
