# Why  are some US counties more affected than others? Exploring the spread of Covid-19 in the US counties



<img src='../Data/images/cv.jpg'>

## Import the Libraries

In [72]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from datetime import datetime, timedelta # date time

#Importing Data plotting libraries
import matplotlib.pyplot as plt     
import seaborn as sns      
import plotly.express as px
import plotly.graph_objects as go

## Load the Datasets

> Source: <a href= "https://github.com/nytimes/covid-19-data"> NY-Times Covid-19 Data </a>

In [73]:
'''
Potential Questions?
1. Why are some counties more affected than the other?
2. Are the counties in low income bracket more affected?
'''
state_level = pd.read_csv("../Data/NY_times/us-states.csv")

## Let's Analyze the state-level data

In [74]:
# No null values in the data
state_level.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5569 entries, 0 to 5568
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   date    5569 non-null   object
 1   state   5569 non-null   object
 2   fips    5569 non-null   int64 
 3   cases   5569 non-null   int64 
 4   deaths  5569 non-null   int64 
dtypes: int64(3), object(2)
memory usage: 217.7+ KB


In [75]:
state_level.head()

Unnamed: 0,date,state,fips,cases,deaths
0,2020-01-21,Washington,53,1,0
1,2020-01-22,Washington,53,1,0
2,2020-01-23,Washington,53,1,0
3,2020-01-24,Illinois,17,1,0
4,2020-01-24,Washington,53,1,0


In [76]:
print("The data we have is from {} to {}".format(state_level['date'].min(), state_level['date'].max()))
print("The date on which the first covid-19 case was recorded: {}".format(state_level[state_level['cases']>0]['date'].iloc[0]))
print("The date on which the first death from covid-19 was recorded: {}".format(state_level[state_level['deaths']>0]['date'].iloc[0]))

The data we have is from 2020-01-21 to 2020-06-11
The date on which the first covid-19 case was recorded: 2020-01-21
The date on which the first death from covid-19 was recorded: 2020-02-29


### Let's convert state names to state codes (abbreviations)

In [77]:
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

state_level['state_code'] = state_level['state'].map(us_state_abbrev)
state_level.head()

Unnamed: 0,date,state,fips,cases,deaths,state_code
0,2020-01-21,Washington,53,1,0,WA
1,2020-01-22,Washington,53,1,0,WA
2,2020-01-23,Washington,53,1,0,WA
3,2020-01-24,Illinois,17,1,0,IL
4,2020-01-24,Washington,53,1,0,WA


### Plotting a Running Map for observing the spread of COVID-19 Confirmed Cases

In [147]:
statewise_group = state_level.groupby(['date', 'state_code', 'fips'])[['cases', 'deaths']].max().reset_index()
statewise_group['ob_date'] = pd.to_datetime(statewise_group['date'])
statewise_group['ob_date'] = statewise_group['ob_date'].dt.strftime('%m/%d/%Y')
statewise_group = statewise_group.sort_values('ob_date')

statewise_group.loc[:, 'log_ConfirmedCases'] = np.log(statewise_group.cases + 1)

fig = px.choropleth(locations=statewise_group['state_code'],
                    color=statewise_group["log_ConfirmedCases"], 
                    locationmode="USA-states",
                    scope="usa",
                    animation_frame=statewise_group['ob_date'],
                    color_continuous_scale=px.colors.sequential.Viridis,
                   )

layout = go.Layout(
    title=go.layout.Title(
        text="The spread of Covid-19 cases in the US states",
        x=0.5
    ),
    font=dict(size=14),
)

fig.update_layout(layout)
fig.show()

### Plotting a Running Map for observing the fatalities of COVID-19 

In [236]:
statewise_deaths = statewise_group[statewise_group['deaths']>0]

fig = px.choropleth(locations=statewise_deaths['state_code'],
                    color=statewise_deaths["deaths"], 
                    locationmode="USA-states",
                    scope="usa",
                    animation_frame=statewise_deaths['ob_date'],
                    color_continuous_scale=px.colors.sequential.Viridis
                   )

layout = go.Layout(
    title=go.layout.Title(
        text="The deaths in the US states due to Covid-19",
        x=0.5
    ),
    font=dict(size=14),
)

fig.update_layout(layout)
fig.show()


## Let's Analyze county-level data

> Source: 
1. <a href="https://usafacts.org/visualizations/coronavirus-covid-19-spread-map/"> USA Facts </a>
2. <a href="https://www.kaggle.com/roche-data-science-coalition/uncover?"> United Network for COVID Data Exploration and Research </a>
3. <a href = "https://www.ers.usda.gov/data-products.aspx"> USDA ERS </a>

In [47]:
today_date = datetime.today()- timedelta(days=2)
today_date = today_date.strftime('%m/%d/%y')[1:]

'''
Get the relevant columns only.
Set the same column names for all the county-level data. Easy of use.
'''
county_cases = pd.read_csv("../Data/USA_facts/covid_confirmed_usafacts.csv")[['countyFIPS', 'County Name', 'State', today_date]]
county_cases.columns = ['cfips', 'county', 'state', 'cases']
county_cases = county_cases.set_index('cfips')

county_deaths = pd.read_csv("../Data/USA_facts/covid_deaths_usafacts.csv")[['countyFIPS', today_date]]
county_deaths.columns = ['cfips', 'deaths']
county_deaths = county_deaths.set_index('cfips')

county_population = pd.read_csv("../Data/USA_facts/covid_county_population_usafacts.csv")[['countyFIPS', 'population']]
county_population.columns = ['cfips', 'population']
county_population = county_population.set_index('cfips')

county_level = county_cases.join(county_deaths)
county_level = county_level.join(county_population)

county_level = county_level[county_level.index>999]
county_level = county_level[county_level['population']>0]

del county_cases, county_population, county_deaths

#Let's add a few more details such as mortality, deaths per million, cases per million 
county_level['mortality'] = county_level['deaths']/county_level['cases']
county_level['mortality'] = county_level['mortality'].fillna(0) # in case of null values

county_level['deaths_per_mil'] = county_level['deaths'] * 1000000/county_level['population']
county_level['cases_per_mil'] = county_level['cases'] * 1000000/county_level['population']

county_level = county_level.reset_index()

county_level.head()

Unnamed: 0,cfips,county,state,cases,deaths,population,mortality,deaths_per_mil,cases_per_mil
0,1001,Autauga County,AL,312,6,55869.0,0.019231,107.394083,5584.492294
1,1003,Baldwin County,AL,343,9,223234.0,0.026239,40.316439,1536.504296
2,1005,Barbour County,AL,214,1,24686.0,0.004673,40.50879,8668.881147
3,1007,Bibb County,AL,93,1,22394.0,0.010753,44.654818,4152.898098
4,1009,Blount County,AL,87,1,57826.0,0.011494,17.293259,1504.513541


In [134]:
# Adding more county level data to understand the Covid-19 spread in the US counties
county_health = pd.read_csv("../Data/uncover/us-county-health-rankings-2020.csv")[['fips', 'percent_smokers', 
'percent_adults_with_obesity', 'percent_physically_inactive', 'percent_excessive_drinking', 'percent_uninsured',
'num_primary_care_physicians', 'num_mental_health_providers', 'high_school_graduation_rate', 'percent_some_college',
'percent_unemployed', 'percent_children_in_poverty', 'life_expectancy']]
county_health = county_health.rename(columns={'fips': 'cfips'})

county_poverty = pd.read_csv("../Data/USDA/PovertyEstimates.csv")[["FIPStxt", "PCTPOVALL_2018", "MEDHHINC_2018"]]
county_poverty = county_poverty.rename(columns={'FIPStxt': 'cfips', 'PCTPOVALL_2018': 'percent_poverty', 
                                               'MEDHHINC_2018': 'median_hincome'})

county_level = county_level.merge(county_health)
county_level = county_level.merge(county_poverty)
county_level['primary_care_per_pop'] = county_level['num_primary_care_physicians'] *100 /county_level['population']
county_level['primary_care_per_pop'] = county_level['primary_care_per_pop'].fillna(0)
county_level.head()

Unnamed: 0,cfips,county,state,cases,deaths,population,mortality,deaths_per_mil,cases_per_mil,percent_smokers,...,num_primary_care_physicians,num_mental_health_providers,high_school_graduation_rate,percent_some_college,percent_unemployed,percent_children_in_poverty,life_expectancy,percent_poverty,median_hincome,primary_care_per_pop
0,1001,Autauga County,AL,312,6,55869.0,0.019231,107.394083,5584.492294,18.081557,...,25.0,13.0,90.0,62.009974,3.629079,19.3,76.879477,13.8,59338,0.044748
1,1003,Baldwin County,AL,343,9,223234.0,0.026239,40.316439,1536.504296,17.489033,...,155.0,210.0,86.361577,67.37162,3.615382,13.9,78.450258,9.8,57588,0.069434
2,1005,Barbour County,AL,214,1,24686.0,0.004673,40.50879,8668.881147,21.999985,...,8.0,2.0,81.410256,34.857649,5.171384,43.9,75.341935,30.9,34382,0.032407
3,1007,Bibb County,AL,93,1,22394.0,0.010753,44.654818,4152.898098,19.1142,...,11.0,5.0,83.763838,44.137353,3.971828,27.8,73.57182,21.8,46064,0.04912
4,1009,Blount County,AL,87,1,57826.0,0.011494,17.293259,1504.513541,19.208672,...,13.0,9.0,93.468795,53.361073,3.511157,18.0,74.145826,13.2,50412,0.022481


### Counties with most cases

In [123]:
cases_most = county_level.groupby(['county', 'state'])['cases'].max().reset_index()
cases_most = cases_most.sort_values('cases', ascending=False).head(10)

fig = px.bar(cases_most, x='cases', y='county', color='state')
fig.update_layout(
    title="Counties with most number of Cases",
    xaxis_title="Total number of cases",
    yaxis_title="Counties",
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f")
)
fig.show()

### Counties with most cases per million

In [122]:
cases_per_mil = county_level.groupby(['county', 'state'])['cases_per_mil'].max().reset_index()
cases_per_mil = cases_per_mil.sort_values('cases_per_mil', ascending=False).head(10)

fig = px.bar(cases_per_mil, x='cases_per_mil', y='county', color='state')
fig.update_layout(
    title="Counties with most number of Cases per million",
    xaxis_title="cases per million",
    yaxis_title="Counties",
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f")
)
fig.show()

### Counties with most deaths

In [125]:
deaths_most = county_level.groupby(['county', 'state'])['deaths'].max().reset_index()
deaths_most = deaths_most.sort_values('deaths', ascending=False).head(10)

fig = px.bar(deaths_most, x='deaths', y='county', color='state')
fig.update_layout(
    title="Counties with most number of Deaths",
    xaxis_title="Total number of deaths",
    yaxis_title="Counties",
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f")
)
fig.show()

### Counties with most deaths per million

In [145]:
deaths_per_mil = county_level.groupby(['county', 'state'])['deaths_per_mil'].max().reset_index()
deaths_per_mil = deaths_per_mil.sort_values('deaths_per_mil', ascending=False).head(10)

fig = px.bar(deaths_per_mil, x='deaths_per_mil', y='county', color='state')
fig.update_layout(
    title="Counties with most number of Deaths per million",
    xaxis_title="Total number of deaths per million",
    yaxis_title="Counties",
    font=dict(
        family="Courier New, monospace",
        size=15,
        color="#7f7f7f")
)
fig.show()

### Feature Correlation

In [146]:
county_level.iloc[:,:].corr().style.background_gradient(cmap='Reds').format("{:.3f}")

Unnamed: 0,cfips,cases,deaths,population,mortality,deaths_per_mil,cases_per_mil,percent_smokers,percent_adults_with_obesity,percent_physically_inactive,percent_excessive_drinking,percent_uninsured,num_primary_care_physicians,num_mental_health_providers,high_school_graduation_rate,percent_some_college,percent_unemployed,percent_children_in_poverty,life_expectancy,percent_poverty,primary_care_per_pop
cfips,1.0,-0.027,-0.016,-0.056,-0.009,-0.103,-0.083,-0.086,-0.031,-0.111,0.075,0.153,-0.055,-0.065,0.151,0.005,-0.107,-0.094,0.035,-0.087,-0.01
cases,-0.027,1.0,0.913,0.748,0.082,0.432,0.288,-0.142,-0.172,-0.124,0.078,-0.076,0.755,0.704,-0.098,0.129,-0.012,-0.062,0.153,-0.059,0.148
deaths,-0.016,0.913,1.0,0.568,0.111,0.483,0.246,-0.112,-0.15,-0.086,0.062,-0.081,0.577,0.545,-0.09,0.109,-0.004,-0.041,0.128,-0.039,0.122
population,-0.056,0.748,0.568,1.0,0.068,0.2,0.112,-0.188,-0.22,-0.224,0.126,-0.076,0.964,0.903,-0.116,0.197,-0.037,-0.113,0.185,-0.104,0.199
mortality,-0.009,0.082,0.111,0.068,1.0,0.423,0.066,0.013,-0.016,-0.002,0.025,-0.086,0.071,0.069,-0.031,0.046,0.056,0.012,-0.014,-0.009,0.055
deaths_per_mil,-0.103,0.432,0.483,0.2,0.423,1.0,0.553,0.029,0.005,0.021,-0.068,-0.053,0.219,0.208,-0.117,-0.025,0.1,0.121,-0.022,0.113,0.063
cases_per_mil,-0.083,0.288,0.246,0.112,0.066,0.553,1.0,0.058,0.049,0.045,-0.076,0.036,0.131,0.121,-0.108,-0.118,0.03,0.113,-0.009,0.134,0.005
percent_smokers,-0.086,-0.142,-0.112,-0.188,0.013,0.029,0.058,1.0,0.492,0.534,-0.421,0.093,-0.214,-0.206,-0.096,-0.535,0.439,0.633,-0.712,0.662,-0.222
percent_adults_with_obesity,-0.031,-0.172,-0.15,-0.22,-0.016,0.005,0.049,0.492,1.0,0.56,-0.301,0.06,-0.265,-0.26,-0.023,-0.372,0.24,0.38,-0.486,0.368,-0.263
percent_physically_inactive,-0.111,-0.124,-0.086,-0.224,-0.002,0.021,0.045,0.534,0.56,1.0,-0.481,0.232,-0.26,-0.254,0.046,-0.491,0.22,0.509,-0.571,0.455,-0.336


### Does Poverty percentage affect the spread of Covid-19

In [126]:
poverty = county_level.groupby(['county', 'state'])[['cases', 'deaths', 'percent_unemployed', 'percent_children_in_poverty',
                                                       'percent_poverty', 'median_hincome']].max().reset_index()
least_poverty = poverty.sort_values('percent_poverty').head(10)
most_poverty = poverty.sort_values('percent_poverty', ascending = False).head(10)

# Create traces
fig = go.Figure()
fig.add_trace(go.Scatter(x=least_poverty['county'], y=least_poverty['cases'],
                    mode='lines',
                    name='Least Poverty percent counties'))
fig.add_trace(go.Scatter(x=most_poverty['county'], y=most_poverty['cases'],
                    mode='lines+markers',
                    name='Most Poverty percent counties'))
# Edit the layout
fig.update_layout(title='Variation of Covid-19 cases among the top 10 counties with lowest and highest poverty percent',
                   xaxis_title='Counties',
                   yaxis_title='Total number of Cases')
fig.show()

## Let's Analyze County level data from NY times

In [221]:
us_county_data = pd.read_csv("../Data/NY_times/us-counties.csv")
us_county_data.head()

Unnamed: 0,date,county,state,fips,cases,deaths
0,2020-01-21,Snohomish,Washington,53061.0,1,0
1,2020-01-22,Snohomish,Washington,53061.0,1,0
2,2020-01-23,Snohomish,Washington,53061.0,1,0
3,2020-01-24,Cook,Illinois,17031.0,1,0
4,2020-01-24,Snohomish,Washington,53061.0,1,0


### Number of counties affected over time

In [231]:
us_county_nums = us_county_data.groupby('date')['county'].agg('count').reset_index()

fig = px.scatter(us_county_nums, x='date', y='county')
fig.update_layout(title='Number of counties affected over time',
                 xaxis_title='Date',
                 yaxis_title='Number of Counties')
fig.show()

### Number of Cases over time

In [233]:
us_cases_nums = us_county_data.groupby('date')['cases'].agg('sum').reset_index()

fig = px.scatter(us_cases_nums, x='date', y='cases')
fig.update_layout(title='Number of cases over time',
                 xaxis_title='Date',
                 yaxis_title='Number of Cases')
fig.show()

### Number of deaths over time

In [235]:
us_deaths_nums = us_county_data.groupby('date')['deaths'].agg('sum').reset_index()

fig = px.scatter(us_deaths_nums, x='date', y='deaths')
fig.update_layout(title='Number of deaths over time',
                 xaxis_title='Date',
                 yaxis_title='Number of Cases')
fig.show()

## Let's Analyze the social distancing data

> Source: <a href = "https://socialdistancing.stanford.edu/"> Stanford </a>

In [209]:
# Social Distancing data is available for only a few counties
social_distance = pd.read_csv("../Data/stanford/Crowdsourced COVID-19 Intervention Data - Stanford Team + Crowdsourced Data.csv")
social_distance.head()

Unnamed: 0,state,county,business_closed,business_closed_date,business_closed_url,business_open,business_open_date,business_open_url,school_closed,school_closed_date,...,religion_closed_url,religion_open,religion_open_date,religion_open_url,lockdown_closed,lockdown_closed_date,lockdown_closed_url,lockdown_open,lockdown_open_date,lockdown_open_url
0,Alabama,Autauga County,Yes,2020-03-28,https://governor.alabama.gov/assets/2020/04/Fi...,No,,,Yes,2020-03-15,...,https://www.wsfa.com/2020/03/13/churches-cance...,No,,,Yes,2020-04-04,https://www.wtva.com/content/news/Alabama-gove...,No,,
1,Alabama,Baldwin County,Yes,2020-03-28,https://bloximages.newyork1.vip.townnews.com/f...,No,,,Yes,2020-03-18,...,https://bloximages.newyork1.vip.townnews.com/f...,No,,,Yes,2020-04-04,https://www.alabamanews.net/2020/04/28/alabama...,No,,
2,Alabama,Madison County,Yes,2020-03-27,https://whnt.com/news/coronavirus/list-of-busi...,No,,,Yes,2020-03-19,...,https://www.waaytv.com/content/news/Church-ser...,No,,,Yes,2020-04-03,https://abc3340.com/news/coronavirus/alabama-g...,No,,
3,Alabama,Tuscaloosa County,Yes,2020-03-28,https://www.al.com/news/2020/03/all-non-essent...,No,,,Yes,2020-03-18,...,https://www.al.com/news/2020/03/religious-cong...,No,,,Yes,2020-04-04,https://www.tuscaloosanews.com/news/20200403/a...,No,,
4,Arizona,Coconino County,Yes,2020-03-31,https://www.fisherphillips.com/resources-alert...,No,,,Yes,2020-03-16,...,https://www.youtube.com/watch?v=tgU-8npxP6U&fb...,No,,,Yes,2020-03-31,https://www.12news.com/article/news/health/cor...,No,,


### Let's look at New York Data

In [202]:
social_distance_ny = social_distance[social_distance['state'] == 'New York']
social_distance_ny = social_distance_ny[social_distance_ny['business_closed_date'] == '2020-03-22']

print("""The following things were closed in New York counties by 22nd March, 2020: \n1. Businesses \n2. Schools
3. Religious Gatherings""")
social_distance_ny

The following things were closed in New York counties by 22nd March, 2020: 
1. Businesses 
2. Schools
3. Religious Gatherings


Unnamed: 0,state,county,business_closed,business_closed_date,business_closed_url,business_open,business_open_date,business_open_url,school_closed,school_closed_date,...,religion_closed_url,religion_open,religion_open_date,religion_open_url,lockdown_closed,lockdown_closed_date,lockdown_closed_url,lockdown_open,lockdown_open_date,lockdown_open_url
233,New York,Albany County,Yes,2020-03-22,https://www.albanycounty.com/departments/healt...,No,,,Yes,2020-03-14,...,https://www.albanycounty.com/departments/healt...,No,,,Yes,2020-03-22,https://www.albanycounty.com/departments/healt...,No,,
234,New York,Bronx County,Yes,2020-03-22,https://www.bloomberg.com/news/articles/2020-0...,No,,,Yes,2020-03-14,...,https://www.nytimes.com/2020/03/12/nyregion/co...,No,,,Yes,2020-03-20,https://ny.curbed.com/2020/3/20/21187022/coron...,No,,
235,New York,Chautauqua County,Yes,2020-03-22,https://covid19.healthdata.org/united-states-o...,No,,,Yes,2020-03-18,...,https://www.facebook.com/HarvestChapelFMC/?__t...,No,,,Yes,2020-03-22,https://covid19.healthdata.org/united-states-o...,No,,
238,New York,Kings County,Yes,2020-03-22,https://www.governor.ny.gov/news/governor-cuom...,No,,,Yes,2020-03-14,...,https://www.nytimes.com/2020/03/12/nyregion/co...,No,,,Yes,2020-03-22,https://gothamist.com/news/new-york-pause-coro...,No,,
240,New York,Nassau County,Yes,2020-03-22,https://www.governor.ny.gov/news/governor-cuom...,No,,,Yes,2020-03-16,...,https://www.governor.ny.gov/news/no-2023-conti...,No,,,Yes,2020-03-20,https://www.theguardian.com/us-news/2020/mar/2...,No,,
243,New York,Onondaga County,Yes,2020-03-22,https://www.syracuse.com/coronavirus/2020/03/n...,No,,,Yes,2020-03-14,...,https://ny.curbed.com/2020/3/20/21187022/coron...,No,,,Yes,2020-03-20,https://ny.curbed.com/2020/3/20/21187022/coron...,No,,
246,New York,Queens County,Yes,2020-03-22,https://www.governor.ny.gov/news/governor-cuom...,No,,,Yes,2020-03-14,...,https://abc7ny.com/religion/mass-canceled-in-b...,No,,,Yes,2020-03-22,https://www.governor.ny.gov/news/governor-cuom...,No,,
247,New York,St. Lawrence County,Yes,2020-03-22,https://www.governor.ny.gov/news/governor-cuom...,No,,,Yes,2020-03-14,...,https://www.facebook.com/MassenaCatholicCommun...,No,,,Yes,2020-03-22,https://covid19.healthdata.org/united-states-o...,No,,
248,New York,Suffolk County,Yes,2020-03-22,https://www.governor.ny.gov/news/governor-cuom...,No,,,Yes,2020-03-13,...,https://www.newsday.com/news/health/religious-...,No,,,Yes,2020-03-22,https://patch.com/new-york/new-york-city/new-y...,No,,
249,New York,Tompkins County,Yes,2020-03-22,https://ithacavoice.com/2020/03/new-york-state...,No,,,Yes,2020-03-13,...,https://www.localsyr.com/bridge-street/worship...,No,,,Yes,2020-03-22,https://tompkinscountyny.gov/health/factsheets...,No,,


### For the 11 counties in New York, let's study the spread of cases from 22nd March, 2020

In [215]:
# Before the lockdown
ny_county = us_county_data[(us_county_data['county'].isin(['Albany', 'Bronx', 'Chautauqua', 'Kings', 'Nassau',
'Onondaga', 'Queens', 'St. Lawrence', 'Suffolk', 'Tompkins', 'Ulster'])) & (us_county_data['state'] == 'New York')]
ny_county = ny_county[ny_county['date'] < '2020-03-22']

ny_county_cases = ny_county.groupby(['date', 'county'])['cases'].max().reset_index()

fig = px.line(ny_county_cases, x='date', y='cases', color='county')
fig.update_layout(title = 'Spread of Covid-19 cases before the lockdown was announced', 
                 xaxis_title='Dates',
                 yaxis_title='Number of cases')
fig.show()

In [216]:
# after the lockdown was announced
ny_county = us_county_data[(us_county_data['county'].isin(['Albany', 'Bronx', 'Chautauqua', 'Kings', 'Nassau',
'Onondaga', 'Queens', 'St. Lawrence', 'Suffolk', 'Tompkins', 'Ulster'])) & (us_county_data['state'] == 'New York')]
ny_county = ny_county[ny_county['date'] >= '2020-03-22']

ny_county_cases = ny_county.groupby(['date', 'county'])['cases'].max().reset_index()

fig = px.line(ny_county_cases, x='date', y='cases', color='county')
fig.update_layout(title = 'Spread of Covid-19 cases after the lockdown was announced', 
                 xaxis_title='Dates',
                 yaxis_title='Number of cases')
fig.show()


### Let's look at Washington's Data

In [212]:
washington_sd = social_distance[social_distance['state'] == 'Washington']
washington_sd

Unnamed: 0,state,county,business_closed,business_closed_date,business_closed_url,business_open,business_open_date,business_open_url,school_closed,school_closed_date,...,religion_closed_url,religion_open,religion_open_date,religion_open_url,lockdown_closed,lockdown_closed_date,lockdown_closed_url,lockdown_open,lockdown_open_date,lockdown_open_url
393,Washington,Benton County,Yes,2020-03-25,https://www.thenewstribune.com/news/politics-g...,No,,,Yes,2020-03-16,...,https://www.facebook.com/C3TriCities/,No,,,Yes,2020-03-25,https://www.governor.wa.gov/sites/default/file...,No,,
394,Washington,Chelan County,Yes,2020-03-17,https://www.co.chelan.wa.us/notifications/arti...,No,,,Yes,2020-03-17,...,https://www.wenatcheeworld.com/news/coronaviru...,No,,,Yes,2020-03-17,https://www.co.chelan.wa.us/notifications/arti...,No,,
395,Washington,Clark County,Yes,2020-03-25,https://www.columbian.com/news/2020/mar/24/was...,No,,,Yes,2020-03-16,...,https://www.governor.wa.gov/sites/default/file...,No,,,Yes,2020-03-25,https://www.governor.wa.gov/sites/default/file...,No,,
396,Washington,Jefferson County,Yes,2020-03-15,https://www.ptleader.com/stories/restaurants-b...,No,,,Yes,2020-03-17,...,https://www.ptleader.com/stories/covid-19-live...,No,,,Yes,2020-03-22,https://www.ptleader.com/stories/covid-19-live...,No,,
397,Washington,King County,Yes,2020-03-23,https://www.doh.wa.gov/Emergencies/BePreparedB...,No,,,Yes,2020-03-13,...,https://www.nytimes.com/2020/03/11/us/coronavi...,No,,,Yes,2020-03-23,https://medium.com/wagovernor/inslee-announces...,No,,
398,Washington,Kitsap County,Yes,2020-03-25,https://www.governor.wa.gov/sites/default/file...,No,,,Yes,2020-03-16,...,https://www.seattletimes.com/seattle-news/stre...,No,,,Yes,2020-03-25,https://www.governor.wa.gov/sites/default/file...,No,,
399,Washington,Pierce County,Yes,2020-03-23,https://medium.com/wagovernor/inslee-announces...,No,,,Yes,2020-03-17,...,https://www.thenewstribune.com/news/state/wash...,No,,,Yes,2020-03-23,https://medium.com/wagovernor/inslee-announces...,No,,
400,Washington,San Juan County,Yes,2020-03-15,https://www.ptleader.com/stories/restaurants-b...,No,,,Yes,2020-03-17,...,https://www.ptleader.com/stories/covid-19-live...,No,,,Yes,2020-03-22,https://www.ptleader.com/stories/covid-19-live...,No,,
401,Washington,Snohomish County,Yes,2020-03-23,https://medium.com/wagovernor/inslee-announces...,No,,,Yes,2020-03-17,...,https://www.king5.com/article/news/health/coro...,No,,,Yes,2020-03-22,https://mynorthwest.com/1777834/everett-mayor-...,No,,
402,Washington,Thurston County,Yes,2020-03-25,https://www.kiro7.com/news/local/coronavirus-w...,No,,,Yes,2020-03-16,...,http://www.nwcatholic.org/news/local/archbisho...,No,,,Yes,2020-03-25,https://www.governor.wa.gov/sites/default/file...,No,,


In [217]:
# before the lockdown was announced
washington_county = us_county_data[(us_county_data['county'].isin(['Benton', 'Chelan', 'Clark', 'Jefferson', 
'King', 'Kitsap', 'Pierce', 'San Juan', 'Snohomish', 'Thurston', 'Walla Walla', 'Whatcom'])) & (us_county_data['state'] == 'Washington')]
washington_county = washington_county[washington_county['date'] < '2020-03-15']

washington_county_cases = washington_county.groupby(['date', 'county'])['cases'].max().reset_index()

fig = px.line(washington_county_cases, x='date', y='cases', color='county')
fig.update_layout(title = 'Spread of Covid-19 cases before the lockdown was announced', 
                 xaxis_title='Dates',
                 yaxis_title='Number of cases')
fig.show()

In [218]:
# after the lockdown was announced
washington_county = us_county_data[(us_county_data['county'].isin(['Benton', 'Chelan', 'Clark', 'Jefferson', 
'King', 'Kitsap', 'Pierce', 'San Juan', 'Snohomish', 'Thurston', 'Walla Walla', 'Whatcom'])) & (us_county_data['state'] == 'Washington')]
washington_county = washington_county[washington_county['date'] >= '2020-03-15']

washington_county_cases = washington_county.groupby(['date', 'county'])['cases'].max().reset_index()

fig = px.line(washington_county_cases, x='date', y='cases', color='county')
fig.update_layout(title = 'Spread of Covid-19 cases after the lockdown was announced', 
                 xaxis_title='Dates',
                 yaxis_title='Number of cases')
fig.show()