In [1]:
import pandas as pd
import numpy as np
import altair as alt
from vega_datasets import data
import plotly as py
import plotly.figure_factory as ff

In [2]:
# Read the per-country daily distribution file and the per-state US file.
geo = pd.read_csv('4-21_distribution.csv', encoding="ISO-8859-1")
US_states = pd.read_csv('us-states.csv')

# Lower-case every header so later cells can rely on a single spelling.
geo.columns = [str.lower(header) for header in geo.columns]

# Parsed timestamp for plotting on a temporal axis.
geo["date"] = pd.to_datetime(geo['daterep'])

# Daily cases/deaths divided by the 2018 population. Despite the "percentage"
# in the column names these are plain fractions (no factor of 100 is applied).
population = geo['popdata2018']
for count_column in ('cases', 'deaths'):
    geo[f"percentage{count_column}increase"] = geo[count_column] / population

In [3]:
# Total cases and deaths per country over the whole reporting period.
#
# Only the two count columns are summed, and they are selected explicitly.
# Aggregating the whole frame relied on pandas silently discarding columns it
# could not sum (strings, the datetime `date` column); pandas >= 2.0 no longer
# does that, so the explicit selection keeps this cell working and avoids
# producing meaningless sums of day/month/year/population.
total = (
    geo.groupby("countriesandterritories", as_index=False)[["cases", "deaths"]]
    .sum()
)

# Countries with more than 5000 cumulative cases, largest first; show the top 5.
totals_filtered = total.sort_values(by="cases", ascending=False).query("cases > 5000")
totals_filtered.head(5)

Unnamed: 0,countriesandterritories,cases,deaths
198,United_States_of_America,787752,42539
177,Spain,200210,20852
98,Italy,181228,24114
74,Germany,143457,4598
195,United_Kingdom,124743,16509


In [4]:
# Restrict the daily table to countries with more than 20000 total cases.
ranked_totals = total.sort_values(by="cases", ascending=False)
relevant_countries = ranked_totals.query("cases > 20000")['countriesandterritories'].values

is_relevant_country = geo["countriesandterritories"].isin(relevant_countries)
filtered_geo = geo.loc[is_relevant_country]

# Preview the first five United States rows of the filtered table.
is_usa_row = filtered_geo["countriesandterritories"] == "United_States_of_America"
filtered_geo[is_usa_row].head(5)

Unnamed: 0,daterep,day,month,year,cases,deaths,countriesandterritories,geoid,countryterritorycode,popdata2018,continentexp,date,percentagecasesincrease,percentagedeathsincrease
11745,4/21/20,21,4,2020,28065,1857,United_States_of_America,US,USA,327167434.0,America,2020-04-21,8.6e-05,6e-06
11746,4/20/20,20,4,2020,24601,1772,United_States_of_America,US,USA,327167434.0,America,2020-04-20,7.5e-05,5e-06
11747,4/19/20,19,4,2020,32922,1856,United_States_of_America,US,USA,327167434.0,America,2020-04-19,0.000101,6e-06
11748,4/18/20,18,4,2020,30833,3770,United_States_of_America,US,USA,327167434.0,America,2020-04-18,9.4e-05,1.2e-05
11749,4/17/20,17,4,2020,31667,2299,United_States_of_America,US,USA,327167434.0,America,2020-04-17,9.7e-05,7e-06


In [5]:
# Area chart of the summed daily cases per date, coloured by country, for the
# countries kept in `filtered_geo`.
chart = (
    alt.Chart(filtered_geo)
    .mark_area()
    .encode(
        x=alt.X('date:T', axis=alt.Axis(domain=False, tickSize=0)),
        y=alt.Y('sum(cases):Q'),
        color=alt.Color('countriesandterritories:N', scale=alt.Scale(scheme='category20b')),
    )
    .interactive()
    .properties(width=800, height=500, title="Daily Covid 19 Cases in Top Countries")
)

# Title styling is applied on the final chart only, then rendered as SVG.
chart.configure_title(fontSize=20, font='Courier', color='black').display(renderer='svg')

In [6]:
# Area chart of the summed daily deaths per date, coloured by country, for the
# countries kept in `filtered_geo`.
chart_deaths = (
    alt.Chart(filtered_geo)
    .mark_area()
    .encode(
        x=alt.X('date:T', axis=alt.Axis(domain=False, tickSize=0)),
        y=alt.Y('sum(deaths):Q'),
        color=alt.Color('countriesandterritories:N', scale=alt.Scale(scheme='category20b')),
    )
    .interactive()
    .properties(width=800, height=500, title="Daily Covid 19 Deaths in Top Countries")
)

# Title styling is applied on the final chart only, then rendered as SVG.
chart_deaths.configure_title(fontSize=20, font='Courier', color='black').display(renderer='svg')

In [7]:
# Bar charts of the total cases (dark green) and total deaths (dark red) per
# country in `filtered_geo`, layered into a single figure.
corona_cases = (
    alt.Chart(filtered_geo)
    .mark_bar(size=50, color='darkgreen')
    .encode(
        x=alt.X('countriesandterritories:N'),
        y='sum(cases):Q',
    )
    .interactive()
    .properties(width=800, height=500, title="Total Covid 19 Cases in Top Countries")
)

corona_deaths = (
    alt.Chart(filtered_geo)
    .mark_bar(size=50, color='darkred')
    .encode(
        x=alt.X('countriesandterritories:N', axis=alt.Axis(domain=False, tickSize=0)),
        y=alt.Y('sum(deaths):Q'),
    )
    .interactive()
    .properties(width=800, height=500, title="Total Covid 19 Cases and Deaths in Top Countries")
)

# `+` layers the two bar charts; the title style is configured on the layered
# chart before it is rendered as SVG.
layered_totals = corona_cases + corona_deaths
layered_totals.configure_title(fontSize=20, font='Courier', color='black').display(renderer='svg')

In [8]:
# Running totals of cases and deaths per country.
# Step 1 sums the counts per (country, date); step 2 accumulates them within
# each country (index level 0); the index is then flattened back to columns.
daily_counts = geo[['countriesandterritories', 'date', 'cases', 'deaths']]
geo_cumulative = (
    daily_counts
    .groupby(['countriesandterritories', 'date']).sum()
    .groupby(level=0).cumsum()
    .reset_index()
)

# Keep only the countries selected earlier in `relevant_countries`.
in_top_countries = geo_cumulative["countriesandterritories"].isin(relevant_countries)
filtered_geo_cumulative = geo_cumulative.loc[in_top_countries]

In [9]:
# Daily totals across all countries: one row per date.
#
# The summed columns are selected explicitly instead of aggregating the whole
# frame and dropping the unwanted results afterwards. Aggregating everything
# depended on pandas silently discarding the string columns, which pandas
# >= 2.0 no longer does. The resulting columns are unchanged:
# date, cases, deaths, percentagecasesincrease, percentagedeathsincrease.
# NOTE(review): the two "percentage" columns are sums of per-country fractions,
# which is probably not a meaningful worldwide figure -- confirm before using.
summed_columns = ["cases", "deaths", "percentagecasesincrease", "percentagedeathsincrease"]
aggregate_total = geo.groupby("date", as_index=False)[summed_columns].sum()
aggregate_total.tail(5)

Unnamed: 0,date,cases,deaths,percentagecasesincrease,percentagedeathsincrease
108,2020-04-17,84339,8507,0.004539,0.000326
109,2020-04-18,83324,8263,0.003097,0.000242
110,2020-04-19,84121,6421,0.003637,0.000199
111,2020-04-20,74139,5145,0.002866,0.00019
112,2020-04-21,76037,5203,0.003432,0.000199


In [10]:
# United States rows only, without the per-capita columns.
USA_data = filtered_geo[filtered_geo["countriesandterritories"] == "United_States_of_America"]
USA_data = USA_data.drop(columns = ['percentagecasesincrease', 'percentagedeathsincrease'])

# Day-over-day change, in percent. pct_change compares each row with the one
# before it, so it must run on the chronologically sorted rows (sorted once and
# reused for both columns).
usa_by_date = USA_data.sort_values('date')

# reindex(USA_data.index) puts the results back into USA_data's own row order
# before they are assigned positionally. The previous sort_index() only gave
# the right order when USA_data happened to be stored in ascending index order.
case_percentages = (usa_by_date['cases'].pct_change() * 100).reindex(USA_data.index).values
death_percentages = (usa_by_date['deaths'].pct_change() * 100).reindex(USA_data.index).values
USA_data['casespercentagegrowth%'] = case_percentages
USA_data['deathpercentages%'] = death_percentages

In [11]:
# Area chart of the daily percentage change in US cases.
chart_cases = alt.Chart(USA_data).mark_area(color = 'darkblue').encode(
    alt.X('date:T',
        axis=alt.Axis(domain=False, tickSize=0)
    ),
    alt.Y('casespercentagegrowth%:Q')
).interactive(
).properties(
    width = 800,
    height = 500,
    title = "Daily %Change USA Covid 19 Cases"
)

chart_cases.configure_title(
    fontSize=20,
    font='Courier',
    color='black'
).display(renderer = 'svg')

# Area chart of the daily percentage change in US deaths.
# The title previously read "... Cases" although the y-channel is the deaths
# column; it now names the quantity that is actually plotted.
# Note: this rebinds `chart_deaths`, which an earlier cell also assigns.
chart_deaths = alt.Chart(USA_data).mark_area(color = 'darkblue').encode(
    alt.X('date:T',
        axis=alt.Axis(domain=False, tickSize=0)
    ),
    alt.Y('deathpercentages%:Q')
).interactive(
).properties(
    width = 800,
    height = 500,
    title = "Daily %Change USA Covid 19 Deaths"
)

chart_deaths.configure_title(
    fontSize=20,
    font='Courier',
    color='black'
).display(renderer = 'svg')

In [12]:
# Per-state maximum of every column in the US states file.
# NOTE(review): presumably the counts in us-states.csv are cumulative, so the
# maximum is the latest total per state -- confirm against the data source.
state_max = US_states.groupby('state', as_index = False).agg('max')

# The FIPS code becomes the `id` column that the map cell joins on. It is taken
# from the same aggregation (one groupby instead of two) and no longer stored
# in a variable called `id`, which shadowed the Python builtin for the rest of
# the session.
state_total = state_max.drop(columns = ['fips'])
state_total['id'] = state_max['fips'].values

# States left out of the map. Add further names (e.g. "California",
# "Washington") to this list to exclude them as well.
# NOTE(review): presumably these are removed so that their large counts do not
# dominate the colour scale -- confirm intent.
excluded_states = ["New York", "New Jersey"]
state_total = state_total[~state_total['state'].isin(excluded_states)]

In [17]:
# Cumulative cases per country as a line chart where hovering near a line
# changes its stroke size.
highlight = alt.selection(
    type='single',
    on='mouseover',
    fields=['countriesandterritories'],
    nearest=True,
)

# Shared encodings for both layers.
base = alt.Chart(filtered_geo_cumulative).encode(
    x='date:T',
    y='cases:Q',
    color='countriesandterritories:N',
)

# Fully transparent points that carry the hover selection and the figure
# properties (final size is 800 x 500).
points = (
    base.mark_circle()
    .encode(opacity=alt.value(0))
    .add_selection(highlight)
    .properties(width=800, height=500, title="Cumulative Covid 19 Cases in Top Countries")
)

# Line size depends on whether the selection condition `~highlight` holds.
line_size = alt.condition(~highlight, alt.value(5), alt.value(8))
lines = base.mark_line().encode(size=line_size)

layered = points + lines
layered.configure_title(fontSize=30, font='Courier', color='black').display(renderer='svg')

In [14]:
# Cumulative deaths per country as a line chart where hovering near a line
# changes its stroke size.
highlight = alt.selection(
    type='single',
    on='mouseover',
    fields=['countriesandterritories'],
    nearest=True,
)

# Shared encodings for both layers.
base = alt.Chart(filtered_geo_cumulative).encode(
    x='date:T',
    y='deaths:Q',
    color='countriesandterritories:N',
)

# Fully transparent points that carry the hover selection and the figure
# properties (final size is 800 x 500).
points = (
    base.mark_circle()
    .encode(opacity=alt.value(0))
    .add_selection(highlight)
    .properties(width=800, height=500, title="Cumulative Covid 19 Deaths in Top Countries")
)

# Line size depends on whether the selection condition `~highlight` holds.
line_size = alt.condition(~highlight, alt.value(5), alt.value(8))
lines = base.mark_line().encode(size=line_size)

layered = points + lines
layered.configure_title(fontSize=30, font='Courier', color='black').display(renderer='svg')

In [15]:
# Map of US states coloured by the `cases` value in `state_total`.
states = alt.topo_feature(data.us_10m.url, 'states')
source = state_total

# Inline values used as the lookup table; rows are matched to the map
# features on `id`, and only `cases` is pulled into the chart.
country_values = alt.pipe(source[['id', 'cases', 'state']], alt.to_values)
case_lookup = alt.LookupData(country_values, 'id', ['cases'])

state_map = (
    alt.Chart(states)
    .mark_geoshape(stroke='gray')
    .properties(width=800, height=500)
    .encode(color='cases:Q', tooltip=['id:N', 'cases:Q'])
    .transform_lookup(lookup='id', from_=case_lookup)
    .project(type='albersUsa')
)
state_map.display(renderer='svg')