In [1]:
# Run this cell to update data after doing a git pull on the COVID-19 subdirectory

import csv
import pandas as pd
import altair as alt

# Select the Altair renderer so that charts are shown inline.
# NOTE: you may have to change 'default' to 'notebook' depending on your configuration.
alt.renderers.enable('default')

    
# Countries/regions will be defined using this class
class Region:
    """Cumulative COVID-19 counts for one country/region plus its population.

    The ``confirmed`` and ``deaths`` lists start empty and are filled in after
    construction; the last element of each list is treated as the latest value.
    """

    def __init__(self, name, population):
        """Create a region with no case data yet.

        Args:
            name: Display name used in summaries and as the chart label.
            population: Denominator for the per-population ratios.
        """
        self.name = name
        self.population = population
        self.confirmed = []
        self.deaths = []

    def get_confirmed(self):
        """Return the latest confirmed count (last element of ``confirmed``).

        Raises:
            IndexError: if ``confirmed`` is still empty.
        """
        return self.confirmed[-1]

    def get_deaths(self):
        """Return the latest death count (last element of ``deaths``).

        Raises:
            IndexError: if ``deaths`` is still empty.
        """
        return self.deaths[-1]

    def get_cop(self):
        """Return confirmed over population (latest confirmed / population)."""
        return self.get_confirmed() / self.population

    def get_dop(self):
        """Return deaths over population (latest deaths / population)."""
        return self.get_deaths() / self.population

    def get_doc(self):
        """Return deaths over confirmed (latest deaths / latest confirmed).

        Returns 0.0 when the latest confirmed count is zero, so a region with
        no recorded cases does not raise ZeroDivisionError.
        """
        deaths = self.get_deaths()
        confirmed = self.get_confirmed()
        if confirmed == 0:
            return 0.0
        return deaths / confirmed

    def summary(self):
        """Print one summary line for this region.

        Format: Name: Confirmed, Deaths, Confirmed over pop., Deaths over pop.,
        Deaths over confirmed
        """
        fields = (
            self.get_confirmed(),
            self.get_deaths(),
            self.get_cop(),
            self.get_dop(),
            self.get_doc(),
        )
        print(self.name + ': ' + ', '.join(str(value) for value in fields))
    

# Convert a list of strings into a list of ints
def to_int(row: list) -> list:
    """Return a new list holding ``int(item)`` for every item of ``row``."""
    return list(map(int, row))


# Add values corresponding to the same position in multiple lists 
# (for regions that have multiple regions in the data)
def add_row(old_row: list, new_row: list) -> list:
    """Add the values of ``new_row`` onto the running totals in ``old_row``.

    ``new_row`` items are converted with ``int``. When ``old_row`` is an empty
    list the converted ``new_row`` is returned as the first set of totals;
    otherwise the two rows are summed position by position (stopping at the
    shorter of the two).
    """
    if old_row == []:
        return [int(value) for value in new_row]
    return [running + value for running, value in zip(old_row, map(int, new_row))]
    
        
# Initialize the regions and set populations
taiwan = Region(name='Taiwan', population=23780000)
singapore = Region(name='Singapore', population=5639000)
hongkong = Region(name='Hong Kong', population=7451000)
# Mainland only
china = Region(name='China (Mainland)', population=1427647786)
usa = Region(name='USA', population=329515103)
italy = Region(name='Italy', population=60360000)
southkorea = Region(name='South Korea', population=51640000)
japan = Region(name='Japan', population=126500000)

            
# Folder (inside the COVID-19 checkout) that holds the global time-series CSVs
TIME_SERIES_DIR = './COVID-19/csse_covid_19_data/csse_covid_19_time_series/'

# Regions read from a single row, keyed by the value in column 1 ("Country/Region")
SINGLE_ROW_REGIONS = {
    "Taiwan*": taiwan,
    "Singapore": singapore,
    "US": usa,
    "Italy": italy,
    "Korea, South": southkorea,
    "Japan": japan,
}


def load_time_series(filename: str, attr: str):
    """Read one time-series CSV into the tracked Region objects.

    Column 0 is matched as the province/state, column 1 as the country/region,
    and columns 4 onward are taken as the per-date counts. The confirmed and
    deaths files are both read through this function so the row-matching rules
    live in one place.

    Args:
        filename: CSV file name inside TIME_SERIES_DIR.
        attr: Region attribute to fill: 'confirmed' or 'deaths'.

    Returns:
        The dates parsed from the header row (the row whose column 1 is
        "Country/Region"), or None if no such row was found.

    Raises:
        ValueError: if ``attr`` is not 'confirmed' or 'deaths'.
    """
    if attr not in ('confirmed', 'deaths'):
        raise ValueError("attr must be 'confirmed' or 'deaths', got " + repr(attr))

    header_dates = None
    # China is summed over several rows; start from empty so that calling this
    # function again never adds onto totals from a previous call.
    setattr(china, attr, [])

    with open(TIME_SERIES_DIR + filename, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if len(row) < 2:
                continue  # blank/short line: nothing to match on
            province, country, counts = row[0], row[1], row[4:]
            if country == "Country/Region":
                header_dates = pd.to_datetime(counts, format="%m/%d/%y")
            elif country in SINGLE_ROW_REGIONS:
                setattr(SINGLE_ROW_REGIONS[country], attr, to_int(counts))
            elif province == "Hong Kong":
                setattr(hongkong, attr, to_int(counts))
            elif country == "China":  # mainland
                # NOTE(review): every remaining "China" row is summed here; confirm
                # whether other rows (e.g. Macau) should be left out as well.
                setattr(china, attr, add_row(getattr(china, attr), counts))
    return header_dates


# Get confirmed cases (the header row of this file also supplies the dates)
dates = load_time_series('time_series_covid19_confirmed_global.csv', 'confirmed')

# Get deaths
load_time_series('time_series_covid19_deaths_global.csv', 'deaths')

# Fail here, next to the cause, rather than on the indexing below
if dates is None or len(dates) == 0:
    raise ValueError('No header row with dates was found in the confirmed-cases CSV')

as_of_date = dates[-1]  # The last date that was recorded

In [2]:
# Run this cell to get current stats for the countries we care about

print('As of', as_of_date.strftime("%Y-%m-%d"))
print('Region: Confirmed, Deaths, Confirmed over pop., Deaths over pop., Deaths over confirmed')

# One summary line per region, in display order
for region in (taiwan, singapore, hongkong, southkorea, japan, china, usa, italy):
    region.summary()

As of 2020-04-15
Region: Confirmed, Deaths, Confirmed over pop., Deaths over pop., Deaths over confirmed
Taiwan: 395, 6, 1.6610597140454165e-05, 2.5231286795626577e-07, 0.015189873417721518
Singapore: 3699, 10, 0.0006559673701010818, 1.7733640716439086e-06, 0.002703433360367667
Hong Kong: 1017, 4, 0.00013649174607435245, 5.368406925244934e-07, 0.003933136676499509
South Korea: 10591, 225, 0.0002050929512006197, 4.35708752904725e-06, 0.0212444528373147
Japan: 8100, 146, 6.403162055335969e-05, 1.1541501976284584e-06, 0.018024691358024692
China (Mainland): 82339, 3342, 5.7674589494302626e-05, 2.3409135171663413e-06, 0.04058829959071643
USA: 636350, 28326, 0.001931170966691624, 8.59626758898514e-05, 0.04451323956941935
Italy: 165155, 21645, 0.0027361663353214047, 0.00035859840954274353, 0.1310587024310496


In [3]:
# Run the following cells to get charts comparing rates of infection, death, and population

# Build the lists for use as chart data
regions = [taiwan, singapore, hongkong, southkorea, japan, china, usa, italy]

# Parallel lists: position i in every list belongs to regions[i]
names = [region.name for region in regions]
cases = [region.get_confirmed() for region in regions]
deaths = [region.get_deaths() for region in regions]
doc = [region.get_doc() for region in regions]
cop = [region.get_cop() for region in regions]
dop = [region.get_dop() for region in regions]

# Total Cases
# One row per region: its name and latest confirmed count
data = pd.DataFrame({'Region': names, 'Cases': cases})

# Horizontal bars, with the regions sorted by the x value
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(x="Cases:Q", y=alt.Y("Region:O", sort='x'))
    .properties(
        title={
            "text": 'COVID-19: Confirmed Cases',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)

# Value labels, left-aligned and nudged 3px past the bar end
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Cases:Q", format=",")
)

# Final expression of the cell: the layered chart that gets displayed
(bars + text).configure_axisX(labelAngle=-45).configure_axisY(title=None)

In [4]:
# Total Deaths
# One row per region: its name and latest death count
data = pd.DataFrame({'Region': names, 'Deaths': deaths})

# Horizontal bars, with the regions sorted by the x value
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(x="Deaths:Q", y=alt.Y("Region:O", sort='x'))
    .properties(
        title={
            "text": 'COVID-19: Deaths',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)

# Value labels, left-aligned and nudged 3px past the bar end
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Deaths:Q", format=",")
)

# Final expression of the cell: the layered chart that gets displayed
(
    (bars + text)
    .configure_axisX(labelAngle=-45)
    .configure_axisY(title=None)
    .configure_bar(color='darkred')
)

In [5]:
# Confirmed over population
# One row per region: its name and confirmed-over-population ratio
data = pd.DataFrame({'Region': names, '%': cop})

# Horizontal bars on a percent-formatted axis, regions sorted by the x value
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("%:Q", axis=alt.Axis(format='%')),
        y=alt.Y("Region:O", sort='x'),
    )
    .properties(
        title={
            "text": 'COVID-19: Confirmed Cases as % of Population',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)

# Value labels as percentages with three decimals
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("%:Q", format=".3%")
)

# Final expression of the cell: the layered chart that gets displayed
(bars + text).configure_axisX(labelAngle=-45, title=None).configure_axisY(title=None)

In [6]:
# Deaths over confirmed
# One row per region: its name and deaths-over-confirmed ratio
data = pd.DataFrame({'Region': names, '%': doc})

# Horizontal bars on a percent-formatted axis, regions sorted by the x value
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("%:Q", axis=alt.Axis(format='%')),
        y=alt.Y("Region:O", sort='x'),
    )
    .properties(
        title={
            "text": 'COVID-19: Deaths as % of Confirmed Cases',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)

# Value labels as percentages with two decimals
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("%:Q", format=".2%")
)

# Final expression of the cell: the layered chart that gets displayed
(
    (bars + text)
    .configure_axisX(labelAngle=0, title=None)
    .configure_axisY(title=None)
    .configure_bar(color='darkred')
)

In [7]:
# Deaths over population
# One row per region: its name and deaths-over-population ratio
data = pd.DataFrame({'Region': names, '%': dop})

# Horizontal bars on a percent-formatted axis, regions sorted by the x value
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("%:Q", axis=alt.Axis(format='%')),
        y=alt.Y("Region:O", sort='x'),
    )
    .properties(
        title={
            "text": 'COVID-19: Deaths as % of Population',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)

# Value labels as percentages with five decimals
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("%:Q", format=".5%")
)

# Final expression of the cell: the layered chart that gets displayed
(
    (bars + text)
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
    .configure_bar(color='darkred')
)

In [8]:
# Trajectory

# TODO: plot confirmed cases over time for each region, along the lines of:
# plt.plot(dates, usa.confirmed)
# plt.title("USA Confirmed Cases")

In [9]:
# Structures data for grouped bar graphs
def create_group_data(regions: list) -> pd.DataFrame:
    """Build the long-format frame used by the grouped bar charts.

    Each region contributes two rows, in this order: its latest confirmed
    count tagged 'Confirmed', then its latest death count tagged 'Deaths'.

    Args:
        regions: Objects exposing ``name``, ``get_confirmed()`` and
            ``get_deaths()``.

    Returns:
        A DataFrame with columns 'Region', 'Count' and 'Case Type'.
    """
    rows = [
        [region.name, count, case_type]
        for region in regions
        for count, case_type in (
            (region.get_confirmed(), 'Confirmed'),
            (region.get_deaths(), 'Deaths'),
        )
    ]
    return pd.DataFrame(rows, columns=['Region', 'Count', 'Case Type'])


# Confirmed and Deaths, Small Regions
regions_small = [taiwan, singapore, hongkong, southkorea, japan]

# Two rows per region (Confirmed, Deaths)
data = create_group_data(regions_small)

# One bar per case type, coloured by case type (legend hidden)
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x="Count:Q",
        y="Case Type:N",
        color=alt.Color(
            "Case Type:N",
            legend=None,
            scale=alt.Scale(range=['steelblue', 'darkred']),
        ),
    )
)

# Value labels, left-aligned and nudged 3px past the bar end
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Count:Q", format=",")
)

# Final expression of the cell: one facet row per region
(
    (bars + text)
    .facet(row="Region:N")
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
    .properties(
        title={
            "text": 'COVID-19: Regions with Few Confirmed Cases',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)

In [10]:
# Confirmed and Deaths, Large Regions
regions_large = [china, usa, italy]

# Two rows per region (Confirmed, Deaths)
data = create_group_data(regions_large)

# One bar per case type, coloured by case type (legend hidden)
bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x="Count:Q",
        y="Case Type:N",
        color=alt.Color(
            "Case Type:N",
            legend=None,
            scale=alt.Scale(range=['steelblue', 'darkred']),
        ),
    )
)

# Value labels, left-aligned and nudged 3px past the bar end
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Count:Q", format=",")
)

# Final expression of the cell: one facet row per region
(
    (bars + text)
    .facet(row="Region:N")
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
    .properties(
        title={
            "text": 'COVID-19: Regions with Many Confirmed Cases',
            "subtitle": 'As of ' + as_of_date.strftime("%B %d, %Y"),
        }
    )
)