In [1]:
# Run this cell to update data after doing a git pull on the COVID-19 subdirectory

import csv
import pandas as pd
import altair as alt

# Select the Altair renderer so that charts are shown inline in the notebook output
alt.renderers.enable('default') # Note: depending on your configuration you may have to change 'default' to 'notebook'

    
# Countries/regions will be defined using this class
class Region:
    """A country or sub-region whose CSSE time series we want to chart.

    Attributes:
        name: The name that we want to display in charts: 'China (Mainland)'
        population: The region's population: 1427647786
        csse_col2: The country name as specified in the CSSE tables, column 2: 'China'
        csse_col1: (optional) The region name(s) as specified in the CSSE tables,
            column 1: ['Hong Kong']. None means "no column 1 filter".
        csse_col1_excl: (optional) If True, the column 1 region(s) are excluded
            instead of selected, for countries that have irrelevant regions,
            e.g. Hong Kong for Mainland China.
        confirmed: Confirmed-case counts, one entry per date (filled in by the loader).
        deaths: Death counts, one entry per date (filled in by the loader).
    """

    def __init__(self, name: str, population: int, csse_col2: str, csse_col1=None, csse_col1_excl=False):
        self.name = name
        self.population = population
        self.csse_col2 = csse_col2
        # The loader tests `row[0] in csse_col1`. With a bare string that becomes a
        # substring test, and '' (blank province) is a substring of every string,
        # so a single name is wrapped into a one-element list.
        if isinstance(csse_col1, str):
            csse_col1 = [csse_col1]
        self.csse_col1 = csse_col1
        self.csse_col1_excl = csse_col1_excl
        self.confirmed = []
        self.deaths = []

    def _latest(self, series: list, label: str):
        """Return the last entry of `series`.

        Raises:
            IndexError: if the series is empty (same exception type as plain
                indexing, but the message names the region and the series).
        """
        if len(series) == 0:
            raise IndexError('No ' + label + ' data loaded for region ' + repr(self.name))
        return series[-1]

    def get_confirmed(self):
        """Latest confirmed count."""
        return self._latest(self.confirmed, 'confirmed')

    def get_deaths(self):
        """Latest death count."""
        return self._latest(self.deaths, 'deaths')

    def get_cop(self):
        """Confirmed over population."""
        return self.get_confirmed()/self.population

    def get_dop(self):
        """Deaths over population."""
        return self.get_deaths()/self.population

    def get_doc(self):
        """Deaths over confirmed; 0.0 when there are no confirmed cases."""
        confirmed = self.get_confirmed()
        if confirmed == 0:
            # Avoid ZeroDivisionError for regions that have not reported any case.
            return 0.0
        return self.get_deaths()/confirmed

    def summary(self):
        """Print a one-line summary of the region.

        Format: Name: Confirmed, Deaths, Confirmed over pop., Deaths over pop., Deaths over confirmed
        """
        figures = (self.get_confirmed(), self.get_deaths(), self.get_cop(),
                   self.get_dop(), self.get_doc())
        print(self.name + ': ' + ', '.join(str(value) for value in figures))
    
        
# Create one Region per country/territory of interest, together with its population.
# The meaning of each argument is described in the comments on Region.__init__.
taiwan = Region(name='Taiwan', population=23780000, csse_col2='Taiwan*')
singapore = Region(name='Singapore', population=5639000, csse_col2='Singapore')
hongkong = Region(name='Hong Kong', population=7451000, csse_col2='China', csse_col1=['Hong Kong'])
china = Region(  # mainland: every 'China' row except Hong Kong
    name='China (Mainland)',
    population=1427647786,
    csse_col2='China',
    csse_col1=['Hong Kong'],
    csse_col1_excl=True,
)
usa = Region(name='USA', population=329515103, csse_col2='US')
italy = Region(name='Italy', population=60360000, csse_col2='Italy')
southkorea = Region(name='South Korea', population=51640000, csse_col2='Korea, South')
japan = Region(name='Japan', population=126500000, csse_col2='Japan')
germany = Region(name='Germany', population=83020000, csse_col2='Germany')
spain = Region(name='Spain', population=46940000, csse_col2='Spain')
uk = Region(  # UK main islands only: the row whose column 1 is blank
    name='UK',
    population=66650000,
    csse_col2='United Kingdom',
    csse_col1=[''],
)


# Regions whose data is loaded and charted.
# To drop a region, remove it from this list.
# To add a country, create its Region above and append it here.
regions = [
    taiwan,
    singapore,
    hongkong,
    southkorea,
    japan,
    china,
    usa,
    italy,
    germany,
    spain,
    uk,
]


# Alternate (incomplete) version just for China regions. Uncomment if you want to analyze these
# hubei = Region('Hubei', 58500000, 'China', ['Hubei'])
# guangdong = Region('Guangdong', 113460000, 'China', ['Guangdong'])
# shanghai = Region('Shanghai', 24281400, 'China', ['Shanghai'])
# beijing = Region('Beijing', 21542000, 'China', ['Beijing'])
# hongkong = Region('Hong Kong', 7451000, 'China', ['Hong Kong'])
# regions = [hubei, guangdong, shanghai, beijing, hongkong]  


def to_int(row: list) -> list:
    """Return a new list holding int(value) for every value of `row`, in order."""
    return list(map(int, row))


def add_row(old_row: list, new_row: list) -> list:
    """Add `new_row` onto the running totals in `old_row`, position by position.

    Used for regions that are spread over several rows of the data.
    `new_row` values are converted with int(). An empty `old_row` means there
    is no running total yet, so the converted `new_row` is returned as-is.
    When the lengths differ, the result is as long as the shorter list.
    """
    if old_row != []:
        return [total + int(value) for total, value in zip(old_row, new_row)]
    return to_int(new_row)

            
# Load the CSSE time series (confirmed cases, then deaths) into the regions
def load_time_series(path: str, regions: list, attr: str):
    """Read one CSSE global time-series CSV and store the counts on each region.

    Args:
        path: Location of the CSV file. Column 1 is the province/state,
            column 2 the country/region, and the counts start at column 5.
        regions: Region objects to fill in.
        attr: Name of the Region attribute to populate: 'confirmed' or 'deaths'.

    Returns:
        The dates parsed from the header row (result of pd.to_datetime).

    Raises:
        ValueError: if the file contains no "Country/Region" header row.
    """
    # Start every series from scratch so the totals never build on stale data.
    for region in regions:
        setattr(region, attr, [])

    header_dates = None
    with open(path, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if len(row) < 2:  # blank line: nothing to match against
                continue
            if row[1] == "Country/Region":
                header_dates = pd.to_datetime(row[4:], format="%m/%d/%y")
                continue
            for region in regions:
                if row[1] != region.csse_col2:  # Country does not match
                    continue
                if region.csse_col1 is None:  # No spec for region (column 1 of csv)
                    setattr(region, attr, to_int(row[4:]))
                    continue
                listed = row[0] in region.csse_col1
                # Inclusive spec: keep the listed rows. Exclusive spec: keep the others.
                # Rows are summed in both cases so that a spec naming several
                # provinces yields their total instead of only the last row read.
                if listed != bool(region.csse_col1_excl):
                    setattr(region, attr, add_row(getattr(region, attr), row[4:]))

    if header_dates is None:
        raise ValueError('No "Country/Region" header row found in ' + path)
    return header_dates


# Get confirmed cases (the dates are taken from this file's header)
dates = load_time_series(
    './COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv',
    regions,
    'confirmed',
)

# Get deaths
load_time_series(
    './COVID-19/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
    regions,
    'deaths',
)
            
                    
# Most recent date present in the data
as_of_date = dates[-1]


# Parallel lists (one entry per region, in `regions` order) used as chart data
names = [region.name for region in regions]
cases = [region.get_confirmed() for region in regions]
deaths = [region.get_deaths() for region in regions]
doc = [region.get_doc() for region in regions]
cop = [region.get_cop() for region in regions]
dop = [region.get_dop() for region in regions]


# Report the date, a legend line, then one summary line per region
print('As of', as_of_date.strftime("%Y-%m-%d"))
print('Region: Confirmed, Deaths, Confirmed over pop., Deaths over pop., Deaths over confirmed')
for r in regions:
    r.summary()

As of 2020-04-15
Region: Confirmed, Deaths, Confirmed over pop., Deaths over pop., Deaths over confirmed
Taiwan: 395, 6, 1.6610597140454165e-05, 2.5231286795626577e-07, 0.015189873417721518
Singapore: 3699, 10, 0.0006559673701010818, 1.7733640716439086e-06, 0.002703433360367667
Hong Kong: 1017, 4, 0.00013649174607435245, 5.368406925244934e-07, 0.003933136676499509
South Korea: 10591, 225, 0.0002050929512006197, 4.35708752904725e-06, 0.0212444528373147
Japan: 8100, 146, 6.403162055335969e-05, 1.1541501976284584e-06, 0.018024691358024692
China (Mainland): 82339, 3342, 5.7674589494302626e-05, 2.3409135171663413e-06, 0.04058829959071643
USA: 636350, 28326, 0.001931170966691624, 8.59626758898514e-05, 0.04451323956941935
Italy: 165155, 21645, 0.0027361663353214047, 0.00035859840954274353, 0.1310587024310496
Germany: 134753, 3804, 0.0016231390026499638, 4.582028426885088e-05, 0.0282294271741631
Spain: 177644, 18708, 0.003784490839369408, 0.00039855134213890073, 0.10531174708968499
UK: 98476, 

In [2]:
# Run the following cells to get charts comparing rates of infection, death, and population

# Total confirmed cases per region, as a horizontal bar chart sorted by value
data = pd.DataFrame({'Region': names, 'Cases': cases})

chart_title = {
    "text":'COVID-19: Confirmed Cases',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(x="Cases:Q", y=alt.Y("Region:O", sort='x'))
    .properties(title=chart_title)
)

# Value labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Cases:Q", format=",")
)

(
    (bars + text)
    .configure_axisX(labelAngle=-45)
    .configure_axisY(title=None)
)

In [3]:
# Total deaths per region, as a horizontal bar chart sorted by value
data = pd.DataFrame({'Region': names, 'Deaths': deaths})

chart_title = {
    "text":'COVID-19: Deaths',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(x="Deaths:Q", y=alt.Y("Region:O", sort='x'))
    .properties(title=chart_title)
)

# Value labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Deaths:Q", format=",")
)

(
    (bars + text)
    .configure_axisX(labelAngle=-45)
    .configure_axisY(title=None)
    .configure_bar(color='darkred')
)

In [4]:
# Confirmed cases as a share of each region's population
data = pd.DataFrame({'Region': names, '%': cop})

chart_title = {
    "text":'COVID-19: Confirmed Cases as % of Population',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("%:Q", axis=alt.Axis(format='%')),
        y=alt.Y("Region:O", sort='x'),
    )
    .properties(title=chart_title)
)

# Percentage labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("%:Q", format=".3%")
)

(
    (bars + text)
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
)

In [5]:
# Deaths as a share of each region's confirmed cases
data = pd.DataFrame({'Region': names, '%': doc})

chart_title = {
    "text":'COVID-19: Deaths as % of Confirmed Cases',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("%:Q", axis=alt.Axis(format='%')),
        y=alt.Y("Region:O", sort='x'),
    )
    .properties(title=chart_title)
)

# Percentage labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("%:Q", format=".2%")
)

(
    (bars + text)
    .configure_axisX(labelAngle=0, title=None)
    .configure_axisY(title=None)
    .configure_bar(color='darkred')
)

In [6]:
# Deaths as a share of each region's population
data = pd.DataFrame({'Region': names, '%': dop})

chart_title = {
    "text":'COVID-19: Deaths as % of Population',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

bars = (
    alt.Chart(data)
    .mark_bar()
    .encode(
        x=alt.X("%:Q", axis=alt.Axis(format='%')),
        y=alt.Y("Region:O", sort='x'),
    )
    .properties(title=chart_title)
)

# Percentage labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("%:Q", format=".5%")
)

(
    (bars + text)
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
    .configure_bar(color='darkred')
)

In [7]:
# Trajectory

# TODO: plot confirmed cases over time (e.g. for the USA); rough sketch below
# plt.plot(dates, usa_confirmed)
# plt.title("USA Confirmed Cases")

In [8]:
def create_group_data(regions: list) -> pd.DataFrame:
    """Structure region data for grouped bar graphs.

    Each region contributes two consecutive rows: its latest confirmed count
    labelled 'Confirmed', then its latest death count labelled 'Deaths'.

    Returns a DataFrame with the columns 'Region', 'Count' and 'Case Type'.
    """
    records = [
        [region.name, count, case_type]
        for region in regions
        for case_type, count in (
            ('Confirmed', region.get_confirmed()),
            ('Deaths', region.get_deaths()),
        )
    ]
    return pd.DataFrame(records, columns=['Region', 'Count', 'Case Type'])


# Confirmed and Deaths, Small Regions: one facet row per region
regions_small = [taiwan, singapore, southkorea, japan, hongkong]

data = create_group_data(regions_small)

# One color per case type, with no legend
case_type_color = alt.Color(
    "Case Type:N",
    legend=None,
    scale=alt.Scale(range=['steelblue','darkred']),
)

bars = alt.Chart(data).mark_bar().encode(
    x="Count:Q",
    y="Case Type:N",
    color=case_type_color,
)

# Value labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Count:Q", format=",")
)

chart_title = {
    "text":'COVID-19: Regions with Few Confirmed Cases',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

(
    (bars + text)
    .facet(row="Region:N")
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
    .properties(title=chart_title)
)

In [9]:
# Confirmed and Deaths, Large Regions: one facet row per region
regions_large = [china, usa, italy, germany, spain, uk]

data = create_group_data(regions_large)

# One color per case type, with no legend
case_type_color = alt.Color(
    "Case Type:N",
    legend=None,
    scale=alt.Scale(range=['steelblue','darkred']),
)

bars = alt.Chart(data).mark_bar().encode(
    x="Count:Q",
    y="Case Type:N",
    color=case_type_color,
)

# Value labels drawn just to the right of each bar
text = bars.mark_text(align='left', baseline='middle', dx=3).encode(
    text=alt.Text("Count:Q", format=",")
)

chart_title = {
    "text":'COVID-19: Regions with Many Confirmed Cases',
    "subtitle":'As of ' + as_of_date.strftime("%B %d, %Y")
}

(
    (bars + text)
    .facet(row="Region:N")
    .configure_axisX(labelAngle=-45, title=None)
    .configure_axisY(title=None)
    .properties(title=chart_title)
)