## COVID-19 Tracker

Coronavirus Disease 2019 (COVID-19) is a disease that was first identified in Wuhan, China, and later spread throughout the world. This webpage is dedicated to collecting and publishing the data required to understand the COVID-19 outbreak, especially in the United States.

In [1]:
'''
Explore the COVID-19 data

Author: Tu Duong

Copyright © 2020 Tu Duong All Rights Reserved
'''

import os
import csv
import datetime
import urllib
import urllib.request
import urllib.error
import numpy                as np
import pandas               as pd
import plotly.graph_objects as go

from plotly.subplots import make_subplots
from datetime import date


In [2]:
# Fetch the full case/death table (all locations) into a pandas data frame.
ourworld_url = "https://covid.ourworldindata.org/data/ecdc/full_data.csv"
df = pd.read_csv(ourworld_url)

# Parse 'date' into a datetime column so it has the same data type as the
# testing data's 'date' column when the two frames are merged later.
df['date'] = pd.to_datetime(df['date'])

# Keep only the rows whose location is the United States.
us_covid19_data = df.loc[df['location'].eq('United States')]


In [3]:
# Download the US daily testing figures and order them by ascending date.
covidtracking_url = 'https://covidtracking.com/api/v1/us/daily.csv'
us_covid19_test_data = (
    pd.read_csv(covidtracking_url)
      .sort_values(by='date', ascending=True)
)

# 'date' is read as int64, which pd.to_datetime does not handle well, so the
# values are converted to strings first and parsed from there. The resulting
# datetime column has the same data type as the case data's 'date' column,
# which the later merge relies on.
us_covid19_test_data['date'] = pd.to_datetime(
    us_covid19_test_data['date'].astype(str)
)



## Latest numbers

In [19]:
# Show the last row of us_covid19_data as a one-row summary table.
latest_covid19_numbers = us_covid19_data.iloc[-1]

headers_dict = dict(values=['Date', 'Location', 'Total Cases', 'Total Deaths'],
                    font=dict(size=14),
                    height=40)

disp_date     = latest_covid19_numbers.date.strftime('%Y-%m-%d')
disp_location = latest_covid19_numbers.location
# ',.0f' keeps the thousands separator but drops the fractional part, so the
# counts read '1,234' whether pandas parsed the column as int or as float
# (a bare ',' format would render a float count as '1,234.0').
disp_total_case   = f'{latest_covid19_numbers.total_cases:,.0f}'
disp_total_deaths = f'{latest_covid19_numbers.total_deaths:,.0f}'

# Same font specification style as the header, for consistency.
values_dict  = dict(values=[disp_date, disp_location, disp_total_case, disp_total_deaths],
                    font=dict(size=14),
                    height=30)

fig_table = go.Figure(data=[go.Table(header=headers_dict, cells=values_dict)])
fig_table.show()

In [20]:
# Merge the case data and the daily test counts into one data frame,
# matching rows on 'date' (default inner join: only dates present in both).
merged_df = pd.merge(us_covid19_data,
                 us_covid19_test_data[['date', 'totalTestResultsIncrease']],
                 on='date')

# replace NaN with 0
merged_df = merged_df.fillna(0)

# generate 'positive_rate' column: new cases as a percentage of that day's tests.
# Vectorised instead of a row-wise apply. Days with zero tests are masked to
# NaN before dividing (avoiding a division by zero) and then reported as a
# rate of 0, as before. This form also works when the merge result is empty.
daily_tests = merged_df['totalTestResultsIncrease']
merged_df['positive_rate'] = (
    (merged_df['new_cases'] / daily_tests.where(daily_tests != 0)) * 100
).fillna(0)


In [21]:

# Combined chart: daily tests and daily new cases as overlaid bars on the
# primary y-axis, positive rate as a line on the secondary y-axis.
fig = make_subplots(specs=[[{'secondary_y': True}]])

# Tests are drawn semi-transparent so the new-case bars overlaid on them
# (barmode='overlay' below) remain visible.
fig.add_trace(go.Bar(name='US Daily Tests',
                     x=merged_df.date,
                     y=merged_df.totalTestResultsIncrease,
                     marker_color='darkblue',
                     opacity=0.3))

fig.add_trace(go.Bar(name='US Daily New Cases',
                     x=merged_df.date,
                     y=merged_df.new_cases,
                     marker_color='darkblue'))

# secondary_y must be a real boolean. The string "True" only selected the
# secondary axis because any non-empty string is truthy ("False" would have
# done the same).
fig.add_trace(go.Scatter(name='Positive Rate',
                     x=merged_df.date,
                     y=merged_df.positive_rate,
                     mode='lines+markers',
                     marker_color='orange'),
             secondary_y=True)

fig.update_layout(title='COVID-19 Daily Numbers in United States', barmode='overlay', plot_bgcolor='rgb(245,245,245)')
fig.update_yaxes(title_text='Number of People', color='darkblue', secondary_y=False)
fig.update_yaxes(title_text='Positive Rate (%)', color='rgb(255, 140, 0)', secondary_y=True)

fig.show()


<div align="center">Copyright © 2020 Tu Duong All Rights Reserved</div>