In [126]:
import pandas as pd 
import plotly.express as px
import plotly.graph_objects as go

In [127]:
# Load the WHO global COVID-19 CSV; the relative path means the file is
# expected in the notebook's working directory.
covid_data = pd.read_csv(filepath_or_buffer="WHO-COVID-19-global-data.csv")

In [128]:
# Preview the raw WHO table (pandas truncates the display to the first and last rows).
covid_data

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0
...,...,...,...,...,...,...,...,...
185803,2022-02-20,ZW,Zimbabwe,AFRO,194,233224,1,5386
185804,2022-02-21,ZW,Zimbabwe,AFRO,128,233352,0,5386
185805,2022-02-22,ZW,Zimbabwe,AFRO,219,233571,0,5386
185806,2022-02-23,ZW,Zimbabwe,AFRO,409,233980,2,5388


`Country_code` in the original dataset is a two-letter code. It needs to be converted to the three-letter ISO country code, which Plotly's built-in country geometries use. The mapping comes from "countries_codes_and_coordinates.csv" in the rawdata folder.

In [129]:
# Load the lookup table that maps two-letter country codes to three-letter codes.
country_code = pd.read_csv(
    filepath_or_buffer="rawdata/countries_codes_and_coordinates_new.csv"
)

In [130]:
# Preview the country-code lookup table (pandas truncates the display).
country_code

Unnamed: 0,Country,Alpha-2code,Alpha-3code,Numeric code,Latitude (average),Longitude (average)
0,Afghanistan,AF,AFG,4,33.0000,65.0
1,Albania,AL,ALB,8,41.0000,20.0
2,Algeria,DZ,DZA,12,28.0000,3.0
3,American Samoa,AS,ASM,16,-14.3333,-170.0
4,Andorra,AD,AND,20,42.5000,1.6
...,...,...,...,...,...,...
251,Wallis and Futuna,WF,WLF,876,-13.3000,-176.2
252,Western Sahara,EH,ESH,732,24.5000,-13.0
253,Yemen,YE,YEM,887,15.0000,48.0
254,Zambia,ZM,ZMB,894,-15.0000,30.0


Clean the data:
1. Join the two datasets on the two-letter country code (not on the country name, because names may differ, e.g. "United States of America" in covid_data but "United States" in country_code)
2. Align country names with the standard names in "countries_codes_and_coordinates.csv"
3. Keep the fields "country_name", "Date_reported", "Cumulative_cases", "Cumulative_deaths", "Alpha-3code"
4. Since the project focuses on showing cumulative cases and deaths, keep only the rows where Date_reported is "2020-12-31" (the year-end totals) for all countries

In [131]:
# Rename the lookup's name column so it cannot clash with covid_data's own
# "Country" column when the two frames are joined.
country_code = country_code.rename(columns={"Country": "country_name"})

In [132]:
# Confirm the rename: the name column should now be labelled country_name.
country_code

Unnamed: 0,country_name,Alpha-2code,Alpha-3code,Numeric code,Latitude (average),Longitude (average)
0,Afghanistan,AF,AFG,4,33.0000,65.0
1,Albania,AL,ALB,8,41.0000,20.0
2,Algeria,DZ,DZA,12,28.0000,3.0
3,American Samoa,AS,ASM,16,-14.3333,-170.0
4,Andorra,AD,AND,20,42.5000,1.6
...,...,...,...,...,...,...
251,Wallis and Futuna,WF,WLF,876,-13.3000,-176.2
252,Western Sahara,EH,ESH,732,24.5000,-13.0
253,Yemen,YE,YEM,887,15.0000,48.0
254,Zambia,ZM,ZMB,894,-15.0000,30.0


In [133]:
# Preview the left join: each WHO row picks up the lookup columns whose
# Alpha-2code equals that row's Country_code.
covid_data.join(
    country_code.set_index("Alpha-2code"),
    on="Country_code",
    how="left",
)

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths,country_name,Alpha-3code,Numeric code,Latitude (average),Longitude (average)
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0,Afghanistan,AFG,4.0,33.0,65.0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0,Afghanistan,AFG,4.0,33.0,65.0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0,Afghanistan,AFG,4.0,33.0,65.0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0,Afghanistan,AFG,4.0,33.0,65.0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0,Afghanistan,AFG,4.0,33.0,65.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
185803,2022-02-20,ZW,Zimbabwe,AFRO,194,233224,1,5386,Zimbabwe,ZWE,716.0,-20.0,30.0
185804,2022-02-21,ZW,Zimbabwe,AFRO,128,233352,0,5386,Zimbabwe,ZWE,716.0,-20.0,30.0
185805,2022-02-22,ZW,Zimbabwe,AFRO,219,233571,0,5386,Zimbabwe,ZWE,716.0,-20.0,30.0
185806,2022-02-23,ZW,Zimbabwe,AFRO,409,233980,2,5388,Zimbabwe,ZWE,716.0,-20.0,30.0


In [134]:
# Attach the lookup columns (country_name, Alpha-3code, ...) to every WHO row
# by matching Country_code against the lookup's Alpha-2code. As a left join,
# WHO rows without a matching code are kept with NaN in the lookup columns.
# NOTE(review): pandas' default NA parsing reads the literal code "NA"
# (Namibia) as NaN in both CSVs — confirm Namibia still gets an Alpha-3code.
df = covid_data.join(
    country_code.set_index("Alpha-2code"),
    on="Country_code",
    how="left",
)

In [135]:
# Year-end 2020 snapshot: keep only the rows reported on 2020-12-31 and only
# the columns the map needs. Date_reported is compared as a plain string.
extracted_df = df.loc[
    df["Date_reported"] == "2020-12-31",
    [
        "country_name",
        "Date_reported",
        "Cumulative_cases",
        "Cumulative_deaths",
        "Alpha-3code",
    ],
]

Start plotting the geographical map with Plotly Express (Plotly figures are what Dash displays)

In [197]:
# World choropleth of cumulative COVID-19 deaths per country, drawn from the
# rows in extracted_df. `locations` holds three-letter ISO codes, which
# Plotly Express matches against its built-in country geometries.
fig = px.choropleth(
    extracted_df,
    locations='Alpha-3code',
    color='Cumulative_deaths',
    hover_name='country_name',
    # Reversed Inferno: light colours for low counts, dark for high counts.
    color_continuous_scale=px.colors.sequential.Inferno[::-1],
    title='World Covid Deaths (Year 2020)',
)

# Source credit placed near the bottom of the figure.
fig.update_layout(annotations=[dict(
    x=0.55,
    y=0.05,
    # Position in figure ("paper") fractions rather than data coordinates;
    # stated explicitly so placement does not depend on Plotly's fallback.
    xref='paper',
    yref='paper',
    text='Source : WHO Coronavirus (COVID-19) Dashboard',
    # Annotations draw an arrow by default; a caption should not have one.
    showarrow=False,
)])

fig.show()