In [16]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly_express as px
import plotly.graph_objects as go
from datetime import datetime

## Plotting the COVID-19 Dataset on a Map

In [17]:
df = pd.read_csv('ecdc.csv')
df.head()

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source
0,Afghanistan,AFG,Asia,38928341,cases,0,2020-01,,0,Epidemic intelligence national data
1,Afghanistan,AFG,Asia,38928341,cases,0,2020-02,0.0,0,Epidemic intelligence national data
2,Afghanistan,AFG,Asia,38928341,cases,0,2020-03,0.0,0,Epidemic intelligence national data
3,Afghanistan,AFG,Asia,38928341,cases,0,2020-04,0.0,0,Epidemic intelligence national data
4,Afghanistan,AFG,Asia,38928341,cases,0,2020-05,0.0,0,Epidemic intelligence national data


In [18]:
df.isna().sum()

country                0
country_code        1140
continent              0
population             0
indicator              0
weekly_count           0
year_week              0
rate_14_day          444
cumulative_count       0
source                 0
dtype: int64

In [39]:
missing_iso_ecdc = df[df["country_code"].isna()]
missing_iso_ecdc

Unnamed: 0,country,country_code,continent,population,indicator,weekly_count,year_week,rate_14_day,cumulative_count,source
190,Africa (total),,Africa,352325033,cases,0,2020-01,,0,Epidemic intelligence national data
191,Africa (total),,Africa,352325033,cases,0,2020-02,0.000000,0,Epidemic intelligence national data
192,Africa (total),,Africa,352325033,cases,0,2020-03,0.000000,0,Epidemic intelligence national data
193,Africa (total),,Africa,352325033,cases,0,2020-04,0.000000,0,Epidemic intelligence national data
194,Africa (total),,Africa,352325033,cases,0,2020-05,0.000000,0,Epidemic intelligence national data
...,...,...,...,...,...,...,...,...,...,...
26865,Oceania (total),,Oceania,42354311,deaths,215,2021-38,9.302477,2980,Epidemic intelligence national data
26866,Oceania (total),,Oceania,42354311,deaths,236,2021-39,10.648267,3216,Epidemic intelligence national data
26867,Oceania (total),,Oceania,42354311,deaths,193,2021-40,10.128839,3409,Epidemic intelligence national data
26868,Oceania (total),,Oceania,42354311,deaths,185,2021-41,8.924711,3594,Epidemic intelligence national data


missing_iso_ecdc['country'].str.contains('(total)').sum()

In [19]:
# I need country specific dataframe, therefore I drop the data without country_code: 3-letter ISO code
df["country_code"].isna().sum()
mod_df = df.dropna( how='any',
                    subset=['country_code'])
mod_df.isna().sum()

country               0
country_code          0
continent             0
population            0
indicator             0
weekly_count          0
year_week             0
rate_14_day         432
cumulative_count      0
source                0
dtype: int64

In [46]:
mod_df.shape

(37412, 10)

In [20]:
print(mod_df[mod_df['cumulative_count'] == mod_df['cumulative_count'].max()])

                        country country_code continent  population indicator  \
36726  United States Of America          USA   America   331002647     cases   

       weekly_count year_week  rate_14_day  cumulative_count  \
36726        435826   2021-42   333.829959          45444816   

                                    source  
36726  Epidemic intelligence national data  


In [24]:
fig = px.choropleth(mod_df, locations="country_code",
                    color="cumulative_count",
                    hover_name="country",
                    animation_frame="year_week",
                    title = "Covid Cases in cumulative count", 
                    range_color=[0,mod_df["cumulative_count"].quantile(0.8)],
                    color_continuous_scale=px.colors.sequential.Plasma)
 
 
fig["layout"].pop("updatemenus")
fig.show()
fig.write_html('Visualiseringar/Q4.Covid Cases in cumulative count.html', auto_open=True)


In [26]:
#select entries with the continent as asia
df_Asia = mod_df[mod_df.continent == 'Asia']
 
#plot
fig = px.choropleth(df_Asia, locations="country_code",
                    color="weekly_count",
                    hover_name="country",
                    animation_frame="year_week",
                    title = "Weekly new COVID cases in Asia",
                    scope ='asia', 
                    range_color=[0,df_Asia["weekly_count"].quantile(0.8)],
                    color_continuous_scale=px.colors.sequential.Plasma)
 
fig["layout"].pop("updatemenus")
fig.show()
fig.write_html('Visualiseringar/Q4.Weekly new COVID cases in Asia.html', auto_open=True)

In [27]:
# import data
who_covid19= pd.read_csv('WHO-COVID-19-global-data.csv')
who_covid19.head()

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
0,2020-01-03,AF,Afghanistan,EMRO,0,0,0,0
1,2020-01-04,AF,Afghanistan,EMRO,0,0,0,0
2,2020-01-05,AF,Afghanistan,EMRO,0,0,0,0
3,2020-01-06,AF,Afghanistan,EMRO,0,0,0,0
4,2020-01-07,AF,Afghanistan,EMRO,0,0,0,0


In [28]:
# A quick look of the data
who_covid19.shape

(159027, 8)

In [29]:
# check columns
who_covid19.columns

Index(['Date_reported', 'Country_code', 'Country', 'WHO_region', 'New_cases',
       'Cumulative_cases', 'New_deaths', 'Cumulative_deaths'],
      dtype='object')

In [30]:
who_covid19.dtypes

Date_reported        object
Country_code         object
Country              object
WHO_region           object
New_cases             int64
Cumulative_cases      int64
New_deaths            int64
Cumulative_deaths     int64
dtype: object

In [31]:
# show the percentage of missing values in each column
who_covid19.isnull().sum()/df.shape[0]

Date_reported        0.000000
Country_code         0.017405
Country              0.000000
WHO_region           0.000000
New_cases            0.000000
Cumulative_cases     0.000000
New_deaths           0.000000
Cumulative_deaths    0.000000
dtype: float64

In [33]:
who_covid19["Date_reported"] = pd.to_datetime(who_covid19["Date_reported"], format="%Y-%m-%d")

In [37]:
missing_iso = who_covid19[who_covid19["Country_code"].isna()]
missing_iso

Unnamed: 0,Date_reported,Country_code,Country,WHO_region,New_cases,Cumulative_cases,New_deaths,Cumulative_deaths
95953,2020-01-03,,Namibia,AFRO,0,0,0,0
95954,2020-01-04,,Namibia,AFRO,0,0,0,0
95955,2020-01-05,,Namibia,AFRO,0,0,0,0
95956,2020-01-06,,Namibia,AFRO,0,0,0,0
95957,2020-01-07,,Namibia,AFRO,0,0,0,0
...,...,...,...,...,...,...,...,...
96619,2021-10-30,,Namibia,AFRO,18,128886,2,3552
96620,2021-10-31,,Namibia,AFRO,0,128886,0,3552
96621,2021-11-01,,Namibia,AFRO,0,128886,0,3552
96622,2021-11-02,,Namibia,AFRO,41,128927,2,3554


In [None]:
mod_df = df.dropna( how='any',
                    subset=['country_code'])
mod_df.isna().sum()