##  Import the necessary packages


In [1]:
import pandas as pd
%matplotlib inline

import plotly.express as px

## Loading the data


In [2]:
# Download the Our World in Data Covid-19 dataset and keep an untouched copy on disk.
# NOTE: pandas does not create missing folders, so the target directory must already exist.
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(url, sep=',')
df.to_csv('../data/raw/covid_ourworldindata/owid-covid-data.csv', sep=';', index=False)

# dropna() returns a new frame; the result has to be assigned for the
# completely empty rows to actually be removed.
df = df.dropna(how='all')

# selecting 3 countries
df_GER = df[df['location'] == 'Germany']
df_US = df[df['location'] == 'United States']
df_SIN = df[df['location'] == 'Singapore']

# concat builds a new frame, rows ordered Germany -> United States -> Singapore
df_mylist = pd.concat([df_GER, df_US, df_SIN])

# absolute Covid cases / population size
relative_cases = df_mylist['total_cases'] / df_mylist['population']

# vaccination rate (percentage of the population)
vaccination_rate = (df_mylist['people_fully_vaccinated'] / df_mylist['population']) * 100

# required features
required_columns = ['date', 'location', 'total_cases', 'population',
                    'people_fully_vaccinated', 'relative_cases', 'vaccination_rate']

# assign() returns a new frame, so the derived columns and the datetime
# conversion never write into a slice (no SettingWithCopyWarning), and the
# result of reset_index() is kept instead of being thrown away.
df_mylist = (
    df_mylist
    .assign(
        relative_cases=relative_cases,
        vaccination_rate=vaccination_rate,
        date=pd.to_datetime(df_mylist['date']),  # convert to datetime
    )
    .loc[:, required_columns]
    .reset_index(drop=True)
)

df_mylist.describe()

Unnamed: 0,total_cases,population,people_fully_vaccinated,relative_cases,vaccination_rate
count,2658.0,2658.0,1616.0,2658.0,1616.0
mean,12816920.0,140914200.0,67572120.0,0.0693121,51.032858
std,21897440.0,139737200.0,79395350.0,0.0853072,29.072376
min,1.0,5453600.0,2.0,3.003769e-09,3.7e-05
25%,62724.5,5453600.0,4324547.0,0.009944211,25.755772
50%,1457572.0,83900470.0,40567390.0,0.02944576,61.320832
75%,17125930.0,332915100.0,132510500.0,0.1009004,74.764496
max,87092230.0,332915100.0,222123200.0,0.3327066,91.593094


## Delivery 1: Figure 1 – The relative Covid cases over time (absolute Covid cases / population size)


In [3]:
# Figure 1 as a line chart: one line per country, showing
# total cases divided by population for every date.
fig = px.line(
    df_mylist,
    x="date",
    y="relative_cases",
    color='location',
    title='The relative cases overtime of Covid infectors',
    width=1000,
    height=720,
)
fig.show()

## Delivery 2: Figure 2 – The vaccination rate (percentage of the population) over time

In [4]:
# Keep only the date range shown in the vaccination figures (both bounds inclusive).
# .copy() gives an independent frame so the column assignment below is safe.
in_date_range = df_mylist['date'].between('2020-12-13', '2022-06-20')
df_mylist = df_mylist[in_date_range].copy()

# Back-fill missing values separately for each country. The frame holds the
# countries stacked one after another, so a plain back-fill over the whole
# frame would copy the first row of the next country into a gap at the end of
# the previous one. Gaps at the very end of a country's series stay NaN,
# because there is no later value of that country to copy from.
value_columns = [col for col in df_mylist.columns if col not in ('date', 'location')]
df_mylist[value_columns] = df_mylist.groupby('location')[value_columns].bfill()

In [5]:
# Number of missing (NaN) entries in each column
df_mylist.isna().sum()

date                       0
location                   0
total_cases                0
population                 0
people_fully_vaccinated    0
relative_cases             0
vaccination_rate           0
dtype: int64

In [6]:
# Figure 2 as a line chart: vaccination rate per country over time.
# Population and the number of fully vaccinated people are added to the hover box.
fig = px.line(
    df_mylist,
    x="date",
    y="vaccination_rate",
    color='location',
    hover_data=['population', 'people_fully_vaccinated'],
    width=1000,
    height=720,
    template='gridon',
    title='The vaccination rate over time',
)
fig.show()

In [7]:
# Figure 2 as a bar chart: same data and hover information as the line chart.
# NOTE(review): barmode='relative' stacks the bars of the countries on top of
# each other, so the total bar height is the sum of their rates -- confirm
# that this is the intended presentation.
fig = px.bar(
    df_mylist,
    x="date",
    y="vaccination_rate",
    color='location',
    hover_data=['population', 'people_fully_vaccinated'],
    barmode='relative',
    opacity=0.9,
    orientation="v",
    width=1000,
    height=720,
    template='gridon',
    title='The vaccination rate over time',
)
fig.show()