# Exploratory analysis on vaccination rate

source: www.ourworldindata.org

people_vaccinated_per_hundred: total number of people who received at least one vaccine dose, per 100 people in the total population

Country code: three letter 'iso_code'

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Download the Our World in Data COVID-19 CSV straight into a DataFrame.
# Note that this hits the network every time the cell is run.
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(filepath_or_buffer=url)

Let's keep only the 'people_vaccinated_per_hundred' parameter and, for each month, select the value closest to the end of the month.

In [3]:
# Keep only the columns this analysis needs. The explicit .copy() makes the
# result an independent frame, so the column assignments below modify it
# directly and do not trigger pandas' SettingWithCopyWarning.
df = df[['people_vaccinated_per_hundred', 'location', 'date']].copy()

# Parse the ISO-formatted date strings into real datetimes.
df['date'] = pd.to_datetime(df['date'], format='%Y-%m-%d')

# Calendar components of the date; year and month serve as grouping keys
# for the monthly roll-up later in the notebook.
df['year'] = df['date'].dt.year
df['month'] = df['date'].dt.month
df['day'] = df['date'].dt.day

In [4]:
# For every (year, month, location) keep the single row with the latest date
# in that month, i.e. the observation closest to month end.
#
# This is done with one global sort plus groupby.tail(1) instead of a
# per-group `apply` with its own sort: it is vectorised, does not shadow the
# global `df` inside a lambda, and does not rely on `apply` seeing the
# grouping columns (deprecated in recent pandas).
#
# NOTE: the row is chosen by date only, so people_vaccinated_per_hundred can
# be NaN in the selected row when nothing was reported on that date.
group_keys = ['year', 'month', 'location']
groups = (
    df
    # NaT first, so a missing date is never picked as the "latest" row.
    .sort_values('date', kind='stable', na_position='first')
    .groupby(group_keys)
    .tail(1)
    # Index by the keys but keep them as columns too: later cells filter on
    # groups['location'] and group by the 'year'/'month' index levels.
    .set_index(group_keys, drop=False)
    .sort_index()
)
groups.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,people_vaccinated_per_hundred,location,date,year,month,day
year,month,location,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,1,Argentina,,Argentina,2020-01-31,2020,1,31
2020,1,Asia,,Asia,2020-01-31,2020,1,31
2020,1,Australia,,Australia,2020-01-31,2020,1,31
2020,1,Cambodia,,Cambodia,2020-01-31,2020,1,31
2020,1,Canada,,Canada,2020-01-31,2020,1,31


In [5]:
# Quick sanity summary of the frame: distinct locations and covered date span.
location_count = df['location'].nunique(dropna=False)
earliest_date, latest_date = df['date'].min(), df['date'].max()

print('Number of locations:', location_count)
print('min date:', earliest_date)
print('max date:', latest_date)

Number of locations: 233
min date: 2020-01-01 00:00:00
max date: 2021-09-26 00:00:00


## Number of areas having data per month

In [6]:
# Per (year, month): how many locations have a non-null value in the row
# selected for that month. 'year' and 'month' are index levels of `groups`.
(
    groups['people_vaccinated_per_hundred']
    .groupby(level=['year', 'month'])
    .count()
)

year  month
2020  1          0
      2          0
      3          0
      4          0
      5          0
      6          0
      7          0
      8          0
      9          0
      10         0
      11         0
      12        29
2021  1         48
      2         71
      3        106
      4        101
      5        126
      6        115
      7        104
      8        129
      9         96
Name: people_vaccinated_per_hundred, dtype: int64

## Maximum values per month

In [7]:
# Per (year, month): the highest selected value across all locations.
# 'year' and 'month' are index levels of `groups`.
(
    groups['people_vaccinated_per_hundred']
    .groupby(level=['year', 'month'])
    .max()
)

year  month
2020  1           NaN
      2           NaN
      3           NaN
      4           NaN
      5           NaN
      6           NaN
      7           NaN
      8           NaN
      9           NaN
      10          NaN
      11          NaN
      12        11.27
2021  1         38.25
      2         69.80
      3         94.21
      4        111.32
      5         71.55
      6         75.54
      7         78.10
      8         85.13
      9        118.51
Name: people_vaccinated_per_hundred, dtype: float64

In [8]:
# Monthly rows for Gibraltar only.
groups.loc[groups['location'].eq('Gibraltar')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,people_vaccinated_per_hundred,location,date,year,month,day
year,month,location,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021,1,Gibraltar,38.25,Gibraltar,2021-01-31,2021,1,31
2021,2,Gibraltar,69.8,Gibraltar,2021-02-28,2021,2,28
2021,3,Gibraltar,94.21,Gibraltar,2021-03-31,2021,3,31
2021,4,Gibraltar,111.32,Gibraltar,2021-04-30,2021,4,30
2021,5,Gibraltar,,Gibraltar,2021-05-31,2021,5,31
2021,6,Gibraltar,,Gibraltar,2021-06-30,2021,6,30
2021,7,Gibraltar,,Gibraltar,2021-07-31,2021,7,31
2021,8,Gibraltar,,Gibraltar,2021-08-31,2021,8,31
2021,9,Gibraltar,118.51,Gibraltar,2021-09-25,2021,9,25


In [9]:
# Monthly rows for Israel only.
groups.loc[groups['location'].eq('Israel')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,people_vaccinated_per_hundred,location,date,year,month,day
year,month,location,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,2,Israel,,Israel,2020-02-29,2020,2,29
2020,3,Israel,,Israel,2020-03-31,2020,3,31
2020,4,Israel,,Israel,2020-04-30,2020,4,30
2020,5,Israel,,Israel,2020-05-31,2020,5,31
2020,6,Israel,,Israel,2020-06-30,2020,6,30
2020,7,Israel,,Israel,2020-07-31,2020,7,31
2020,8,Israel,,Israel,2020-08-31,2020,8,31
2020,9,Israel,,Israel,2020-09-30,2020,9,30
2020,10,Israel,,Israel,2020-10-31,2020,10,31
2020,11,Israel,,Israel,2020-11-30,2020,11,30


In [10]:
# Monthly rows for Algeria only.
groups.loc[groups['location'].eq('Algeria')]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,people_vaccinated_per_hundred,location,date,year,month,day
year,month,location,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,2,Algeria,,Algeria,2020-02-29,2020,2,29
2020,3,Algeria,,Algeria,2020-03-31,2020,3,31
2020,4,Algeria,,Algeria,2020-04-30,2020,4,30
2020,5,Algeria,,Algeria,2020-05-31,2020,5,31
2020,6,Algeria,,Algeria,2020-06-30,2020,6,30
2020,7,Algeria,,Algeria,2020-07-31,2020,7,31
2020,8,Algeria,,Algeria,2020-08-31,2020,8,31
2020,9,Algeria,,Algeria,2020-09-30,2020,9,30
2020,10,Algeria,,Algeria,2020-10-31,2020,10,31
2020,11,Algeria,,Algeria,2020-11-30,2020,11,30


In [11]:
# For each location, count the months whose selected row has a non-null
# value, then summarise the distribution of those counts.
counts = (
    groups['people_vaccinated_per_hundred']
    .groupby(level='location')
    .count()
)
counts.describe()

count    233.000000
mean       3.969957
std        3.357178
min        0.000000
25%        1.000000
50%        3.000000
75%        7.000000
max       10.000000
Name: people_vaccinated_per_hundred, dtype: float64

## Data on continents

In [12]:
# Same per-location month count as above, restricted to continent aggregates.
# NOTE(review): 'Oceania' is not in this list - confirm whether that is intended.
continents = ['Europe', 'Asia', 'Africa', 'North America', 'South America']
is_continent = groups['location'].isin(continents)
counts = (
    groups.loc[is_continent, 'people_vaccinated_per_hundred']
    .groupby(level='location')
    .count()
)
counts

location
Africa            9
Asia             10
Europe           10
North America    10
South America    10
Name: people_vaccinated_per_hundred, dtype: int64