# Covid statistics



In [29]:
import pandas as pd
import numpy as np

# Location of the full COVID dataset CSV; pandas downloads it directly from
# the URL, so this cell needs network access.
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
df = pd.read_csv(filepath_or_buffer=url)

Select only one indicator column (`people_vaccinated_per_hundred`), together with the country name, date and ISO code. There are lots of other indicators too, for example 'fully_vaccinated' etc.

In [30]:
# Keep the vaccination indicator plus the columns that identify each row,
# discard rows where any of those four values is missing, and expose the ISO
# code under the name 'Code3'.
# (The stray `df.head()` that used to open this cell was removed: it was not
# the last expression, so its result was computed and thrown away.)
v = (
    df[['people_vaccinated_per_hundred', 'location', 'date', 'iso_code']]
    .dropna()
    .rename(columns={'iso_code': 'Code3'})
)

There is daily data for each country; let's select only the most recent date for each country.

In [31]:
# Keep the newest row for every location.
#
# Sorting once and dropping duplicates replaces the former
# `groupby('location').apply(lambda df: ...)`, which shadowed the global `df`,
# sorted every group in Python, and relied on `apply` handing the grouping
# column to the function (deprecated in recent pandas releases).
# The result keeps the same layout as before: indexed by location, ordered by
# location, with 'location' still available as a regular column.
recent = (
    v.sort_values('date', ascending=False)
    .drop_duplicates(subset='location', keep='first')
    .set_index('location', drop=False)
    .sort_index()
)
recent

Unnamed: 0_level_0,people_vaccinated_per_hundred,location,date,Code3
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,2.08,Afghanistan,2021-09-30,AFG
Africa,7.76,Africa,2021-10-18,OWID_AFR
Albania,34.10,Albania,2021-10-17,ALB
Algeria,13.49,Algeria,2021-09-25,DZA
Andorra,70.21,Andorra,2021-09-26,AND
...,...,...,...,...
Wallis and Futuna,52.11,Wallis and Futuna,2021-10-18,WLF
World,47.76,World,2021-10-18,OWID_WRL
Yemen,1.01,Yemen,2021-09-27,YEM
Zambia,1.64,Zambia,2021-09-07,ZMB


The data is saved in CSV format with the columns 'Country or Area', 'Item', 'Year' and 'Value' (plus the ISO code column 'Code3').

In [32]:
# The first column of `recent` is the indicator; its name becomes the 'Item'
# label and its values become 'Value'.
data_name = recent.columns[0]
print(data_name)

# Take the year from each row's own date instead of hard-coding 2021, so the
# label stays correct when the most recent observation falls in another year.
latest_year = pd.to_datetime(recent['date']).dt.year

# NOTE: `recent` is rebound here, so re-running this cell on its own fails
# (the 'date' column is already gone); re-run the previous cell first.
recent = (
    recent.drop(columns=['date'])
    .rename(columns={'location': 'Country or Area', data_name: 'Value'})
    .assign(Item=data_name, Year=latest_year)
)
recent.to_csv('Covid_statistics.csv', index=False)

people_vaccinated_per_hundred


In [33]:
# Spot check: show the exported row whose ISO code is 'RUS'.
recent.loc[recent['Code3'] == 'RUS']

Unnamed: 0_level_0,Value,Country or Area,Code3,Item,Year
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Russia,35.22,Russia,RUS,people_vaccinated_per_hundred,2021


In [34]:
# Narrow the raw frame to the indicator, date and ISO code, parse the date and
# add year / month / day columns.
#
# `assign` builds a new frame, which avoids the SettingWithCopyWarning raised
# by assigning columns onto a column-subset of `df`.
# NOTE: `df` is rebound to the narrowed frame, so the earlier cell that selects
# 'location' from `df` cannot be re-run without reloading the CSV.
df = (
    df[['people_vaccinated_per_hundred', 'date', 'iso_code']]
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .assign(
        year=lambda frame: frame['date'].dt.year,
        month=lambda frame: frame['date'].dt.month,
        day=lambda frame: frame['date'].dt.day,
    )
)

In [35]:
# Latest row of every (year, month, country) combination.
#
# Sorting once and dropping duplicates replaces the former
# `groupby([...]).apply(lambda df: ...)`, which shadowed the global `df`,
# sorted every group in Python, and relied on `apply` handing the grouping
# columns to the function (deprecated in recent pandas releases).
# The result keeps the same layout as before: a (year, month, iso_code) index,
# ordered by those keys, with the three keys still available as columns.
# NOTE: the latest row of a month can have a missing indicator value; such rows
# are kept, exactly as before.
month_keys = ['year', 'month', 'iso_code']
groups = (
    df.dropna(subset=month_keys)  # groupby ignored rows with a missing key
    .sort_values('date', ascending=False)
    .drop_duplicates(subset=month_keys, keep='first')
    .set_index(month_keys, drop=False)
    .sort_index()
)
groups.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,people_vaccinated_per_hundred,date,iso_code,year,month,day
year,month,iso_code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,1,ARE,,2020-01-31,ARE,2020,1,31
2020,1,ARG,,2020-01-31,ARG,2020,1,31
2020,1,AUS,,2020-01-31,AUS,2020,1,31
2020,1,CAN,,2020-01-31,CAN,2020,1,31
2020,1,CHN,,2020-01-31,CHN,2020,1,31


In [36]:
# Reshape the monthly snapshot for export: drop the helper date columns, rename
# the indicator to 'value' and the ISO code to 'country_code', tag every row
# with a feature id, and store the value as a number rounded to one decimal.
# NOTE: `groups` is rebound, so this cell cannot be re-run on its own.
groups = (
    groups.drop(columns=['day', 'date'])
    .rename(columns={'people_vaccinated_per_hundred': 'value',
                     'iso_code': 'country_code'})
    .assign(feature_id=6)  # presumably the id of this indicator downstream — TODO confirm
    .assign(value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'))
    .round({'value': 1})
)
groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,country_code,year,month,feature_id
year,month,iso_code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020,1,ARE,,ARE,2020,1,6
2020,1,ARG,,ARG,2020,1,6
2020,1,AUS,,AUS,2020,1,6
2020,1,CAN,,CAN,2020,1,6
2020,1,CHN,,CHN,2020,1,6
...,...,...,...,...,...,...,...
2021,10,WSM,57.6,WSM,2021,10,6
2021,10,YEM,,YEM,2021,10,6
2021,10,ZAF,,ZAF,2021,10,6
2021,10,ZMB,,ZMB,2021,10,6


In [37]:
# Write the monthly table without its index (the keys are also plain columns).
# NOTE: this is the same path the earlier per-country export wrote to, so that
# file is overwritten here.
groups.to_csv(path_or_buf='Covid_statistics.csv', index=False)