# Covid statistics (death rate)



In [10]:
import pandas as pd
import numpy as np

# Location of the Our World in Data COVID-19 CSV export.
url = "https://covid.ourworldindata.org/data/owid-covid-data.csv"
# Download and parse the file into a single DataFrame (needs network access).
df = pd.read_csv(filepath_or_buffer=url)

Select only one indicator column (total_deaths_per_million), together with the location, date and ISO code columns. There are lots of other indicators too, for example 'fully_vaccinated' etc.

In [11]:
# Keep the indicator plus the identifying columns, discard every row that has
# a missing value in any of them, and expose the ISO code under the name
# 'Code3'.
# (The former leading `df.head()` was removed: it was not the last expression
# of the cell, so its result was computed and thrown away.)
v = (
    df[['total_deaths_per_million', 'location', 'date', 'iso_code']]
    .dropna()
    .rename(columns={'iso_code': 'Code3'})
)

There is daily data for each country; let's select only the most recent date for each country.

In [12]:
# For each location keep only the row with the latest date.
# Sorting once and keeping the last row per location replaces the previous
# groupby('location').apply(lambda df: ...), which shadowed the global `df`,
# sorted every group in Python, and relied on the grouping column being passed
# to the applied function (deprecated in newer pandas, where 'location' would
# disappear from the result).
# 'date' holds ISO-formatted (YYYY-MM-DD) strings here, so lexical order is
# chronological order.
recent = (
    v.sort_values(['location', 'date'])
     .drop_duplicates(subset='location', keep='last')
     # keep 'location' both as the index and as a regular column
     .set_index('location', drop=False)
)
recent

Unnamed: 0_level_0,total_deaths_per_million,location,date,Code3
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Afghanistan,182.476,Afghanistan,2021-10-29,AFG
Africa,158.711,Africa,2021-10-29,OWID_AFR
Albania,1012.554,Albania,2021-10-29,ALB
Algeria,132.529,Algeria,2021-10-29,DZA
Andorra,1680.585,Andorra,2021-10-29,AND
...,...,...,...,...
Vietnam,223.757,Vietnam,2021-10-29,VNM
World,633.594,World,2021-10-29,OWID_WRL
Yemen,61.560,Yemen,2021-10-29,YEM
Zambia,193.439,Zambia,2021-10-29,ZMB


The data is saved in CSV format with the columns 'Value', 'Country or Area', 'Code3', 'Item' and 'Year'.

In [13]:
# The first column holds the indicator values; its original name is reused as
# the 'Item' label of every exported row.
data_name = recent.columns[0]
print(data_name)
# NOTE: this cell rebinds `recent`, so it cannot be re-run on its own; re-run
# the cell that builds `recent` first (otherwise 'date' is already gone).
recent = (
    recent
    .rename(columns={'location': 'Country or Area', data_name: 'Value'})
    .assign(
        Item=data_name,
        # Take the year from each row's own date instead of hard-coding 2021,
        # so the export stays correct whenever the latest observation falls
        # in another year.
        Year=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d').dt.year,
    )
    .drop(columns=['date'])
)
# index=False: the 'location' index duplicates the 'Country or Area' column.
recent.to_csv('Covid_statistics.csv', index=False)

total_deaths_per_million


In [14]:
# Spot-check a single country: show the exported row whose ISO code is 'RUS'.
recent.loc[recent['Code3'] == 'RUS']

Unnamed: 0_level_0,Value,Country or Area,Code3,Item,Year
location,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Russia,1589.526,Russia,RUS,total_deaths_per_million,2021


In [15]:
# Narrow the frame to the indicator, the date and the country code, parse the
# date, and derive calendar parts from it.
# The columns are added with .assign() on the selected sub-frame rather than
# by item assignment, which raised SettingWithCopyWarning because the
# column-subset frame is flagged as a copy of the original.
# NOTE: `df` is rebound to the narrowed frame; cells above that need the other
# columns (e.g. 'location') require reloading the data first.
df = (
    df[['total_deaths_per_million', 'date', 'iso_code']]
    .assign(date=lambda frame: pd.to_datetime(frame['date'], format='%Y-%m-%d'))
    .assign(
        year=lambda frame: frame['date'].dt.year,
        month=lambda frame: frame['date'].dt.month,
        day=lambda frame: frame['date'].dt.day,
    )
)

In [16]:
# For every (year, month, country) keep the row with the latest date, i.e. the
# last available observation of that month (its value may still be NaN).
# This replaces groupby(...).apply(lambda df: ...), which shadowed the global
# `df`, sorted each group in Python, and relied on the grouping columns being
# passed to the applied function (deprecated in newer pandas, where 'year',
# 'month' and 'iso_code' would vanish from the result and from the CSV export).
group_keys = ['year', 'month', 'iso_code']
groups = (
    df.dropna(subset=group_keys)          # groupby ignores rows with a NaN key
      .sort_values(group_keys + ['date'])
      .drop_duplicates(subset=group_keys, keep='last')
      # keep the keys both as index levels and as regular columns
      .set_index(group_keys, drop=False)
)
groups.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total_deaths_per_million,date,iso_code,year,month,day
year,month,iso_code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2020,1,ARE,,2020-01-31,ARE,2020,1,31
2020,1,ARG,,2020-01-31,ARG,2020,1,31
2020,1,AUS,,2020-01-31,AUS,2020,1,31
2020,1,CAN,,2020-01-31,CAN,2020,1,31
2020,1,CHN,0.147,2020-01-31,CHN,2020,1,31


In [17]:
# Shape the monthly table for export: remove the helper date columns, switch
# to the target column names, tag every row with feature id 7, force 'value'
# to numeric (unparseable entries become NaN) and round it to one decimal.
groups = (
    groups
    .drop(columns=['day', 'date'])
    .rename(columns={'total_deaths_per_million': 'value', 'iso_code': 'country_code'})
    .assign(
        feature_id=7,
        value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'),
    )
    .round({'value': 1})
)
groups

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value,country_code,year,month,feature_id
year,month,iso_code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2020,1,ARE,,ARE,2020,1,7
2020,1,ARG,,ARG,2020,1,7
2020,1,AUS,,AUS,2020,1,7
2020,1,CAN,,CAN,2020,1,7
2020,1,CHN,0.1,CHN,2020,1,7
...,...,...,...,...,...,...,...
2021,10,WSM,,WSM,2021,10,7
2021,10,YEM,61.6,YEM,2021,10,7
2021,10,ZAF,1484.8,ZAF,2021,10,7
2021,10,ZMB,193.4,ZMB,2021,10,7


In [18]:
# Write the monthly table to disk. The (year, month, iso_code) index is left
# out because the same information is already present as regular columns.
groups.to_csv(path_or_buf='Covid_statistics_deaths.csv', index=False)