<a href="https://colab.research.google.com/github/w-oke/covid_reproduction/blob/main/owid_exploration.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd

The Our World In Data (OWID) project publishes its COVID-19 data on its website (https://ourworldindata.org/coronavirus) and in its GitHub repository (https://github.com/owid/covid-19-data/tree/master/public/data).


In [3]:
# Codebook: one row per dataset column, with its source, category and description.
owid_col_desc_link = 'https://github.com/owid/covid-19-data/raw/master/public/data/owid-covid-codebook.csv'
owid_col_desc = pd.read_csv(filepath_or_buffer=owid_col_desc_link)
# Preview the first five codebook rows.
owid_col_desc.head(n=5)

Unnamed: 0,column,source,category,description
0,iso_code,International Organization for Standardization,Others,ISO 3166-1 alpha-3 – three-letter country codes
1,continent,Our World in Data,Others,Continent of the geographical location
2,location,Our World in Data,Others,Geographical location
3,date,Our World in Data,Others,Date of observation
4,total_cases,COVID-19 Data Repository by the Center for Sys...,Confirmed cases,Total confirmed cases of COVID-19


In [4]:
# Names of every column documented in the codebook, as a plain Python list.
cols = owid_col_desc['column'].tolist()

In [10]:
# Column groups selected from the OWID dataset, keyed by role.
# Commented-out names are candidate columns that are currently excluded.
var = {
    # candidate 'y' variables
    # NOTE(review): the original note called these "independent" variables, but the
    # 'y' key suggests they are the modelling targets -- confirm.
    'y': [
        'new_cases_smoothed_per_million',
        'reproduction_rate',
        'positive_rate',
        # 'tests_per_case',  # inverse of positivity rate (?)
    ],

    # metadata - not intended for training
    'meta': [
        'date',
        'iso_code',
        'location',
        'population',
    ],

    # numeric columns
    'number': [
        'new_tests_smoothed_per_thousand',
        # 'tests_units',
        # 'total_vaccinations',
        # 'total_vaccinations_per_hundred',  # use people_vaccinated & people_fully_vaccinated instead
        # 'people_vaccinated',
        'people_vaccinated_per_hundred',
        # 'people_fully_vaccinated',
        'people_fully_vaccinated_per_hundred',
        'total_boosters_per_hundred',
        'stringency_index',
        'population_density',
        'median_age',
        'human_development_index',
        # 'aged_65_older',
        # 'aged_70_older',
        'gdp_per_capita',
        'extreme_poverty',
        # 'cardiovasc_death_rate',
        # 'diabetes_prevalence',
        # 'female_smokers',
        # 'male_smokers',
        'handwashing_facilities',
        'hospital_beds_per_thousand',
        'life_expectancy',
    ],
}

In [12]:
# Flatten every column group in `var` into one list, keeping the dict's insertion order.
var_all = []
for group in var.values():
    var_all.extend(group)
print('The first 4 items in "var_all" are: ', var_all[:4])

# The same column names as one comma-separated string.
var_all2 = ', '.join(var_all)
print('var_all2: ', var_all2)

The first 4 items in "var_all" are:  ['new_cases_smoothed_per_million', 'reproduction_rate', 'positive_rate', 'date']
var_all2:  new_cases_smoothed_per_million, reproduction_rate, positive_rate, date, iso_code, location, population, new_tests_smoothed_per_thousand, people_vaccinated_per_hundred, people_fully_vaccinated_per_hundred, total_boosters_per_hundred, stringency_index, population_density, median_age, human_development_index, gdp_per_capita, extreme_poverty, handwashing_facilities, hospital_beds_per_thousand, life_expectancy


In [19]:
# OWID dataset: read only the columns listed in `var_all`, limited to the
# first 2000 data rows of the CSV.
owid_link = 'https://github.com/owid/covid-19-data/raw/master/public/data/owid-covid-data.csv'
owid = pd.read_csv(
    owid_link,
    usecols=var_all,
    nrows=2000,
)
# Preview the first five rows.
owid.head(n=5)

Unnamed: 0,iso_code,location,date,new_cases_smoothed_per_million,reproduction_rate,new_tests_smoothed_per_thousand,positive_rate,people_vaccinated_per_hundred,people_fully_vaccinated_per_hundred,total_boosters_per_hundred,stringency_index,population,population_density,median_age,gdp_per_capita,extreme_poverty,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index
0,AFG,Afghanistan,2020-02-24,,,,,,,,8.33,39835428.0,54.422,18.6,1803.987,,37.746,0.5,64.83,0.511
1,AFG,Afghanistan,2020-02-25,,,,,,,,8.33,39835428.0,54.422,18.6,1803.987,,37.746,0.5,64.83,0.511
2,AFG,Afghanistan,2020-02-26,,,,,,,,8.33,39835428.0,54.422,18.6,1803.987,,37.746,0.5,64.83,0.511
3,AFG,Afghanistan,2020-02-27,,,,,,,,8.33,39835428.0,54.422,18.6,1803.987,,37.746,0.5,64.83,0.511
4,AFG,Afghanistan,2020-02-28,,,,,,,,8.33,39835428.0,54.422,18.6,1803.987,,37.746,0.5,64.83,0.511


In [17]:
# Dimensions of the loaded frame as (rows, columns).
owid.shape

(2000, 20)