The purpose of this project is to explore some COVID-19 statistics.

In [2]:
# Importing needed libraries for data analysis.

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Read the "Our World in Data" COVID-19 CSV export into a DataFrame.
# The path is relative, so the file is looked up in the current working directory.

df = pd.read_csv(filepath_or_buffer='owid-covid-data.csv')

In [4]:
# Display the DataFrame to get a first impression of it. As the bare last
# expression of the cell it is rendered by the notebook; for a frame this large
# only the first and last rows (and a subset of the columns) are shown.

df

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
0,AFG,Asia,Afghanistan,2020-02-24,5.0,5.0,,,,,...,,597.029,9.59,,,37.746,0.5,64.83,0.511,
1,AFG,Asia,Afghanistan,2020-02-25,5.0,0.0,,,,,...,,597.029,9.59,,,37.746,0.5,64.83,0.511,
2,AFG,Asia,Afghanistan,2020-02-26,5.0,0.0,,,,,...,,597.029,9.59,,,37.746,0.5,64.83,0.511,
3,AFG,Asia,Afghanistan,2020-02-27,5.0,0.0,,,,,...,,597.029,9.59,,,37.746,0.5,64.83,0.511,
4,AFG,Asia,Afghanistan,2020-02-28,5.0,0.0,,,,,...,,597.029,9.59,,,37.746,0.5,64.83,0.511,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
118129,ZWE,Africa,Zimbabwe,2021-09-16,127368.0,285.0,187.429,4560.0,9.0,5.571,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
118130,ZWE,Africa,Zimbabwe,2021-09-17,127632.0,264.0,209.857,4562.0,2.0,4.286,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
118131,ZWE,Africa,Zimbabwe,2021-09-18,127739.0,107.0,217.000,4563.0,1.0,3.857,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,
118132,ZWE,Africa,Zimbabwe,2021-09-19,127938.0,199.0,238.429,4567.0,4.0,4.143,...,21.4,307.846,1.82,1.6,30.7,36.791,1.7,61.49,0.571,


In [5]:
# Print a summary of the DataFrame: every column's name, non-null count and dtype.

df.info(verbose=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118134 entries, 0 to 118133
Data columns (total 62 columns):
 #   Column                                 Non-Null Count   Dtype  
---  ------                                 --------------   -----  
 0   iso_code                               118134 non-null  object 
 1   continent                              112736 non-null  object 
 2   location                               118134 non-null  object 
 3   date                                   118134 non-null  object 
 4   total_cases                            112287 non-null  float64
 5   new_cases                              112284 non-null  float64
 6   new_cases_smoothed                     111269 non-null  float64
 7   total_deaths                           101591 non-null  float64
 8   new_deaths                             101746 non-null  float64
 9   new_deaths_smoothed                    111269 non-null  float64
 10  total_cases_per_million                111695 non-null  

In [6]:
# Snapshot filter: keep only the rows dated 2021-08-30 (the date this filter actually uses).

most_recent_date = df.loc[df['date'].eq('2021-08-30')]

# Top tourist destinations and most populous countries in Asia, Europe, and North America.
countries = [
    'France', 'Spain', 'United States', 'Italy', 'Turkey', 'Malaysia',
    'Mexico', 'Taiwan', 'Germany', 'United Kingdom', 'Cuba', 'Canada',
    'Japan', 'South Korea', 'Netherlands', 'India', 'Guatemala', 'Haiti',
]

# Narrow the snapshot down to the countries listed above.
countries_of_interest = most_recent_date.loc[most_recent_date['location'].isin(countries)]

In [7]:
# Preview the selected countries. Only the first 10 rows are shown to keep the
# output short; use a larger argument to head() to see more.

countries_of_interest.head(10)

Unnamed: 0,iso_code,continent,location,date,total_cases,new_cases,new_cases_smoothed,total_deaths,new_deaths,new_deaths_smoothed,...,extreme_poverty,cardiovasc_death_rate,diabetes_prevalence,female_smokers,male_smokers,handwashing_facilities,hospital_beds_per_thousand,life_expectancy,human_development_index,excess_mortality
19730,CAN,North America,Canada,2021-08-30,1504157.0,6874.0,3245.571,26972.0,18.0,22.429,...,0.5,105.599,7.37,12.0,16.6,,2.5,82.43,0.929,
26817,CUB,North America,Cuba,2021-08-30,646513.0,6075.0,7699.143,5219.0,75.0,85.857,...,,190.968,8.27,17.1,53.3,85.198,5.2,78.8,0.783,
38796,FRA,Europe,France,2021-08-30,6834998.0,7688.0,18101.286,114778.0,151.0,120.0,...,,86.06,4.77,30.1,35.6,,5.98,82.66,0.901,
41309,DEU,Europe,Germany,2021-08-30,3947035.0,6823.0,9343.143,92208.0,62.0,25.714,...,,156.139,8.31,28.2,33.1,,8.0,81.33,0.947,
44061,GTM,North America,Guatemala,2021-08-30,465799.0,740.0,3684.571,11886.0,28.0,52.857,...,8.7,155.898,10.18,,,76.665,0.6,74.3,0.663,
46511,HTI,North America,Haiti,2021-08-30,20850.0,17.0,18.714,584.0,0.0,0.143,...,23.5,430.548,6.65,2.9,23.1,22.863,0.7,64.0,0.51,
49417,IND,Asia,India,2021-08-30,32768880.0,30941.0,42015.286,438560.0,350.0,492.857,...,21.2,282.28,10.39,1.9,20.6,59.55,0.53,69.66,0.645,
53723,ITA,Europe,Italy,2021-08-30,4534499.0,4253.0,6531.429,129146.0,53.0,50.143,...,2.0,113.151,4.78,19.8,27.8,,3.18,83.51,0.892,
54890,JPN,Asia,Japan,2021-08-30,1473847.0,13625.0,21526.143,16016.0,47.0,47.857,...,,79.37,5.72,11.2,33.7,,13.05,84.63,0.919,
65524,MYS,Asia,Malaysia,2021-08-30,1725357.0,19268.0,21798.857,16382.0,295.0,291.429,...,0.1,260.942,16.74,1.0,42.4,,1.9,76.16,0.81,


In [8]:
# Create a new DataFrame with only the columns needed for now.
#
# Columns are selected by NAME rather than by position: positional indices
# silently pick different columns if the CSV gains, loses or reorders columns,
# whereas a missing name raises a KeyError straight away.
# The trailing numbers are the columns' positions in df.columns at the time of
# writing, kept for reference only.

data = countries_of_interest.loc[:, [
    'continent',                # 1
    'location',                 # 2  (country)
    'total_cases',              # 4
    'total_deaths',             # 7
    'people_fully_vaccinated',  # 36
    'positive_rate',            # 31
    'stringency_index',         # 45
    'population',               # 46
    'life_expectancy',          # 59
    'human_development_index',  # 60
]].copy()

# .copy() makes `data` an independent frame, so assigning values into it later
# does not raise SettingWithCopyWarning or depend on view-vs-copy behaviour.

In [9]:
# Print each column name next to its positional index in df.columns.
for position, column_name in enumerate(df.columns):
    print(position, column_name)

0 iso_code
1 continent
2 location
3 date
4 total_cases
5 new_cases
6 new_cases_smoothed
7 total_deaths
8 new_deaths
9 new_deaths_smoothed
10 total_cases_per_million
11 new_cases_per_million
12 new_cases_smoothed_per_million
13 total_deaths_per_million
14 new_deaths_per_million
15 new_deaths_smoothed_per_million
16 reproduction_rate
17 icu_patients
18 icu_patients_per_million
19 hosp_patients
20 hosp_patients_per_million
21 weekly_icu_admissions
22 weekly_icu_admissions_per_million
23 weekly_hosp_admissions
24 weekly_hosp_admissions_per_million
25 new_tests
26 total_tests
27 total_tests_per_thousand
28 new_tests_per_thousand
29 new_tests_smoothed
30 new_tests_smoothed_per_thousand
31 positive_rate
32 tests_per_case
33 tests_units
34 total_vaccinations
35 people_vaccinated
36 people_fully_vaccinated
37 total_boosters
38 new_vaccinations
39 new_vaccinations_smoothed
40 total_vaccinations_per_hundred
41 people_vaccinated_per_hundred
42 people_fully_vaccinated_per_hundred
43 total_boosters_

In [10]:
# Show the rows ordered alphabetically by continent. sort_values returns a new
# frame, so `data` itself keeps its original row order.

data.sort_values(by='continent', ascending=True)

Unnamed: 0,continent,location,total_cases,total_deaths,people_fully_vaccinated,positive_rate,stringency_index,population,life_expectancy,human_development_index
54890,Asia,Japan,1473847.0,16016.0,57369554.0,0.159,52.31,126050800.0,84.63,0.919
109230,Asia,Turkey,6366408.0,56458.0,36765581.0,0.065,32.41,85042740.0,77.69,0.82
104556,Asia,Taiwan,15991.0,834.0,900651.0,0.0,,23855010.0,80.46,
49417,Asia,India,32768880.0,438560.0,146228416.0,0.023,70.83,1393409000.0,69.66,0.645
65524,Asia,Malaysia,1725357.0,16382.0,14989026.0,,,32776200.0,76.16,0.81
99373,Asia,South Korea,251421.0,2285.0,15246121.0,0.044,50.93,51305180.0,83.03,0.916
38796,Europe,France,6834998.0,114778.0,40040630.0,0.026,66.67,67564250.0,82.66,0.901
41309,Europe,Germany,3947035.0,92208.0,50342010.0,,62.04,83900470.0,81.33,0.947
100505,Europe,Spain,4847298.0,84146.0,32996436.0,0.071,47.69,46745210.0,83.56,0.904
53723,Europe,Italy,4534499.0,129146.0,36539599.0,0.028,65.28,60367470.0,83.51,0.892


In [12]:
# Fill in NaN values manually with the most recent figures found online.
#
# The row is targeted by country name instead of a hard-coded row label: a
# label such as 104556 only points at Taiwan for one specific version of the
# CSV, and assigning through `.at` with a stale label would either overwrite
# another country's value or add a brand-new row.

# Work on an independent copy so the assignments below do not trigger
# SettingWithCopyWarning.
data = data.copy()

is_taiwan = data['location'] == 'Taiwan'
# Fail loudly if the snapshot does not contain exactly one Taiwan row.
assert is_taiwan.sum() == 1, "expected exactly one row for Taiwan"

data.loc[is_taiwan, 'stringency_index'] = 56.48  # stringency index of Taiwan (2021)
data.loc[is_taiwan, 'human_development_index'] = 0.916  # HDI of Taiwan -- TODO: record the source and year
# TODO: Malaysia's positive_rate is still NaN; no replacement value has been chosen yet.

data.sort_values('continent')

Unnamed: 0,continent,location,total_cases,total_deaths,people_fully_vaccinated,positive_rate,stringency_index,population,life_expectancy,human_development_index
54890,Asia,Japan,1473847.0,16016.0,57369554.0,0.159,52.31,126050800.0,84.63,0.919
109230,Asia,Turkey,6366408.0,56458.0,36765581.0,0.065,32.41,85042740.0,77.69,0.82
104556,Asia,Taiwan,15991.0,834.0,900651.0,0.0,56.48,23855010.0,80.46,0.916
49417,Asia,India,32768880.0,438560.0,146228416.0,0.023,70.83,1393409000.0,69.66,0.645
65524,Asia,Malaysia,1725357.0,16382.0,14989026.0,,,32776200.0,76.16,0.81
99373,Asia,South Korea,251421.0,2285.0,15246121.0,0.044,50.93,51305180.0,83.03,0.916
38796,Europe,France,6834998.0,114778.0,40040630.0,0.026,66.67,67564250.0,82.66,0.901
41309,Europe,Germany,3947035.0,92208.0,50342010.0,,62.04,83900470.0,81.33,0.947
100505,Europe,Spain,4847298.0,84146.0,32996436.0,0.071,47.69,46745210.0,83.56,0.904
53723,Europe,Italy,4534499.0,129146.0,36539599.0,0.028,65.28,60367470.0,83.51,0.892
