In [1]:
import numpy as np
import pandas as pd

pd.set_option('display.max_columns', None)
pd.set_option("max_rows", None)

# the 4 datasets used below are pivoted and cleaned in excel
#   - Pivot dates
#   - Ensure all country names are the exact same
#   - Lowercase all names
confirmed_df = pd.read_csv("time_series_covid_19_confirmed.csv")
confirmed_df['Country/Region'] = confirmed_df['Country/Region'].str.lower()
confirmed_df['Confirmed Cases'] = confirmed_df.groupby(['Country/Region', 'Date'])['Confirmed Cases'].transform('sum')

death_df = pd.read_csv("time_series_covid_19_deaths.csv")
death_df['Country/Region'] = death_df['Country/Region'].str.lower()
death_df['Deaths'] = death_df.groupby(['Country/Region', 'Date'])['Deaths'].transform('sum')

recover_df = pd.read_csv("time_series_covid_19_recovered.csv")
recover_df['Country/Region'] = recover_df['Country/Region'].str.lower()
recover_df['Recovered'] = recover_df.groupby(['Country/Region', 'Date'])['Recovered'].transform('sum')

total_pop_df = pd.read_csv("total_population.csv", engine='python').head(83)
total_pop_df['country'] = total_pop_df['country'].str.lower()
total_pop_df = total_pop_df.set_index('country')

In [2]:
# Find total numbers for deaths/confirmed case/recovered cases

death_total_df = pd.DataFrame(death_df.groupby(['Country/Region'])['Deaths'].max())
death_total_df.rename(columns={'Deaths':'total deaths'}, inplace=True)

confirmed_total_df = pd.DataFrame(confirmed_df.groupby(['Country/Region'])['Confirmed Cases'].max())
confirmed_total_df.rename(columns={'Confirmed Cases':'total confirmed'}, inplace=True)

recovered_total_df = pd.DataFrame(recover_df.groupby(['Country/Region'])['Recovered'].max())
recovered_total_df.rename(columns={'Recovered':'total recovered'}, inplace=True)


In [3]:
# Read in the countries based on their clusters
# (cluster formed by cultural and political characteristics of each country)

cluster = pd.read_csv("cluster.csv")
cluster = cluster.set_index('country')
cluster

Unnamed: 0_level_0,ctr_y,uai,ltowvs,PCA-1,cluster
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
argentina,ARG,86.0,20.403023,0.57938,0
uruguay,URY,100.0,26.196474,-1.372984,0
turkey,TUR,85.0,45.59194,1.538767,0
spain,ESP,86.0,47.607053,-1.342671,0
slovenia,SVN,88.0,48.61461,-1.438984,0
serbia,SRB,92.0,52.141058,1.083939,0
romania,ROM,90.0,51.889169,0.47319,0
portugal,PRT,104.0,28.211587,-1.714982,0
poland,POL,93.0,37.783375,-0.749826,0
peru,PER,87.0,25.188917,0.68882,0


In [4]:
# Concating all the data together to calculate stats per capita
result = pd.concat([confirmed_total_df, death_total_df, recovered_total_df, total_pop_df, cluster], axis=1, join='inner')
result = result.reset_index()
result['population'] = result['population'] * 1000
result['confirmed/pop'] = result['total confirmed']/result['population']
result['deaths/confirmed'] = result['total deaths']/result['total confirmed']
result['recovered/confirmed'] = result['total recovered']/result['total confirmed']


Unnamed: 0,index,total confirmed,total deaths,total recovered,population,ctr_y,uai,ltowvs,PCA-1,cluster,confirmed/pop,deaths/confirmed,recovered/confirmed
0,algeria,50400,1698,35428,43851040.0,DZA,68.0,23.0,2.690514,1,0.001149,0.03369,0.702937
1,argentina,664799,14376,525486,45195780.0,ARG,86.0,20.403023,0.57938,0,0.014709,0.021625,0.790443
2,australia,26980,861,24446,25499880.0,AUS,51.0,21.15869,-3.175086,1,0.001058,0.031913,0.906079
3,austria,39984,777,30949,9006400.0,AUT,70.0,60.453401,-3.329479,3,0.00444,0.019433,0.774035
4,bahrain,67014,231,60117,1701583.0,BHR,68.0,23.0,1.05635,1,0.039383,0.003447,0.897081
5,bangladesh,353844,5044,262953,164689400.0,BGD,60.0,47.103275,2.573541,1,0.002149,0.014255,0.743133
6,belgium,106887,9996,19079,11589620.0,BEL,94.0,81.86398,-1.93459,3,0.009223,0.093519,0.178497
7,brazil,4591364,138105,4046827,212559400.0,BRA,76.0,43.828715,1.045571,0,0.0216,0.030079,0.8814
8,bulgaria,19283,779,13867,6948445.0,BGR,85.0,69.017632,0.071254,3,0.002775,0.040398,0.719131
9,canada,149939,9294,129850,37742160.0,CAN,48.0,36.020151,-3.185865,1,0.003973,0.061985,0.866019


In [5]:
# Ranking each country by each of the 3 metric of effectiveness 
#  - confirmed cases over population
#  - deaths over confirmed cases
#  - recovered cases over confirmed cases

confirm_per_pop = result[['index', 'confirmed/pop']]
confirm_per_pop.sort_values(by=['confirmed/pop'], inplace=True, ascending=True)
confirm_per_pop['rank_confirmed'] = range(0, len(confirm_per_pop))
confirm_per_pop = confirm_per_pop.set_index('index')

death_per_confirm = result[['index', 'deaths/confirmed']]
death_per_confirm.sort_values(by=['deaths/confirmed'], inplace=True, ascending=True)
death_per_confirm['rank_deaths'] = range(0, len(death_per_confirm))
death_per_confirm = death_per_confirm.set_index('index')

recover_per_confirm = result[['index', 'recovered/confirmed']]
recover_per_confirm.sort_values(by=['recovered/confirmed'], inplace=True, ascending=False)
recover_per_confirm['rank_recover'] = range(0, len(recover_per_confirm))
recover_per_confirm = recover_per_confirm.set_index('index')



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0_level_0,confirmed/pop,rank_confirmed
index,Unnamed: 1_level_1,Unnamed: 2_level_1
vietnam,1.1e-05,0
taiwan,2.1e-05,1
thailand,5e-05,2
china,5.9e-05,3
yemen,6.8e-05,4
somalia,0.000218,5
syrian arab republic,0.000224,6
sudan,0.00031,7
malaysia,0.000325,8
new zealand,0.000379,9


In [8]:
# Collect all metrics into one final measure of effectiveness for an overall rank of how well each country has managed covid

final = pd.concat([cluster, confirm_per_pop, death_per_confirm, recover_per_confirm], axis = 1)
final['effectiveness'] = (final['rank_confirmed'] + final['rank_deaths'] + final['rank_recover'])/3
final = final.sort_values("effectiveness").reset_index()

final.to_csv('effectiveness_ranking.csv')