# COVID-19 Vaccine Progress Indicators Analysis

### Import Dependencies

In [2]:
#Data Cleaning
import pandas as pd
import numpy as np
from scipy.stats import linregress

#Data Visualization
import matplotlib.pyplot as plt
import seaborn as sn

# Data Extraction
import requests
import json
import time

## Import CSV file

In [5]:
# Load the per-country Covid vaccination records from the local Resources folder
df = pd.read_csv(filepath_or_buffer='./Resources/country_vaccinations.csv')

### CSV Data Cleaning

In [6]:
# Per ISO code, keep the maximum of the total vaccinations and of the
# fully-vaccinated-per-hundred figure (used as the most recent value --
# assumes both series only grow over time; TODO confirm).
# Countries missing either figure are dropped.
countryVacc_df = (
    df.groupby('iso_code')[['total_vaccinations', 'people_fully_vaccinated_per_hundred']]
    .max()
    .dropna()
)
# Discard countries whose fully-vaccinated share is exactly zero
countryVacc_df = countryVacc_df.loc[countryVacc_df['people_fully_vaccinated_per_hundred'].ne(0)]

In [7]:
# Move the iso_code group key out of the index so it becomes a regular column
vaccination_df = countryVacc_df.reset_index(drop=False)

In [8]:
# Persist the per-country vaccination table (without the row index) for later cells
vaccination_df.to_csv(path_or_buf='./Resources/iso_code.csv', index=False)

## Import World Bank JSON data with API

In [6]:
# Reload the ISO-code table that was written out from the vaccination data
file = './Resources/iso_code.csv'
iso_code_df = pd.read_csv(filepath_or_buffer=file)

In [7]:
# Append one empty-string placeholder column per World Bank indicator;
# the request loop fills these in afterwards.
for blank_col in ('GDP', 'Total_Pop', 'Pop_Den', 'health_exp'):
    iso_code_df[blank_col] = ''

In [8]:
# JSON requests from world bank database

# Indicators and date we want to request.
# `variable` and `indicator_columns` are paired by position: the k-th indicator
# code is stored in the k-th column name, so the loop does not rely on the
# physical position of the columns in the dataframe.
variable = ['NY.GDP.MKTP.CD','SP.POP.TOTL','EN.POP.DNST','SH.XPD.PVTD.CH.ZS']
indicator_columns = ['GDP','Total_Pop','Pop_Den','health_exp']
date = '2018'

# Seconds to wait for one response before treating the request as failed
REQUEST_TIMEOUT = 30


def fetch_indicator_value(session, iso, var, date):
    """Return the World Bank value of indicator `var` for country `iso` in year `date`.

    Parameters
    ----------
    session : requests.Session used to send the request.
    iso : country code placed in the URL path.
    var : World Bank indicator id.
    date : year (string) passed as the `date` query parameter.

    Returns
    -------
    The `value` field of the first observation in the response; this is
    `None` when the API reports the observation as null.

    Raises
    ------
    requests.RequestException on network or HTTP errors.
    ValueError, IndexError, KeyError or TypeError when the payload is not the
    expected `[metadata, [observations]]` structure (e.g. an error message is
    returned for a country code the API does not know).
    """
    url = f'https://api.worldbank.org/v2/country/{iso}/indicator/{var}'
    reply = session.get(url, params={'date': date, 'format': 'json'}, timeout=REQUEST_TIMEOUT)
    reply.raise_for_status()
    return reply.json()[1][0]['value']


# One session for all requests so the connection can be reused
with requests.Session() as session:
    for var, column in zip(variable, indicator_columns): # loop through all indicators requested
        col_pos = iso_code_df.columns.get_loc(column)
        for i, iso in enumerate(iso_code_df['iso_code']): # Loop through all iso codes in dataframe
            #keep track of how long the request takes
            tic = time.perf_counter()

            try:
                value = fetch_indicator_value(session, iso, var, date)
            except (requests.RequestException, ValueError, IndexError, KeyError, TypeError):
                # Only request/parsing failures are handled here; anything else
                # (including KeyboardInterrupt) is allowed to propagate.
                print(f'Error {iso},{var} data missing, time:{time.perf_counter() - tic:0.4f}s.')
                continue

            if value is None:
                # The API answered but has no figure for this year: leave the
                # placeholder untouched so the row is treated as missing later.
                print(f'Error {iso},{var} data missing, time:{time.perf_counter() - tic:0.4f}s.')
                continue

            #add response to dataframe (row by position, column by name)
            iso_code_df.iloc[i, col_pos] = value

            #stop time
            toc = time.perf_counter()

            #print statement to monitor progress
            print(f'{iso},{var}: Extraction finished in {toc - tic:0.4f}s.',end='\r')
 

Error AIA,NY.GDP.MKTP.CD data missing.d in 0.3933s.
Error FLK,NY.GDP.MKTP.CD data missing.d in 0.2120s.
Error GGY,NY.GDP.MKTP.CD data missing.d in 0.1810s.
Error JEY,NY.GDP.MKTP.CD data missing.d in 0.2054s.
Error MSR,NY.GDP.MKTP.CD data missing.d in 0.2022s.
Error OWID_CYN,NY.GDP.MKTP.CD data missing.0.1690s.
Error OWID_ENG,NY.GDP.MKTP.CD data missing.
Error OWID_NIR,NY.GDP.MKTP.CD data missing.
Error OWID_SCT,NY.GDP.MKTP.CD data missing.
Error OWID_WLS,NY.GDP.MKTP.CD data missing.
Error SHN,NY.GDP.MKTP.CD data missing.d in 0.3230s.
Error AIA,SP.POP.TOTL. data missing.d in 0.2276s.s.
Error FLK,SP.POP.TOTL. data missing.d in 0.1921s.
Error GGY,SP.POP.TOTL. data missing.d in 0.1572s.
Error JEY,SP.POP.TOTL. data missing.d in 0.1874s.
Error MSR,SP.POP.TOTL. data missing.d in 0.1469s.
Error OWID_CYN,SP.POP.TOTL. data missing.0.1690s.
Error OWID_ENG,SP.POP.TOTL. data missing.
Error OWID_NIR,SP.POP.TOTL. data missing.
Error OWID_SCT,SP.POP.TOTL. data missing.
Error OWID_WLS,SP.POP.TOTL. data

### Data Cleaning

In [9]:
# Drop rows with missing values
# Empty strings are the placeholders left where no indicator value was stored.
# `np.nan` is used because the `np.NaN` alias no longer exists in NumPy 2.0;
# the frame is rebound instead of mutated in place so the cell can be re-run.
iso_code_df = iso_code_df.replace('', np.nan)

# Keep only complete rows and make the indicator columns explicitly numeric
# rather than relying on implicit dtype inference of the placeholder columns.
iso_cleaned_df = iso_code_df.dropna().astype({
    'GDP': 'float64',
    'Total_Pop': 'float64',
    'Pop_Den': 'float64',
    'health_exp': 'float64',
})

In [12]:
# Preview the first ten complete rows
iso_cleaned_df.head(n=10)

NameError: name 'iso_cleaned_df' is not defined

### ISO 3166-1 alpha-3 Conversion

In [9]:
#Clean CSV file with ISO country codes and average coordinates
# source https://gist.github.com/tadast/8827699

def rm_quote(x):
    """Return the raw CSV field `x` with every double-quote character removed."""
    return x.replace('"', '')

#Remove double quoting
iso_conv_df = pd.read_csv('./Resources/countries_codes_and_coordinates.csv',
                          converters={
                              'Alpha-3 code': rm_quote,
                              'Latitude (average)':rm_quote,
                              'Longitude (average)':rm_quote
                          })

#Rename column to match df to merge to
iso_conv_df = iso_conv_df.rename(columns={
    'Alpha-3 code':'iso_code',
    'Latitude (average)':'Lat_avg',
    'Longitude (average)':'Lng_avg'
})

#Remove space in ISO code
# strip() removes surrounding whitespace only, so a value that has no leading
# space keeps its first character (slicing with [1:] would cut it off).
for padded_col in ('iso_code', 'Lat_avg', 'Lng_avg'):
    iso_conv_df[padded_col] = iso_conv_df[padded_col].str.strip()

#drop unneeded columns
iso_conv_df = iso_conv_df[['iso_code','Country','Lat_avg','Lng_avg']]

In [22]:
# Merge Data
# Attach country name and average coordinates to the vaccination figures.
total_iso_df = vaccination_df.merge(iso_conv_df, on='iso_code')

# Join on the ISO code only. Without an explicit key the merge would also
# match on the shared float columns (total_vaccinations,
# people_fully_vaccinated_per_hundred), and iso_cleaned_df's copies of those
# went through a CSV round trip -- any tiny float difference would silently
# drop the row from the inner join.
geo_columns = ['Country', 'Lat_avg', 'Lng_avg']
iso_cleaned_df = (
    iso_cleaned_df
    .drop(columns=geo_columns, errors='ignore')  # keeps the cell safe to re-run
    .merge(total_iso_df[['iso_code'] + geo_columns], on='iso_code')
)


Unnamed: 0,iso_code,Country,GDP,Total_Pop,Pop_Den,health_exp,Lat_avg,Lng_avg,total_vaccinations,people_fully_vaccinated_per_hundred
0,ALB,Albania,1.514702e+10,2866376.0,104.612263,44.657787,41,20,400064.0,0.02
1,AND,Andorra,3.218316e+09,77006.0,163.842553,32.065220,42.5,1.6,26414.0,6.06
2,ARE,United Arab Emirates,4.222150e+11,9630959.0,135.609110,48.421539,24,54,10336214.0,38.79
3,ARG,Argentina,5.176267e+11,44494502.0,16.258510,38.290127,-34,-64,7386650.0,1.95
4,AUT,Austria,4.550949e+11,8840521.0,107.131859,26.914829,47.3333,13.3333,2863389.0,8.73
...,...,...,...,...,...,...,...,...,...,...
95,TUR,Turkey,7.783819e+11,82319724.0,106.960129,22.617729,39,35,21678461.0,9.84
96,URY,Uruguay,5.959689e+10,3449299.0,19.708028,27.016809,-33,-56,1695351.0,15.95
97,USA,United States,2.058016e+13,326687501.0,35.713622,49.587131,38,-97,232407669.0,28.93
98,ZAF,South Africa,3.682889e+11,57792520.0,47.640752,44.066910,-29,24,292623.0,0.49


In [24]:
# Arrange the columns in their final order: identifiers first, then the
# World Bank indicators, the coordinates and finally the vaccination figures.
iso_cleaned_df = iso_cleaned_df.loc[:, [
    'iso_code', 'Country',
    'GDP', 'Total_Pop', 'Pop_Den', 'health_exp',
    'Lat_avg', 'Lng_avg',
    'total_vaccinations', 'people_fully_vaccinated_per_hundred',
]]
iso_cleaned_df

Unnamed: 0,iso_code,Country,GDP,Total_Pop,Pop_Den,health_exp,Lat_avg,Lng_avg,total_vaccinations,people_fully_vaccinated_per_hundred
0,ALB,Albania,1.514702e+10,2866376.0,104.612263,44.657787,41,20,400064.0,0.02
1,AND,Andorra,3.218316e+09,77006.0,163.842553,32.065220,42.5,1.6,26414.0,6.06
2,ARE,United Arab Emirates,4.222150e+11,9630959.0,135.609110,48.421539,24,54,10336214.0,38.79
3,ARG,Argentina,5.176267e+11,44494502.0,16.258510,38.290127,-34,-64,7386650.0,1.95
4,AUT,Austria,4.550949e+11,8840521.0,107.131859,26.914829,47.3333,13.3333,2863389.0,8.73
...,...,...,...,...,...,...,...,...,...,...
95,TUR,Turkey,7.783819e+11,82319724.0,106.960129,22.617729,39,35,21678461.0,9.84
96,URY,Uruguay,5.959689e+10,3449299.0,19.708028,27.016809,-33,-56,1695351.0,15.95
97,USA,United States,2.058016e+13,326687501.0,35.713622,49.587131,38,-97,232407669.0,28.93
98,ZAF,South Africa,3.682889e+11,57792520.0,47.640752,44.066910,-29,24,292623.0,0.49


### Save to CSV for future use

In [25]:
# Write the merged, reordered table to disk without the row index
iso_cleaned_df.to_csv(path_or_buf='./Resources/isoDataFinal.csv', index=False)