<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Load-the-pandas,-numpy,-and-matplotlib-libraries,-along-with-the-Covid-case-data-file" data-toc-modified-id="Load-the-pandas,-numpy,-and-matplotlib-libraries,-along-with-the-Covid-case-data-file-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Load the pandas, numpy, and matplotlib libraries, along with the Covid case data file</a></span></li><li><span><a href="#Check-the-demographic-columns-for-missing-data" data-toc-modified-id="Check-the-demographic-columns-for-missing-data-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Check the demographic columns for missing data</a></span></li><li><span><a href="#List-the-countries-with-three-or-more-missing-values-for-the-demographic-data" data-toc-modified-id="List-the-countries-with-three-or-more-missing-values-for-the-demographic-data-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>List the countries with three or more missing values for the demographic data</a></span></li><li><span><a href="#Check-the-Covid-case-data-for-missing-values" data-toc-modified-id="Check-the-Covid-case-data-for-missing-values-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Check the Covid case data for missing values</a></span></li><li><span><a href="#Use-the-fillna-method-to-fix-the-missing-cases-data-for-the-one-country-affected-(Hong-Kong)." data-toc-modified-id="Use-the-fillna-method-to-fix-the-missing-cases-data-for-the-one-country-affected-(Hong-Kong).-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Use the fillna method to fix the missing cases data for the one country affected (Hong Kong).</a></span></li></ul></div>

# Load the pandas, numpy, and matplotlib libraries, along with the Covid case data file

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Import watermark and register its IPython extension, which provides the
# %watermark magic used in the next cell.
import watermark
%load_ext watermark

In [3]:
# Print the Python/IPython versions and the versions of the imported packages
# so the environment used for this run is recorded alongside the results.
%watermark -n -v -iv

Python implementation: CPython
Python version       : 3.7.9
IPython version      : 7.20.0

pandas    : 1.2.1
json      : 2.0.9
matplotlib: 3.3.2
watermark : 2.1.0
numpy     : 1.19.2



In [5]:
# Keep printed frames compact: 80 characters wide, at most 7 columns and
# 20 rows, with floats shown as comma-grouped whole numbers.
pd.options.display.width = 80
pd.options.display.max_columns = 7
pd.options.display.max_rows = 20
pd.set_option('display.float_format', '{:,.0f}'.format)

In [6]:
# Read the Covid totals file and key the rows by ISO country code in a single
# chained expression, so the frame is never left half-initialised between steps.
covidtotals = (
    pd.read_csv('data/covidtotalswithmissings.csv')
    .set_index('iso_code')
)

In [13]:
# Show the first two rows transposed, so each column reads as a line.
covidtotals.iloc[:2].transpose()

iso_code,AFG,ALB
lastdate,2020-06-01,2020-06-01
location,Afghanistan,Albania
total_cases,15205,1137
total_deaths,257,33
total_cases_pm,391,395
total_deaths_pm,7,11
population,38928341,28778
pop_density,54,105
median_age,19,38
gdp_per_capita,1804,11803


In [7]:
# Columns holding the Covid case/death totals, plus the country name.
totvars = [
    'location',
    'total_cases',
    'total_deaths',
    'total_cases_pm',
    'total_deaths_pm',
]

In [8]:
# Columns holding the demographic attributes checked for missing data below.
demovars = [
    'population',
    'pop_density',
    'median_age',
    'gdp_per_capita',
    'hosp_beds',
]

# Check the demographic columns for missing data

In [9]:
# Number of missing values in each demographic column (summed down the rows).
covidtotals[demovars].isna().sum()

population         0
pop_density       12
median_age        24
gdp_per_capita    28
hosp_beds         46
dtype: int64

In [10]:
# For every country (row), count how many demographic columns are missing.
demovarsmisscnt = covidtotals[demovars].isna().sum(axis='columns')

In [11]:
# Tabulate how many countries have 0, 1, 2, ... missing demographic values.
demovarsmisscnt.value_counts()

0    156
1     24
2     12
3     10
4      8
dtype: int64

# List the countries with three or more missing values for the demographic data

In [17]:
# Countries missing at least three demographic values: show the name and the
# demographic columns for the first two, transposed for readability.
covidtotals.loc[demovarsmisscnt.ge(3), ['location', *demovars]].iloc[:2].transpose()

iso_code,AND,AIA
location,Andorra,Anguilla
population,77265,15002
pop_density,164,
median_age,,
gdp_per_capita,,
hosp_beds,,


In [18]:
# Sanity check: display the type of the covidtotals object.
type(covidtotals)

pandas.core.frame.DataFrame

# Check the Covid case data for missing values

In [23]:
# Number of missing values in each of the case/death total columns.
covidtotals[totvars].isna().sum()

location           0
total_cases        0
total_deaths       0
total_cases_pm     1
total_deaths_pm    1
dtype: int64

In [24]:
# For every country (row), count how many of the totals columns are missing.
totvarsmisscnt = covidtotals[totvars].isna().sum(axis='columns')

In [25]:
# Tabulate how many countries have 0, 1, 2, ... missing totals values.
totvarsmisscnt.value_counts()

0    209
2      1
dtype: int64

In [26]:
# Display, transposed, every country that has at least one missing totals value.
covidtotals[totvarsmisscnt.gt(0)].transpose()

iso_code,HKG
lastdate,2020-05-26
location,Hong Kong
total_cases,0
total_deaths,0
total_cases_pm,
total_deaths_pm,
population,7496988
pop_density,704
median_age,45
gdp_per_capita,56055


# Use the fillna method to fix the missing cases data for the one country affected (Hong Kong).

In [27]:
# Fill missing cases-per-million from total cases and population.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on covidtotals['total_cases_pm']: that form is chained
# assignment, which warns in pandas 2.x and leaves the DataFrame unchanged
# under Copy-on-Write. Only rows where total_cases_pm is NaN are affected;
# the replacement values are aligned on the index.
covidtotals['total_cases_pm'] = covidtotals['total_cases_pm'].fillna(
    covidtotals['total_cases'] / (covidtotals['population'] / 1000000)
)

In [30]:
# Fill missing deaths-per-million from total deaths and population.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on covidtotals['total_deaths_pm']: that form is chained
# assignment, which warns in pandas 2.x and leaves the DataFrame unchanged
# under Copy-on-Write. Only rows where total_deaths_pm is NaN are affected;
# the replacement values are aligned on the index.
covidtotals['total_deaths_pm'] = covidtotals['total_deaths_pm'].fillna(
    covidtotals['total_deaths'] / (covidtotals['population'] / 1000000)
)

In [31]:
covidtotals[totvars].isnull().sum(axis=0)

location           0
total_cases        0
total_deaths       0
total_cases_pm     0
total_deaths_pm    0
dtype: int64