In [1]:
import numpy as np
import pandas as pd
import my_own_package

In [47]:
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', 0)

In [3]:
data_path = r'C:\Users\DucTRung\Desktop\country_by_freedom_indexes_in_covid19\data_sets'

# COVID-19

In [4]:
covid_f = pd.read_csv(data_path + '\\worldometer_covid19_na.csv')

## Drop unneccesrily indexes

In [5]:
# index: 'world'
covid_f.drop([0], inplace=True)

In [6]:
# columns: 'new_cases', 'new_deaths'
covid_f.drop(columns=['new_cases', 'new_deaths'], inplace=True)

In [7]:
covid_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 212 entries, 1 to 212
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   country           212 non-null    object
 1   total_cases       212 non-null    object
 2   total_deaths      162 non-null    object
 3   total_recovered   188 non-null    object
 4   active_cases      212 non-null    object
 5   serious_critical  131 non-null    object
 6   cases_over_1_m    210 non-null    object
 7   deaths_over_1_m   160 non-null    object
 8   total_tests       160 non-null    object
 9   tests_over_1_m    160 non-null    object
dtypes: object(10)
memory usage: 18.2+ KB


## Converting from String to Float

In [8]:
def convert_float(data):
    if data is not np.nan:
        return data.replace(',', '')
    else:
        return data

In [9]:
covid_f['total_cases'] = covid_f['total_cases'].apply(convert_float).astype('float')
covid_f['total_deaths'] = covid_f['total_deaths'].apply(convert_float).astype('float')
covid_f['total_recovered'] = covid_f['total_recovered'].apply(convert_float).astype('float')
covid_f['total_tests'] = covid_f['total_tests'].apply(convert_float).astype('float')
covid_f['active_cases'] = covid_f['active_cases'].apply(convert_float).astype('float')
covid_f['serious_critical'] = covid_f['serious_critical'].apply(convert_float).astype('float')
covid_f['cases_over_1_m'] = covid_f['cases_over_1_m'].apply(convert_float).astype('float')
covid_f['deaths_over_1_m'] = covid_f['deaths_over_1_m'].apply(convert_float).astype('float')
covid_f['tests_over_1_m'] = covid_f['tests_over_1_m'].apply(convert_float).astype('float')

In [10]:
covid_f.dtypes

country              object
total_cases         float64
total_deaths        float64
total_recovered     float64
active_cases        float64
serious_critical    float64
cases_over_1_m      float64
deaths_over_1_m     float64
total_tests         float64
tests_over_1_m      float64
dtype: object

## Missing values

In [12]:
covid_f.columns

Index(['country', 'total_cases', 'total_deaths', 'total_recovered',
       'active_cases', 'serious_critical', 'cases_over_1_m', 'deaths_over_1_m',
       'total_tests', 'tests_over_1_m'],
      dtype='object')

### 'total_deaths

In [17]:
covid_f['total_deaths'].fillna(0, inplace=True)

### 'total_recovered'

In [27]:
covid_f['total_recovered'].mean()

2446.244680851064

In [25]:
# replace 'total_recovered' of 'UK' by mean of all countries 'total_recovered'
covid_f.loc[6, 'total_recovered'] = covid_f['total_recovered'].mean()

In [29]:
covid_f['total_recovered'].fillna(0, inplace=True)

### 'active_cases'

In [30]:
covid_f['active_cases'].isnull().sum()

0

### 'serious_critical'

In [34]:
covid_f[covid_f['country'] == 'Vietnam']

Unnamed: 0,country,total_cases,total_deaths,total_recovered,active_cases,serious_critical,cases_over_1_m,deaths_over_1_m,total_tests,tests_over_1_m
112,Vietnam,266.0,0.0,169.0,97.0,8.0,3.0,,121821.0,1252.0


In [38]:
covid_f['serious_critical'].fillna(method='ffill', inplace=True)

### cases_over_1_m

In [42]:
covid_f[covid_f['cases_over_1_m'].isnull()]

Unnamed: 0,country,total_cases,total_deaths,total_recovered,active_cases,serious_critical,cases_over_1_m,deaths_over_1_m,total_tests,tests_over_1_m
83,Diamond Princess,712.0,12.0,639.0,61.0,7.0,,,,
194,MS Zaandam,9.0,2.0,0.0,7.0,1.0,,,,


In [44]:
covid_f.drop([83, 194], inplace=True)

### 'deaths_over_1_m'

In [50]:
covid_f['deaths_over_1_m'].fillna(0, inplace=True)

### 'total_tests'

In [67]:
covid_f['total_tests'].fillna('unknown', inplace=True)

### 'tests_over_1_m'

In [70]:
covid_f['tests_over_1_m'].fillna('unknown', inplace=True)

In [71]:
covid_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 1 to 212
Data columns (total 10 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   country           210 non-null    object 
 1   total_cases       210 non-null    float64
 2   total_deaths      210 non-null    float64
 3   total_recovered   210 non-null    float64
 4   active_cases      210 non-null    float64
 5   serious_critical  210 non-null    float64
 6   cases_over_1_m    210 non-null    float64
 7   deaths_over_1_m   210 non-null    float64
 8   total_tests       210 non-null    object 
 9   tests_over_1_m    210 non-null    object 
dtypes: float64(7), object(3)
memory usage: 18.0+ KB


In [72]:
covid_f.to_csv('wrangled_frame\\covid_f.csv', index=False)

# Life Expectancy

In [73]:
lei_f = pd.read_csv(data_path + '\\le_index_2019.csv')

## Merge to 'covid_f'

In [74]:
# Checking the key value ('country')
lei_f.set_index(lei_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,life_expectancy_index_2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,U.S. Virgin Islands,81.17
False,Guam,80.74
False,Puerto Rico,80.69
False,Samoa,73.75
False,Solomon Islands,73.38
False,North Korea,72.89
False,Tajikistan,71.76
False,Tonga,71.32
False,Vanuatu,70.99
False,Kiribati,69.17


In [80]:
# merge 
merged_f = pd.merge(covid_f, lei_f, how='left')

## Missing values

In [81]:
merged_f['life_expectancy_index_2019'].isnull().sum()

23

In [83]:
merged_f.dtypes

country                        object
total_cases                   float64
total_deaths                  float64
total_recovered               float64
active_cases                  float64
serious_critical              float64
cases_over_1_m                float64
deaths_over_1_m               float64
total_tests                    object
tests_over_1_m                 object
life_expectancy_index_2019    float64
dtype: object

# Corruption Perception Index

In [84]:
cpi_f = pd.read_csv(data_path + '\\cpi_2019.csv')

## Merge to 'covid_f'

In [86]:
# check the key value
cpi_f.set_index(cpi_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,corruption_perception_index_2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,Cape Verde,58
False,Solomon Islands,42
False,Lesotho,40
False,Kosovo,36
False,Comoros,25
False,Tajikistan,25
False,Turkmenistan,19
False,North Korea,17


In [87]:
merged_f = pd.merge(merged_f, cpi_f, how='left')

In [88]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 12 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
dtypes: float

# BRI Countries

In [89]:
bri_f = pd.read_csv(data_path + '\\bri_countries.csv')

## Merge to 'covid_f'

In [90]:
# check the key value
bri_f.set_index(bri_f['bri_country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,bri_country,income_group
bri_country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,Cook Islands,
False,Comoros,Low income
False,Kiribati,Lower middle income
False,Kyrgyz Republic,Lower middle income
False,Lesotho,Lower middle income
False,Micronesia,Lower middle income
False,Niue,
False,Samoa,Upper middle income
False,Slovak Republic,High income
False,Solomon Islands,Lower middle income


In [91]:
merged_f = pd.merge(merged_f, bri_f, left_on='country', right_on='bri_country', how='left')

In [92]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 14 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

## Map memership and non-membership bri countriy

In [101]:
def bri_member(country):
    if country is not np.nan:
        return 'bri_member'
    else:
        return 'non_member'

In [103]:
merged_f['bri_country'] = merged_f['bri_country'].apply(bri_member)

# Country_by_Region

In [107]:
cbr_f = pd.read_csv(data_path + '\\country_by_region.csv')

## Merge to 'covid_f'

In [108]:
# check the key value
cbr_f.set_index(cbr_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,region,continent
country,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
False,North Korea,Eastern Asia,Asia
False,Tajikistan,Central Asia,Asia
False,Turkmenistan,Central Asia,Asia
False,Comoros,Eastern Africa,Africa
False,Lesotho,Southern Africa,Africa
False,Swaziland,Southern Africa,Africa
False,Puerto Rico,Caribbean,Latin America and Caribbean
False,United States Virgin Islands,Caribbean,Latin America and Caribbean
False,Solomon Islands,Melanesia,Oceania
False,Vanuatu,Melanesia,Oceania


In [109]:
merged_f = pd.merge(merged_f, cbr_f, how='left')

In [110]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 16 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

## Missing values

In [111]:
merged_f[merged_f['region'].isnull()]

Unnamed: 0,country,total_cases,total_deaths,total_recovered,active_cases,serious_critical,cases_over_1_m,deaths_over_1_m,total_tests,tests_over_1_m,life_expectancy_index_2019,corruption_perception_index_2019,bri_country,income_group,region,continent
178,Eswatini,15.0,0.0,8.0,7.0,1.0,13.0,0.0,714,615,61.05,,non_member,,,


In [113]:
merged_f.loc[178, 'region'] = 'Southern Africa'

In [115]:
merged_f.loc[178, 'continent'] = 'Africa'

In [117]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 16 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

# Demoracy Index

In [118]:
di_f = pd.read_csv(data_path + '\\democracy_index_2019.csv')

## Merge to 'covid_19'

In [119]:
# check the key value
di_f.set_index(di_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,democracy_index_2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,Cape Verde,77.8
False,Lesotho,65.4
False,Comoros,31.5
False,Tajikistan,19.3
False,Turkmenistan,17.2
False,North Korea,10.8


In [120]:
merged_f = pd.merge(merged_f, di_f, how='left')

In [121]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 17 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

# Economic Freedom Index

In [122]:
efi_f = pd.read_csv(data_path + '\\economic_freedom_index.csv')

## Merge to 'covid_f'

In [123]:
# check the value key
efi_f.set_index(efi_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,economic_freedom_index_2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,Kosovo,67.0
False,Kyrgyz Republic,62.3
False,Samoa,62.2
False,Tonga,57.7
False,Vanuatu,56.4
False,Tajikistan,55.6
False,Comoros,55.4
False,Solomon Islands,54.6
False,Lesotho,53.1
False,The Gambia,52.4


In [124]:
merged_f = pd.merge(merged_f, efi_f, how='left')

In [125]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 18 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

# Press Freedom Index

In [126]:
pfi_f = pd.read_csv(data_path + '\\press_freedom_index.csv')

## Merge to 'covid_f

In [127]:
# check the key value
pfi_f.set_index(pfi_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,press_freedom_index_2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,Samoa,81.75
False,Cape Verde,80.19
False,Tonga,74.59
False,Organisation of Eastern Caribbean States,73.96
False,Comoros,72.09
False,Northern Cyprus,70.33
False,Kosovo,70.32
False,Lesotho,70.26
False,Swaziland,50.91
False,Tajikistan,45.98


In [130]:
merged_f = pd.merge(merged_f, pfi_f, how='left')

In [131]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 19 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

# World Happiness Index

In [132]:
hi_f = pd.read_csv(data_path + '\\world_happiness_index.csv')

In [133]:
hi_f = hi_f[['country', 'happiness_index_2019']]

## Merge to 'covid_f'

In [134]:
hi_f.set_index(hi_f['country'].isin(covid_f['country'])).loc[False]

Unnamed: 0_level_0,country,happiness_index_2019
country,Unnamed: 1_level_1,Unnamed: 2_level_1
False,Trinidad & Tobago,6.192
False,Kosovo,6.1
False,Northern Cyprus,5.718
False,Tajikistan,5.467
False,Turkmenistan,5.247
False,Palestinian Territories,4.696
False,Comoros,3.973
False,Lesotho,3.802


## Convert Index from 10 unit to 100 unit

In [135]:
hi_f[hi_f.columns[1]] *= 10

In [136]:
hi_f

Unnamed: 0,country,happiness_index_2019
0,Finland,77.69
1,Denmark,76.00
2,Norway,75.54
3,Iceland,74.94
4,Netherlands,74.88
...,...,...
151,Rwanda,33.34
152,Tanzania,32.31
153,Afghanistan,32.03
154,Central African Republic,30.83


In [137]:
merged_f = pd.merge(merged_f, hi_f, how='left')

In [138]:
merged_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 210 entries, 0 to 209
Data columns (total 20 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           210 non-null    object 
 1   total_cases                       210 non-null    float64
 2   total_deaths                      210 non-null    float64
 3   total_recovered                   210 non-null    float64
 4   active_cases                      210 non-null    float64
 5   serious_critical                  210 non-null    float64
 6   cases_over_1_m                    210 non-null    float64
 7   deaths_over_1_m                   210 non-null    float64
 8   total_tests                       210 non-null    object 
 9   tests_over_1_m                    210 non-null    object 
 10  life_expectancy_index_2019        187 non-null    float64
 11  corruption_perception_index_2019  168 non-null    float64
 12  bri_coun

# Reorder of features

In [141]:
merged_f.columns

Index(['country', 'total_cases', 'total_deaths', 'total_recovered',
       'active_cases', 'serious_critical', 'cases_over_1_m', 'deaths_over_1_m',
       'total_tests', 'tests_over_1_m', 'life_expectancy_index_2019',
       'corruption_perception_index_2019', 'bri_country', 'income_group',
       'region', 'continent', 'democracy_index_2019',
       'economic_freedom_index_2019', 'press_freedom_index_2019',
       'happiness_index_2019'],
      dtype='object')

In [143]:
merged_f = merged_f[['country', 'region', 'continent', 'bri_country', 'income_group',
                    'total_cases', 'total_deaths', 'total_recovered', 'total_tests',
                    'active_cases', 'serious_critical', 'cases_over_1_m', 'deaths_over_1_m', 'tests_over_1_m',
                    'life_expectancy_index_2019',  'happiness_index_2019', 'corruption_perception_index_2019',
                    'democracy_index_2019', 'economic_freedom_index_2019', 'press_freedom_index_2019']]

## Rename features

In [147]:
merged_f.rename(columns={'income_group':'bri_member_income_group'}, inplace=True)

In [148]:
merged_f.head()

Unnamed: 0,country,region,continent,bri_country,bri_member_income_group,total_cases,total_deaths,total_recovered,total_tests,active_cases,serious_critical,cases_over_1_m,deaths_over_1_m,tests_over_1_m,life_expectancy_index_2019,happiness_index_2019,corruption_perception_index_2019,democracy_index_2019,economic_freedom_index_2019,press_freedom_index_2019
0,USA,Northern America,Northern America,non_member,,588465.0,23711.0,37326.0,2961820.0,527428.0,12772.0,1778.0,72.0,8948,79.11,68.92,69.0,79.6,76.8,74.31
1,Spain,Southern Europe,Europe,non_member,,172541.0,18056.0,67504.0,600000.0,86981.0,7371.0,3690.0,386.0,12833,83.99,63.54,62.0,82.9,65.7,78.01
2,Italy,Southern Europe,Europe,bri_member,High income,159516.0,20465.0,35435.0,1046910.0,103616.0,3260.0,2638.0,338.0,17315,84.01,62.23,53.0,75.2,62.2,75.02
3,France,Western Europe,Europe,non_member,,136779.0,14967.0,27718.0,333807.0,94094.0,6821.0,2095.0,229.0,5114,83.13,65.92,69.0,81.2,63.8,77.79
4,Germany,Western Europe,Europe,non_member,,130434.0,3220.0,68200.0,1317890.0,59014.0,4288.0,1557.0,38.0,15730,81.88,69.85,80.0,86.8,73.5,85.4


# Handle missing values

In [149]:
for i, j in zip(merged_f.isnull().sum().index, merged_f.isnull().sum()):
    print(i, ':', j)

country : 0
region : 0
continent : 0
bri_country : 0
bri_member_income_group : 85
total_cases : 0
total_deaths : 0
total_recovered : 0
total_tests : 0
active_cases : 0
serious_critical : 0
cases_over_1_m : 0
deaths_over_1_m : 0
tests_over_1_m : 0
life_expectancy_index_2019 : 23
happiness_index_2019 : 62
corruption_perception_index_2019 : 42
democracy_index_2019 : 49
economic_freedom_index_2019 : 44
press_freedom_index_2019 : 42


## Drop all points with all NaN features

In [158]:
merged_f.columns

Index(['country', 'region', 'continent', 'bri_country',
       'bri_member_income_group', 'total_cases', 'total_deaths',
       'total_recovered', 'total_tests', 'active_cases', 'serious_critical',
       'cases_over_1_m', 'deaths_over_1_m', 'tests_over_1_m',
       'life_expectancy_index_2019', 'happiness_index_2019',
       'corruption_perception_index_2019', 'democracy_index_2019',
       'economic_freedom_index_2019', 'press_freedom_index_2019'],
      dtype='object')

### Freedom_indexes frame

In [152]:
indexes_f = merged_f[['life_expectancy_index_2019', 'happiness_index_2019', 'corruption_perception_index_2019', 'democracy_index_2019', 'economic_freedom_index_2019', 'press_freedom_index_2019']]

In [156]:
indexes_f.dropna(how='all', inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


### Epidemic status

In [168]:
epidemic_f = merged_f[['country', 'region', 'continent', 'bri_country',
       'bri_member_income_group', 'total_cases', 'total_deaths',
       'total_recovered', 'total_tests', 'active_cases', 'serious_critical',
       'cases_over_1_m', 'deaths_over_1_m', 'tests_over_1_m',]]

In [169]:
epidemic_f.isnull().sum()

country                     0
region                      0
continent                   0
bri_country                 0
bri_member_income_group    85
total_cases                 0
total_deaths                0
total_recovered             0
total_tests                 0
active_cases                0
serious_critical            0
cases_over_1_m              0
deaths_over_1_m             0
tests_over_1_m              0
dtype: int64

### Merge 'indexes_f' and 'epidemic_f'

In [170]:
wrangled_f = pd.merge(epidemic_f, indexes_f, left_index=True, right_index=True)

In [171]:
wrangled_f.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 190 entries, 0 to 209
Data columns (total 20 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   country                           190 non-null    object 
 1   region                            190 non-null    object 
 2   continent                         190 non-null    object 
 3   bri_country                       190 non-null    object 
 4   bri_member_income_group           125 non-null    object 
 5   total_cases                       190 non-null    float64
 6   total_deaths                      190 non-null    float64
 7   total_recovered                   190 non-null    float64
 8   total_tests                       190 non-null    object 
 9   active_cases                      190 non-null    float64
 10  serious_critical                  190 non-null    float64
 11  cases_over_1_m                    190 non-null    float64
 12  deaths_o

# Save wrangled data set

In [172]:
wrangled_f.to_csv('wrangled_frame\\wrangled_f_3.csv', index=False)

In [52]:
# country_f = merged_f.set_index(['country'])

In [53]:
# region_f = merged_f.set_index(['region'])

In [54]:
# continent_f = merged_f.set_index(['continent'])

In [55]:
# country_f.to_csv(data_path + '\\wrangled_frame\\country_f.csv', index=True)

In [56]:
# region_f.to_csv(data_path + '\\wrangled_frame\\region_f.csv', index=True)

In [57]:
# continent_f.to_csv(data_path + '\\wrangled_frame\\continent_f.csv', index=True)