# Common imports

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt

import pandas as pd

from pathlib import Path

# Manipulating Human Development Reports dataset

## Reading Human Development Reports dataset

In [None]:
# Folder holding the raw CSV datasets; the path is relative to the current working directory
data_folder = Path('./data')

In [None]:
# Load only the columns used below: the country name and the yearly HDI values (1990-2022)
hdr_df = pd.read_csv(
    data_folder.joinpath(
        'human_development_reports',
        'HDR23-24_Composite_indices_complete_time_series.csv',
    ),
    usecols=['country'] + [f'hdi_{year}' for year in range(1990, 2023)],
)


# Country groups studied in this notebook. Names must match the `country`
# column of each dataset after the country name treatments.
# Each country is listed exactly once: these lists are used to select the
# columns that feed the per-bloc means, so a repeated name would be selected
# (and therefore weighted) twice.
BRICS = [
    "Brazil",
    "Russia",
    "India",
    "China",
    "South Africa",
    "Egypt",
    "Ethiopia",
    "Iran",
    "United Arab Emirates",
]
Mercosul = [
    "Brazil",
    "Argentina",
    "Paraguay",
    "Uruguay",
    "Chile",
    "Colombia",
    "Ecuador",
    "Guyana",
    "Peru",
    "Suriname",
]

## Country name treatments

In [None]:
# Normalise country names in two steps:
#   1. shorten "Russian Federation" to "Russia";
#   2. strip a parenthesised suffix (and the whitespace before it) from names that have one.
hdr_df['country'] = (
    hdr_df['country']
    .str.replace('Russian Federation', 'Russia')
    .str.replace(r'(.*?)\s*\(.*\)', r'\1', regex=True)
)

## Confirming whether countries are found on the database

In [None]:
def double_check_countries_existence_on_df(countries_list: list, df: pd.DataFrame) -> None:
    """Verify that every country in ``countries_list`` appears in ``df['country']``.

    Args:
        countries_list: Country names expected to be present in the dataset.
        df: DataFrame with a ``country`` column holding the available names.

    Raises:
        AssertionError: For the first country of ``countries_list`` that is
            not found, with the message "<country> not found on the dataset".
    """
    # Build the lookup once instead of scanning the column for every country.
    known_countries = set(df['country'])
    for country in countries_list:
        if country not in known_countries:
            # Raised explicitly (rather than via `assert`) so the check is not
            # stripped when Python runs with -O; the exception type is unchanged.
            raise AssertionError(f"{country} not found on the dataset")

In [None]:
# Fail early if any studied country is missing from the HDR data
for bloc_members in (BRICS, Mercosul):
    double_check_countries_existence_on_df(bloc_members, hdr_df)

## Cleaning unused data

In [None]:
# Relabel each "hdi_<year>" column with the integer year it refers to
hdr_df = hdr_df.rename(columns={f"hdi_{year}": year for year in range(1990, 2023)})

In [None]:
# Keep only the 'World' row and the rows of the BRICS / Mercosul countries
hdr_df = hdr_df.loc[hdr_df['country'].isin(['World'] + BRICS + Mercosul)]

In [None]:
# Pivot the table: years become the rows and each country becomes a column
hdr_df = hdr_df.set_index('country').T

In [None]:
# Casting years to int
# The index holds the year labels at this point; its values are later used
# to pick the matching year columns of the GDP per capita growth data.
hdr_df.index = hdr_df.index.astype(int)

In [None]:
# Add one column per bloc with the row-wise (per-year) mean HDI of its member countries
for bloc_name, bloc_members in (('BRICS', BRICS), ('Mercosul', Mercosul)):
    hdr_df[bloc_name] = hdr_df[bloc_members].mean(axis=1)

# Manipulating World Development Indicators dataset

## Reading dataset

In [None]:
# Load the World Development Indicators extract, reading only the first 802 data rows.
# NOTE(review): the row limit presumably cuts off trailing non-data lines of the export — confirm.
gdp_capita_growth_df = pd.read_csv(
    data_folder.joinpath(
        'P_Data_Extract_From_World_Development_Indicators',
        'ca58506c-ca1b-4e43-91e1-1537eb09b0b8_Data.csv',
    ),
    nrows=802,
)

## Treating data

In [None]:
# Keep only the GDP per capita growth series and use the same country column name as the HDR data
gdp_capita_growth_df = (
    gdp_capita_growth_df
    .loc[gdp_capita_growth_df['Series Name'] == 'GDP per capita growth (annual %)']
    .rename(columns={"Country Name": "country"})
)

# Normalise country names in two steps:
#   1. shorten "Russian Federation" to "Russia";
#   2. drop everything from the first comma onwards for names that contain one.
gdp_capita_growth_df['country'] = (
    gdp_capita_growth_df['country']
    .str.replace('Russian Federation', 'Russia')
    .str.replace(r'(.*?)\s*,(.*)', r'\1', regex=True)
)

In [None]:
# Ensure every studied country exists in the GDP per capita growth data
for bloc_members in (BRICS, Mercosul):
    double_check_countries_existence_on_df(bloc_members, gdp_capita_growth_df)

In [None]:
# Relabel each "<year> [YR<year>]" column with the integer year it refers to
gdp_capita_growth_df = gdp_capita_growth_df.rename(
    columns={f"{year} [YR{year}]": year for year in range(1960, 2023)}
)

In [None]:
# Remove the identifier columns that are not needed any more
gdp_capita_growth_df = gdp_capita_growth_df.drop(
    ['Country Code', 'Series Name', 'Series Code'],
    axis='columns',
)

In [None]:
# Keep only the 'World' row and the rows of the BRICS / Mercosul countries
gdp_capita_growth_df = gdp_capita_growth_df.loc[
    gdp_capita_growth_df['country'].isin(['World'] + BRICS + Mercosul)
]

In [None]:
# Index by country, then keep only the year columns that also exist in the HDR data
gdp_capita_growth_df = gdp_capita_growth_df.set_index('country')[hdr_df.index.values]

In [None]:
# Pivot the table: years become the rows and each country becomes a column
gdp_capita_growth_df = gdp_capita_growth_df.T

# Make the year index an integer index
gdp_capita_growth_df.index = gdp_capita_growth_df.index.astype(int)

In [None]:
# Add one column per bloc with the row-wise (per-year) mean growth of its member countries
for bloc_name, bloc_members in (('BRICS', BRICS), ('Mercosul', Mercosul)):
    gdp_capita_growth_df[bloc_name] = gdp_capita_growth_df[bloc_members].mean(axis=1)

In [None]:
# Show the resulting table (years as rows, countries and bloc means as columns) as the cell output
gdp_capita_growth_df