# Import the necessary libraries

In [2]:
import sys
import os
import matplotlib.pyplot as plt
import seaborn as sns
import logging
import pandas as pd

# Configure logging
logging.basicConfig(filename='../logs/',
                    level=logging.INFO,
                    format='%(asctime)s:%(levelname)s:%(message)s')
  

sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '../scripts')))
from load_csv_data import Load_CSV_Data


# Load the data sets

In [7]:
# GDP data

df_gdp = Load_CSV_Data('../data/gdp/gdp_data.csv')
df_gdp.load_csv_data()
df_gdp = df_gdp.get_data()

# Inflation data

df_inflation = Load_CSV_Data('../data/inflation/inflation_data.csv')
df_inflation.load_csv_data()
df_inflation = df_inflation.get_data()

# Exchange rate data

df_exrate = Load_CSV_Data('../data/exchange_rate/exchange_rate_data.csv')
df_exrate.load_csv_data()
df_exrate = df_exrate.get_data()



Data successfully loaded from ../data/gdp/gdp_data.csv
Data successfully loaded from ../data/inflation/inflation_data.csv
Data successfully loaded from ../data/exchange_rate/exchange_rate_data.csv


In [9]:
df_gdp.head()


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Aruba,ABW,"Official exchange rate (LCU per US$, period av...",PA.NUS.FCRF,,,,,,,...,1.79,1.79,1.79,1.79,1.79,1.79,1.79,1.79,1.79,1.79
1,Africa Eastern and Southern,AFE,"Official exchange rate (LCU per US$, period av...",PA.NUS.FCRF,,,,,,,...,,,,,,,,,,
2,Afghanistan,AFG,"Official exchange rate (LCU per US$, period av...",PA.NUS.FCRF,17.19656,17.19656,17.19656,35.10964,38.69226,38.69226,...,57.2475,61.143462,67.866086,68.026904,72.083247,77.737949,76.813536,,,
3,Africa Western and Central,AFW,"Official exchange rate (LCU per US$, period av...",PA.NUS.FCRF,,,,,,,...,,,,,,,,,,
4,Angola,AGO,"Official exchange rate (LCU per US$, period av...",PA.NUS.FCRF,2.87e-08,2.87e-08,2.87e-08,2.88e-08,2.88e-08,2.88e-08,...,98.302417,120.060702,163.656434,165.915951,252.855748,364.825805,578.25878,631.441955,460.567512,685.020238


In [10]:
df_inflation.head()


Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Aruba,ABW,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,2790850000.0,2962907000.0,2983635000.0,3092429000.0,3276184000.0,3395799000.0,2558906000.0,3103184000.0,3544708000.0,
1,Africa Eastern and Southern,AFE,GDP (current US$),NY.GDP.MKTP.CD,21216960000.0,22307470000.0,23702470000.0,25779380000.0,28049540000.0,30374910000.0,...,979690000000.0,899296000000.0,829830000000.0,940105000000.0,1012720000000.0,1006530000000.0,929074000000.0,1086770000000.0,1183960000000.0,1236160000000.0
2,Afghanistan,AFG,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,20497130000.0,19134220000.0,18116570000.0,18753460000.0,18053220000.0,18799440000.0,19955930000.0,14266500000.0,14502160000.0,
3,Africa Western and Central,AFW,GDP (current US$),NY.GDP.MKTP.CD,11884130000.0,12685660000.0,13606830000.0,14439980000.0,15769110000.0,16934480000.0,...,894585000000.0,769367000000.0,692181000000.0,685750000000.0,768190000000.0,823934000000.0,787147000000.0,845993000000.0,877141000000.0,796586000000.0
4,Angola,AGO,GDP (current US$),NY.GDP.MKTP.CD,,,,,,,...,135967000000.0,90496420000.0,52761620000.0,73690150000.0,79450690000.0,70897960000.0,48501560000.0,66505130000.0,104400000000.0,84722960000.0


In [11]:
df_exrate.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Aruba,ABW,"Inflation, GDP deflator (annual %)",NY.GDP.DEFL.KD.ZG,,,,,,,...,3.958897,6.831287,-1.0028,-3.178167,3.477547,6.094207,-0.871325,-4.990168,3.412842,
1,Africa Eastern and Southern,AFE,"Inflation, GDP deflator (annual %)",NY.GDP.DEFL.KD.ZG,,1.933947,0.596607,3.150053,3.726978,2.962826,...,5.403535,5.545296,7.002823,5.217431,4.321757,4.444379,5.089641,5.535521,8.562985,8.770823
2,Afghanistan,AFG,"Inflation, GDP deflator (annual %)",NY.GDP.DEFL.KD.ZG,,,,,,,...,0.566945,2.447563,-2.197526,2.403656,2.071349,6.52148,6.962946,2.838996,9.406239,
3,Africa Western and Central,AFW,"Inflation, GDP deflator (annual %)",NY.GDP.DEFL.KD.ZG,,3.278446,2.485956,2.781479,3.381061,2.405026,...,0.731853,1.911559,1.736323,1.759106,2.150339,1.739014,1.715558,4.642203,8.094694,3.386691
4,Angola,AGO,"Inflation, GDP deflator (annual %)",NY.GDP.DEFL.KD.ZG,,,,,,,...,3.560885,-3.518385,21.774313,22.614437,28.167093,19.187004,10.763105,38.823722,13.709848,17.647757


# Data cleaning

In [12]:
import pandas as pd

# Filter for the year range 1987-2022
year_range = [str(year) for year in range(1987, 2023)]

# GDP data cleaning
df_gdp = df_gdp[['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'] + year_range]
df_gdp_long = df_gdp.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                          var_name='Year', value_name='GDP')
df_gdp_long = df_gdp_long[(df_gdp_long['Year'].astype(int) >= 1987) & (df_gdp_long['Year'].astype(int) <= 2022)].dropna()
df_gdp_long.rename(columns={'Country Name': 'Country', 'Country Code': 'CountryCode', 'Indicator Name': 'GDP_Indicator'}, inplace=True)

# Inflation data cleaning
df_inflation = df_inflation[['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'] + year_range]
df_inflation_long = df_inflation.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                                      var_name='Year', value_name='Inflation')
df_inflation_long = df_inflation_long[(df_inflation_long['Year'].astype(int) >= 1987) & (df_inflation_long['Year'].astype(int) <= 2022)].dropna()
df_inflation_long.rename(columns={'Country Name': 'Country', 'Country Code': 'CountryCode', 'Indicator Name': 'Inflation_Indicator'}, inplace=True)

# Exchange rate data cleaning
df_exrate = df_exrate[['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'] + year_range]
df_exrate_long = df_exrate.melt(id_vars=['Country Name', 'Country Code', 'Indicator Name', 'Indicator Code'],
                                var_name='Year', value_name='Exchange_Rate')
df_exrate_long = df_exrate_long[(df_exrate_long['Year'].astype(int) >= 1987) & (df_exrate_long['Year'].astype(int) <= 2022)].dropna()
df_exrate_long.rename(columns={'Country Name': 'Country', 'Country Code': 'CountryCode', 'Indicator Name': 'ExchangeRate_Indicator'}, inplace=True)

# Preview the cleaned data
print("GDP Data Sample:\n", df_gdp_long.head())
print("Inflation Data Sample:\n", df_inflation_long.head())
print("Exchange Rate Data Sample:\n", df_exrate_long.head())

# Save the cleaned datasets for merging
df_gdp_long.to_csv('../data/gdp/cleaned_gdp_data.csv', index=False)
df_inflation_long.to_csv('../data/inflation/cleaned_inflation_data.csv', index=False)
df_exrate_long.to_csv('../data/exchange_rate/cleaned_exchange_rate_data.csv', index=False)


GDP Data Sample:
                 Country CountryCode  \
0                 Aruba         ABW   
2           Afghanistan         AFG   
4                Angola         AGO   
8  United Arab Emirates         ARE   
9             Argentina         ARG   

                                       GDP_Indicator Indicator Code  Year  \
0  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF  1987   
2  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF  1987   
4  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF  1987   
8  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF  1987   
9  Official exchange rate (LCU per US$, period av...    PA.NUS.FCRF  1987   

            GDP  
0  1.790000e+00  
2  3.927643e+01  
4  2.990000e-08  
8  3.671000e+00  
9  2.144300e-04  
Inflation Data Sample:
                        Country CountryCode Inflation_Indicator  \
0                        Aruba         ABW   GDP (current US$)   
1  Africa Eastern an