In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from io import StringIO
from sqlalchemy import create_engine
from datetime import datetime

In [2]:
# Define the correct path to the CSV file
csv_path = 'data/gdp_growth.csv'

# Check if the file exis
# Read the CSV file and create a DataFrame
world_bank_df = pd.read_csv(csv_path)

# Display the first few rows of the DataFrame
print(world_bank_df.head())

# Display basic information about the DataFrame
print(world_bank_df.info())

# Read the CSV file and create a DataFrame
try:
    world_bank_df = pd.read_csv(csv_path, sep='\t', error_bad_lines=False, warn_bad_lines=True)
    print("CSV file read successfully.")
except pd.errors.ParserError as e:
    print(f"Error reading CSV file: {e}")
    print("Attempting to read with different settings...")
    try:
        world_bank_df = pd.read_csv(csv_path, sep=',', encoding='utf-8', quotechar='"', error_bad_lines=False, warn_bad_lines=True)
        print("CSV file read successfully with alternative settings.")
    except Exception as e:
        print(f"Failed to read CSV file: {e}")
        world_bank_df = pd.DataFrame()  # Create an empty DataFrame if all attempts fail

# Display the first few rows of the DataFrame
if not world_bank_df.empty:
    print(world_bank_df.head())
    # Display basic information about the DataFrame
    print(world_bank_df.info())
else:
    print("DataFrame is empty. Please check the CSV file and its path.")



                  Country Name Code  1960      1961      1962      1963  \
0                        Aruba  ABW   NaN       NaN       NaN       NaN   
1  Africa Eastern and Southern  AFE   NaN       NaN       NaN       NaN   
2                  Afghanistan  AFG   NaN       NaN       NaN       NaN   
3   Africa Western and Central  AFW   NaN  1.848719  3.770212  7.272501   
4                       Angola  AGO   NaN       NaN       NaN       NaN   

       1964      1965      1966      1967  ...       2012      2013      2014  \
0       NaN       NaN       NaN       NaN  ...  -1.369863  4.198232  0.300000   
1       NaN       NaN       NaN       NaN  ...   1.972652  4.308370  3.986754   
2       NaN       NaN       NaN       NaN  ...  12.752287  5.600745  2.724543   
3  5.396356  4.049794 -1.787094 -9.546521  ...   5.142964  6.104241  5.927350   
4       NaN       NaN       NaN       NaN  ...   8.542188  4.954545  4.822628   

       2015      2016      2017      2018      2019      2020 

TypeError: read_csv() got an unexpected keyword argument 'error_bad_lines'

In [None]:
# Function to extract data from IMF API
def extract_imf_data(dataset):
    base_url = "http://dataservices.imf.org/REST/SDMX_JSON.svc/CompactData/{}"
    url = base_url.format(dataset)
    response = requests.get(url)
    data = response.json()
    # Parse JSON data into a pandas DataFrame
    # This is a simplified example and may need adjustment based on the actual IMF API response structure
    df = pd.DataFrame(data['CompactData']['DataSet']['Series'])
    return df


In [None]:
# Function to transform and clean the data
def transform_data(world_bank_df, imf_df):
    # Merge datasets
    merged_df = pd.merge(world_bank_df, imf_df, on=['Country', 'Year'], how='outer')
    
    # Clean data by removing rows with missing values
    merged_df = merged_df.dropna()
    
    # Calculate additional metrics (e.g., Debt-to-GDP ratio)
    merged_df['Debt_to_GDP_Ratio'] = merged_df['Government Debt'] / merged_df['GDP']
    
    # Normalize specific columns (GDP growth, Inflation, Unemployment)
    columns_to_normalize = ['GDP growth', 'Inflation', 'Unemployment']
    merged_df[columns_to_normalize] = (merged_df[columns_to_normalize] - merged_df[columns_to_normalize].mean()) / merged_df[columns_to_normalize].std()
    
    return merged_df

In [None]:
# Function to load transformed data into a database
def load_data(df, database_url):
    engine = create_engine(database_url)
    table_name = 'economic_stability_data'
    df.to_sql(table_name, engine, if_exists='replace', index=False)


In [None]:
# ETL pipeline function
def run_etl_pipeline():
    # List of example countries
    countries = ['USA', 'GBR', 'DEU', 'FRA', 'JPN', 'CHN', 'IND']
    
    # Extract data from World Bank (GDP growth) and IMF (Financial Soundness Indicators)
    world_bank_df = extract_world_bank_data('NY.GDP.MKTP.KD.ZG', countries)
    imf_df = extract_imf_data('FSI')
    
    # Transform the data
    transformed_df = transform_data(world_bank_df, imf_df)
    
    # Load the data into a PostgreSQL database
    database_url = 'postgresql://username:password@localhost:5432/economic_data'
    load_data(transformed_df, database_url)
    
    print(f"ETL pipeline completed at {datetime.now()}")


In [None]:
# Run the ETL pipeline
if __name__ == "__main__":
    run_etl_pipeline()