In [1]:
import pandas as pd
import warnings
warnings.filterwarnings('ignore')



In [2]:
file_path='C:/Users/Administrator/Documents/kifiya/Week_10/data/preproccessed_brent_oil_prices.csv'
df = pd.read_csv(file_path, parse_dates=['Date'], dayfirst=True)
df = df.sort_values(by='Date')


# Clean Brent oil prices data
print("Column names in Brent oil prices data:", df.columns)
df.columns = df.columns.str.strip()
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
df['Year'] = df['Date'].dt.year
df = df.dropna(subset=['Year'])
df['Year'] = df['Year'].astype(int)

display(df.head())


gdp_growth_path ='C:/Users/Administrator/Documents/kifiya/Week_10/data/gdp_growth.csv'
inflation_path = 'C:/Users/Administrator/Documents/kifiya/Week_10/data/inflation.csv'
exchange_rate_path = 'C:/Users/Administrator/Documents/kifiya/Week_10/data/exchangerates.csv'

Column names in Brent oil prices data: Index(['Date', 'Price', 'Returns', 'Log_Returns', 'Volatility', 'Momentum',
       'Rolling_Corr'],
      dtype='object')


Unnamed: 0,Date,Price,Returns,Log_Returns,Volatility,Momentum,Rolling_Corr,Year
0,1987-06-19,19.05,0.0,0.0,0.004308,0.37,,1987
1,1987-06-22,19.1,0.002625,0.002621,0.003544,0.32,,1987
2,1987-06-23,18.9,-0.010471,-0.010526,0.004313,0.0,,1987
3,1987-06-24,18.75,-0.007937,-0.007968,0.004698,-0.28,,1987
4,1987-06-25,18.7,-0.002667,-0.00267,0.004736,-0.35,,1987


In [3]:
def preprocess_economic_data(file_path, country, data_type):
    # Load the dataset
    df1 = pd.read_csv(file_path, skiprows=4)

    # Filter for the specified country; consider adding a check for 'All' or specific country handling
    df1 = df1[df1['Country Name'] == country] if country != 'All' else df1

    # Drop unnecessary columns
    df1.drop(columns=['Country Code', 'Indicator Name', 'Indicator Code'], inplace=True)

    # Remove columns with names that are not numeric (e.g., "Unnamed: 68")
    df1 = df1.loc[:, ~df1.columns.str.contains('^Unnamed')]

    # Melt the dataframe to long format
    df1 = df1.melt(id_vars=['Country Name'], var_name='Year', value_name='Value')

    # Convert 'Year' to numeric, forcing errors to NaN, then drop NaNs
    df1['Year'] = pd.to_numeric(df1['Year'], errors='coerce')
    df1.dropna(subset=['Year'], inplace=True)

    # Pivot the DataFrame to have years as index
    df1 = df1.pivot(index='Year', columns='Country Name', values='Value')

    # Drop rows (years) with all NaN values
    df1.dropna(axis=0, how='all', inplace=True)

    # Debug print to show the DataFrame after preprocessing
    print(f"DataFrame after preprocessing ({data_type}):")
    display(df1.head())

    return df1

# Preprocess each dataset for 'All' countries
country = 'All'
gdp_growth_clean = preprocess_economic_data(gdp_growth_path, country, 'GDP Growth Rates')
inflation_rates_clean = preprocess_economic_data(inflation_path, country, 'Inflation Rates')
exchange_rates_clean = preprocess_economic_data(exchange_rate_path, country, 'Exchange Rates')

# Merge datasets with the main DataFrame 'df'
merged_data = df.merge(gdp_growth_clean, on='Year', how='left', suffixes=('', '_GDP'))
merged_data = merged_data.merge(inflation_rates_clean, on='Year', how='left', suffixes=('', '_Inflation'))
merged_data = merged_data.merge(exchange_rates_clean, on='Year', how='left', suffixes=('', '_Exchange'))

# Display the merged data
print("Merged Data:")
display(merged_data.head())


DataFrame after preprocessing (GDP Growth Rates):


Country Name,Afghanistan,Africa Eastern and Southern,Africa Western and Central,Albania,Algeria,American Samoa,Andorra,Angola,Antigua and Barbuda,Arab World,...,Uzbekistan,Vanuatu,"Venezuela, RB",Viet Nam,Virgin Islands (U.S.),West Bank and Gaza,World,"Yemen, Rep.",Zambia,Zimbabwe
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1961,,0.460106,1.873455,,-13.605441,,,,,,...,,,3.192519,,,,3.969728,,1.361382,6.316157
1962,,7.868013,3.707643,,-19.685042,,,,,,...,,,8.532934,,,,5.319004,,-2.490839,1.434471
1963,,5.6164,7.145784,,34.313729,,,,,,...,,,3.900951,,,,5.018764,,3.272393,6.244345
1964,,4.668135,5.406403,,5.839413,,,,,,...,,,11.129345,,,,6.583205,,12.214048,-1.106172
1965,,5.13899,4.102491,,6.206898,,,,,,...,,,4.162867,,,,5.595768,,16.647456,4.910571


DataFrame after preprocessing (Inflation Rates):


Country Name,Afghanistan,Africa Eastern and Southern,Africa Western and Central,Albania,Algeria,American Samoa,Andorra,Angola,Antigua and Barbuda,Arab World,...,Uzbekistan,Vanuatu,"Venezuela, RB",Viet Nam,Virgin Islands (U.S.),West Bank and Gaza,World,"Yemen, Rep.",Zambia,Zimbabwe
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1960,,,,,,,,,,,...,,,,,,,,,,
1961,,,,,,,,,,,...,,,,,,,,,,
1962,,,,,,,,,,,...,,,,,,,,,,
1963,,,,,,,,,,,...,,,,,,,,,,
1964,,,,,,,,,,,...,,,,,,,,,,


DataFrame after preprocessing (Exchange Rates):


Country Name,Afghanistan,Africa Eastern and Southern,Africa Western and Central,Albania,Algeria,American Samoa,Andorra,Angola,Antigua and Barbuda,Arab World,...,Uzbekistan,Vanuatu,"Venezuela, RB",Viet Nam,Virgin Islands (U.S.),West Bank and Gaza,World,"Yemen, Rep.",Zambia,Zimbabwe
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1960,17.196561,,,,4.93706,,,2.866844e-08,1.71429,,...,,89.765,0.00335,,,,,,0.714286,0.000714
1961,17.196561,,,,4.93706,,,2.873486e-08,1.71429,,...,,89.765,0.00335,,,,,,0.714286,0.000714
1962,17.196561,,,,4.93706,,,2.867844e-08,1.71429,,...,,89.765,0.00335,,,,,,0.714286,0.000714
1963,35.109645,,,,4.93706,,,2.875203e-08,1.71429,,...,,89.765,0.00335,,,,,,0.714286,0.000714
1964,38.692262,,,,4.93706,,,2.881978e-08,1.71429,,...,,89.765,0.00435,,,,,,0.714286,0.000714


Merged Data:


Unnamed: 0,Date,Price,Returns,Log_Returns,Volatility,Momentum,Rolling_Corr,Year,Afghanistan,Africa Eastern and Southern,...,Uzbekistan_Exchange,Vanuatu_Exchange,"Venezuela, RB_Exchange",Viet Nam_Exchange,Virgin Islands (U.S.)_Exchange,West Bank and Gaza_Exchange,World_Exchange,"Yemen, Rep._Exchange",Zambia_Exchange,Zimbabwe_Exchange
0,1987-06-19,19.05,0.0,0.0,0.004308,0.37,,1987,,3.964516,...,,109.849167,0.0145,78.953316,,,,,0.009519,0.001663
1,1987-06-22,19.1,0.002625,0.002621,0.003544,0.32,,1987,,3.964516,...,,109.849167,0.0145,78.953316,,,,,0.009519,0.001663
2,1987-06-23,18.9,-0.010471,-0.010526,0.004313,0.0,,1987,,3.964516,...,,109.849167,0.0145,78.953316,,,,,0.009519,0.001663
3,1987-06-24,18.75,-0.007937,-0.007968,0.004698,-0.28,,1987,,3.964516,...,,109.849167,0.0145,78.953316,,,,,0.009519,0.001663
4,1987-06-25,18.7,-0.002667,-0.00267,0.004736,-0.35,,1987,,3.964516,...,,109.849167,0.0145,78.953316,,,,,0.009519,0.001663


In [4]:
merged_data.to_csv("C:/Users/Administrator/Documents/kifiya/Week_10/data/Merged_brent_oil_prices_with_Indicators.csv", index= True)
print("Merged file saved")

Merged file saved
