# Economic Analysis Notebook

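This notebook analyzes the relationship between education investment and two economic indicators (GDP growth and employment rate) for five major European countries: Germany, France, Italy, Spain, and Poland.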

In [None]:
# Import required libraries and setup environment
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv
import matplotlib

# Add project root to Python path
# The parent of the current working directory is treated as the project root,
# so the `src.*` imports below resolve when the notebook runs from a
# subdirectory of the project.
project_root = Path().absolute().parent
# Guard the append so re-running this cell does not pile up duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import project modules
from src.data_processing.db_manager import DatabaseManager
from src.data_processing.data_cleaner import DataCleaner
from src.data_processing.imf_data_processor import IMFDataProcessor

# Set plotting style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases removed the old names, so pick whichever name this
# installation actually provides instead of hard-coding 'seaborn'.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_palette('husl')

# Applied after the style sheet so these values override its defaults.
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['font.size'] = 12
# 'Arial Unicode MS' is kept as the preferred font; 'DejaVu Sans' (shipped with
# Matplotlib) is listed as a fallback so systems without the former do not
# emit "font family not found" warnings on every draw.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'DejaVu Sans']
# Render the minus sign as an ASCII hyphen rather than the Unicode minus glyph.
plt.rcParams['axes.unicode_minus'] = False

In [None]:
# Instantiate the project's database, cleaning, and IMF helper objects
db_manager = DatabaseManager()
cleaner = DataCleaner()
imf_processor = IMFDataProcessor()

# Read the cached education-investment CSV (path is relative to the working directory)
education_data = pd.read_csv('../data/cache/education_investment.csv')

# Build the cleaned frame in a single chain, leaving `education_data` untouched:
#   1. coerce 'year' and 'value' to numbers (unparseable entries become NaN),
#   2. drop every row that contains a NaN in any column.
education_data_cleaned = (
    education_data
    .assign(
        year=lambda frame: pd.to_numeric(frame['year'], errors='coerce'),
        value=lambda frame: pd.to_numeric(frame['value'], errors='coerce'),
    )
    .dropna()
)

# Report the cleaned frame's dimensions and preview its first rows
print("Shape of cleaned education data:", education_data_cleaned.shape)
education_data_cleaned.head()

In [None]:
# Country codes chosen for the trend comparison, mapped to display names
country_names = {
    'DE': 'Germany',
    'FR': 'France',
    'IT': 'Italy',
    'ES': 'Spain',
    'PL': 'Poland',
}

# The code list is the mapping's keys, in insertion order
major_countries = list(country_names)

# Keep only the rows whose 'geo_time_period' value is one of the selected codes
is_major_country = education_data_cleaned['geo_time_period'].isin(major_countries)
major_country_data = education_data_cleaned[is_major_country]

# Preview the first ten rows of the selection
print("\nData for selected countries:")
major_country_data.head(10)

In [None]:
# Get economic data
# Query the same span of years that the selected education data covers.
education_years = major_country_data['year'].dropna()
if education_years.empty:
    # Without this guard, min()/max() return NaN and int(NaN) fails with an
    # unhelpful "cannot convert float NaN to integer" ValueError.
    raise ValueError(
        "No education data available for the selected countries; "
        "cannot determine the year range for the economic indicators."
    )
start_year = int(education_years.min())
end_year = int(education_years.max())
economic_data = imf_processor.get_economic_indicators(major_countries, start_year, end_year)

# Display economic data
print("\nEconomic indicators data:")
economic_data.head()

In [None]:
# Join education rows to economic rows on (country code, year).
# The education frame stores the code in 'geo_time_period', the economic frame
# in 'country'; the inner join keeps only pairs present in both frames.
education_keys = ['geo_time_period', 'year']
economic_keys = ['country', 'year']
merged_data = major_country_data.merge(
    economic_data,
    how='inner',
    left_on=education_keys,
    right_on=economic_keys,
)

# Report the merged frame's dimensions and preview its first rows
print("Shape of merged data:", merged_data.shape)
merged_data.head()

In [None]:
# Heatmap of pairwise correlations between education investment ('value')
# and the two economic indicator columns
plt.figure(figsize=(10, 8))

indicator_columns = ['value', 'gdp_growth', 'employment_rate']
correlation_matrix = merged_data[indicator_columns].corr()

# Fix the colour scale to [-1, 1] centred on 0 and print each coefficient in its cell
heatmap_options = {
    'annot': True,
    'cmap': 'coolwarm',
    'vmin': -1,
    'vmax': 1,
    'center': 0,
}
sns.heatmap(correlation_matrix, **heatmap_options)

plt.title('Correlation between Education Investment and Economic Indicators')
plt.tight_layout()
plt.show()

In [None]:
# Create trend line plots
# Two stacked panels: education investment against GDP growth (top) and
# against employment rate (bottom), one pair of lines per country.
# plt.subplots creates its own figure, so no separate plt.figure() call is
# made beforehand (that would leave an extra, empty figure to be shown).
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(15, 12))

# (axes, indicator column, legend label, panel title) for each panel
panels = [
    (ax1, 'gdp_growth', 'GDP Growth', 'Education Investment vs GDP Growth'),
    (ax2, 'employment_rate', 'Employment Rate', 'Education Investment vs Employment Rate'),
]

for ax, indicator_column, indicator_label, panel_title in panels:
    for country in major_countries:
        # Sort by year so each line is drawn left to right regardless of the
        # row order that came out of the merge.
        country_data = merged_data[merged_data['country'] == country].sort_values('year')

        # Solid line with round markers: the economic indicator
        ax.plot(country_data['year'],
                country_data[indicator_column],
                marker='o',
                label=f'{country_names[country]} - {indicator_label}')
        # Dashed line with square markers: education investment
        ax.plot(country_data['year'],
                country_data['value'],
                marker='s',
                linestyle='--',
                label=f'{country_names[country]} - Education Investment')

    ax.set_title(panel_title)
    ax.set_xlabel('Year')
    ax.set_ylabel('Percentage')
    # Anchor the legend outside the axes, to the right, so it does not cover the lines
    ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    ax.grid(True, linestyle='--', alpha=0.7)

plt.tight_layout()
plt.show()