In [1]:
# Import required libraries
import sys
import os
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from dotenv import load_dotenv
import matplotlib

In [2]:
# Read environment variables from the .env file one directory above the notebook.
env_file = Path('..').resolve() / '.env'
load_dotenv(env_file)

True

In [3]:
# Add project root to Python path so the `src` package can be imported.
project_root = Path('..').resolve()
# Guard the append: re-running this cell must not pile up duplicate
# entries in sys.path.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

In [4]:
# Import project modules
from src.data_processing.db_manager import DatabaseManager
from src.data_processing.data_cleaner import DataCleaner

In [5]:
# Set plotting style
# NOTE: sns.set_theme() resets many rcParams to seaborn's defaults, so it
# overrides much of what plt.style.use() applied just before it; the explicit
# rcParams assignments therefore come last so they are not clobbered.
plt.style.use('seaborn-v0_8')  # Use the v0.8 compatible style
sns.set_theme()  # Use seaborn's default theme
plt.rcParams['figure.figsize'] = [12, 6]
plt.rcParams['font.size'] = 12
# Keep 'Arial Unicode MS' as the preferred face, but list a fallback so that
# machines without that font do not emit repeated "findfont" warnings.
plt.rcParams['font.sans-serif'] = ['Arial Unicode MS', 'DejaVu Sans']

In [6]:
# Initialize database connections and utilities
# db_manager is the handle used by later cells for PostgreSQL queries
# (query_postgres) and for MongoDB access (mongo_db).
# NOTE(review): what each constructor does on creation (e.g. whether it opens
# connections eagerly) is not visible from this notebook — confirm in
# src/data_processing.
db_manager = DatabaseManager()
cleaner = DataCleaner()

In [7]:
# Fetch all education investment rows from PostgreSQL, ordered by year and
# geo_time_period.
EDUCATION_QUERY = """
    SELECT *
    FROM education_data
    ORDER BY year, geo_time_period
"""
education_data = db_manager.query_postgres(EDUCATION_QUERY)

2024-12-19 14:28:35,894 - INFO - Successfully connected to PostgreSQL


In [8]:
# Fetch all economic indicator rows from PostgreSQL, ordered by year and
# country_code.
ECONOMIC_QUERY = """
    SELECT *
    FROM economic_data
    ORDER BY year, country_code
"""
economic_data = db_manager.query_postgres(ECONOMIC_QUERY)

In [9]:
# Try to get policy data from MongoDB (best-effort: any failure leaves
# policy_docs as an empty list so later cells can still run).
policy_docs = []
try:
    mongo_db = db_manager.mongo_db
    # Compare against None explicitly. If mongo_db is a PyMongo Database (as
    # the collection/find usage below suggests), truth-testing it raises
    # NotImplementedError, which the broad except would swallow — silently
    # skipping the policy data even when the connection is available.
    if mongo_db is not None:
        policy_data = mongo_db['education_policies'].find()
        policy_docs = list(policy_data)
        print(f"Retrieved {len(policy_docs)} education policy documents")
    else:
        print("Warning: MongoDB connection not available, skipping policy data")
except Exception as e:
    print(f"Error retrieving MongoDB data: {str(e)}")
    policy_docs = []



In [10]:
# Report how many rows each PostgreSQL query returned, one message per line.
print(
    f"Retrieved {len(education_data)} education investment records",
    f"Retrieved {len(economic_data)} economic indicator records",
    sep="\n",
)

Retrieved 17756 education investment records
Retrieved 378 economic indicator records


In [11]:
# Preview the first rows of the education investment table.
education_sample = education_data.head()
print("\nSample of education investment data:")
print(education_sample)


Sample of education investment data:
     id freq unit  isced11 geo_time_period  year    value  \
0  1135    A  NAC     ED35              AT  2012  12905.2   
1   208    A  EUR      ED3              AT  2012  11843.4   
2   260    A  EUR  ED34_44              AT  2012  10180.0   
3   934    A  NAC      ED1              AT  2012   8068.6   
4   964    A  NAC    ED1_2              AT  2012   9683.6   

                collected_at    source  
0 2024-12-19 14:18:32.863238  Eurostat  
1 2024-12-19 14:18:32.863238  Eurostat  
2 2024-12-19 14:18:32.863238  Eurostat  
3 2024-12-19 14:18:32.863238  Eurostat  
4 2024-12-19 14:18:32.863238  Eurostat  
