# 数据库操作演示

本notebook展示如何使用PostgreSQL和MongoDB存储和检索教育数据分析项目中的数据。

In [None]:
import sys
import os
import pandas as pd
from dotenv import load_dotenv

# Put the parent of the current working directory on the import path so the
# `src` package imported below can be resolved.
# NOTE(review): assumes the notebook is started from a directory that sits
# directly under the project root -- confirm for your setup.
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(project_root)

# Import the database manager (must come after the sys.path change above)
from src.data_processing.db_manager import DatabaseManager

# Load environment variables
load_dotenv()

## 1. PostgreSQL数据库操作

PostgreSQL用于存储结构化的教育和经济数据。

In [None]:
# Initialise the database manager; `db_manager` is reused by every later cell.
db_manager = DatabaseManager()

# Connect to PostgreSQL and keep whatever connect_postgres() returns.
# The message below is printed whenever the call returns without raising.
postgres_conn = db_manager.connect_postgres()
print("Successfully connected to PostgreSQL")

In [None]:
# Create the table schema.
# Both statements use CREATE TABLE IF NOT EXISTS, and the whole string
# (including its "--" SQL comments) is passed as-is to
# db_manager.execute_postgres_query().
create_tables_query = """
-- 教育投资数据表
CREATE TABLE IF NOT EXISTS education_investment (
    id SERIAL PRIMARY KEY,
    country_code VARCHAR(3),
    year INTEGER,
    investment_amount DECIMAL,
    gdp_percentage DECIMAL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);

-- 经济指标数据表
CREATE TABLE IF NOT EXISTS economic_indicators (
    id SERIAL PRIMARY KEY,
    country_code VARCHAR(3),
    year INTEGER,
    gdp_growth DECIMAL,
    employment_rate DECIMAL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""

db_manager.execute_postgres_query(create_tables_query)
print("Tables created successfully")

In [None]:
# Build the sample data: three country codes, all for the year 2020.
# Column names match the columns of the tables the frames are saved to.
sample_education_data = pd.DataFrame(dict(
    country_code=['IRL', 'GBR', 'FRA'],
    year=[2020] * 3,
    investment_amount=[10000, 15000, 12000],
    gdp_percentage=[4.5, 5.0, 4.8],
))

sample_economic_data = pd.DataFrame(dict(
    country_code=['IRL', 'GBR', 'FRA'],
    year=[2020] * 3,
    gdp_growth=[2.5, 1.8, 1.5],
    employment_rate=[75.0, 76.5, 74.8],
))

# Save each frame to its PostgreSQL table (education first, then economic)
for _frame, _table in (
    (sample_education_data, 'education_investment'),
    (sample_economic_data, 'economic_indicators'),
):
    db_manager.save_to_postgres(_frame, _table)
print("Sample data inserted successfully")

In [None]:
# Example query: join education investment with economic indicators on
# (country_code, year), ordered by country code and year.
query = """
SELECT e.country_code, e.year, e.investment_amount, e.gdp_percentage,
       i.gdp_growth, i.employment_rate
FROM education_investment e
JOIN economic_indicators i
ON e.country_code = i.country_code AND e.year = i.year
ORDER BY e.country_code, e.year;
"""

results_df = db_manager.query_postgres(query)
# Heading and result on separate lines, emitted by a single print call
print("\nQuery Results:", results_df, sep="\n")

## 2. MongoDB数据库操作

MongoDB用于存储非结构化和半结构化数据，例如详细的国家报告及其元数据。

In [None]:
# Connect to MongoDB and keep whatever connect_mongo() returns.
# The message below is printed whenever the call returns without raising.
mongo_client = db_manager.connect_mongo()
print("Successfully connected to MongoDB")

In [None]:
# Build a sample report document. Top-level fields identify the report;
# the nested `content` holds the summary, findings, recommendations and
# metadata.
country_report = dict(
    country_code='IRL',
    year=2020,
    report_type='education_analysis',
    content=dict(
        summary="Detailed analysis of Ireland's education system",
        key_findings=[
            'Increased investment in STEM education',
            'Growing focus on digital skills',
            'Enhanced teacher training programs',
        ],
        recommendations=[
            'Further increase in R&D funding',
            'Expand international collaboration',
        ],
        metadata=dict(
            author='Education Research Team',
            created_at='2020-12-01',
            version='1.0',
        ),
    ),
)

# Store the document in the 'country_reports' collection
db_manager.save_to_mongo('country_reports', country_report)
print("Document inserted successfully")

In [None]:
# Look up the stored reports for IRL / 2020 in the 'country_reports' collection
query = {'country_code': 'IRL', 'year': 2020}
result = db_manager.query_mongo('country_reports', query)

print("\nMongoDB Query Result:")
for doc in result:
    print(f"Country: {doc['country_code']}")
    print(f"Year: {doc['year']}")
    # Each section is a heading followed by one bullet per list entry
    for heading, field in (
        ("Key Findings", 'key_findings'),
        ("Recommendations", 'recommendations'),
    ):
        print(f"\n{heading}:")
        for item in doc['content'][field]:
            print(f"- {item}")

## 3. 数据库设计说明

### PostgreSQL设计
- **education_investment表**：存储结构化的教育投资数据
  - 包含国家代码、年份、投资金额、GDP占比等字段
  - 使用自增主键（SERIAL PRIMARY KEY）和明确的列类型（VARCHAR、INTEGER、DECIMAL、TIMESTAMP）；当前示例尚未定义NOT NULL或唯一约束，如需更强的数据完整性可自行添加

- **economic_indicators表**：存储经济指标数据
  - 包含GDP增长率、就业率等关键经济指标
  - 查询时通过country_code和year与education_investment表进行JOIN关联（表结构中未定义外键）

### MongoDB设计
- **country_reports集合**：存储非结构化的国家报告
  - 包含详细的分析报告、建议和元数据
  - 灵活的文档结构，便于存储复杂的层次化数据

### 数据库选择理由
1. **PostgreSQL**：
   - 适合存储结构化数据
   - 支持复杂的SQL查询和分析
   - 强大的数据完整性保证

2. **MongoDB**：
   - 适合存储非结构化和半结构化数据
   - 灵活的文档模型
   - 良好的横向扩展能力