# In [None]:
# ===================================================================
#           数据指挥中心仪表盘 (Data QA Dashboard) v3.1
# ===================================================================
#
# 目的: 智能适应不同数据文件的Schema，修复KeyError，提供更健壮的校验。
#
# -------------------------------------------------------------------

# 1. 导入必要的库
import pandas as pd
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from tabulate import tabulate
import pprint
from datetime import datetime

# 2. Load configuration
#
# Reads config.yaml from the current working directory and derives:
#   BASE_PATH            - root directory of the stored data
#   EXPECTED_DATA_TYPES  - data-type names implied by the configured tasks
# If the file is missing, or is present but unusable (empty document, missing
# storage.base_path, wrong value types), both names fall back to built-in
# defaults so the rest of the notebook can still run.
try:
    with open('config.yaml', 'r', encoding='utf-8') as f:
        # An empty YAML document parses to None; treat it as an empty mapping
        # so the failure below is a clear KeyError instead of a TypeError.
        config = yaml.safe_load(f) or {}
    BASE_PATH = Path(config['storage']['base_path'])
    # `tasks: null` yields None from .get(); normalise to an empty list.
    tasks = config.get('tasks') or []
    # Daily tasks are named after their price-adjustment mode ("daily_<adjust>").
    EXPECTED_DATA_TYPES = [
        f"daily_{task.get('adjust', 'none')}"
        for task in tasks
        if task.get('type') == 'daily'
    ]
    # Non-daily tasks are listed by their type, but only when enabled.
    EXPECTED_DATA_TYPES.extend(
        task.get('type')
        for task in tasks
        if task.get('type') != 'daily' and task.get('enabled')
    )
    print(f"✅ 成功加载配置，数据根目录: {BASE_PATH.resolve()}")
    print(f"   预期的任务数据类型: {sorted(set(EXPECTED_DATA_TYPES))}")
except (FileNotFoundError, KeyError, TypeError) as exc:
    if isinstance(exc, FileNotFoundError):
        print("❌ 未找到 config.yaml 文件。")
    else:
        # The file exists but its content is unusable; report why and continue
        # with the same defaults as the missing-file case.
        print(f"❌ config.yaml 配置无效 ({exc!r})，将使用默认设置。")
    BASE_PATH = Path("data")
    EXPECTED_DATA_TYPES = ['daily_qfq', 'daily_none', 'daily_basic', 'stock_list']


# --- 核心校验函数 ---
def _load_symbol_frames(symbol_dirs) -> dict:
    """Read ``data.parquet`` from each directory in *symbol_dirs*.

    Returns a dict mapping the data-type name (the name of each directory's
    parent) to the loaded DataFrame. Directories that contain no
    ``data.parquet`` are skipped. One confirmation line is printed per file.
    """
    frames = {}
    for symbol_dir in symbol_dirs:
        file_path = symbol_dir / "data.parquet"
        if not file_path.exists():
            continue
        data_type = symbol_dir.parent.name
        df = pd.read_parquet(file_path)
        frames[data_type] = df
        print(f"  ✅ 已加载 '{data_type}' 数据，共 {len(df)} 条记录。")
    return frames


def _print_frame_details(name: str, df) -> None:
    """Print summary statistics and a five-row preview for one DataFrame.

    Only columns that actually exist in *df* are used, so frames with
    different schemas do not raise ``KeyError``.
    """
    print("\n" + "-"*15, f" {name} 数据详情 ", "-"*15)

    # --- Statistics over whichever key columns are present ---
    print("\n[关键列统计数据]")
    stats_candidates = ('open', 'high', 'low', 'close', 'vol', 'amount',
                        'pe', 'pe_ttm', 'pb', 'ps', 'total_mv')
    stats_cols = [col for col in stats_candidates if col in df.columns]
    if stats_cols:
        print(tabulate(df[stats_cols].describe(), headers='keys', tablefmt='psql'))
    else:
        print("  - 未找到可供统计的关键列。")

    # --- Preview of the last five stored rows ---
    # NOTE(review): tail(5) is "most recent" only if the file is stored in
    # ascending date order — confirm against the writer.
    print("\n[最近5条数据预览]")
    preview_candidates = ('trade_date', 'ts_code', 'open', 'high', 'low',
                          'close', 'vol', 'pe_ttm', 'pb', 'total_mv')
    preview_cols = [col for col in preview_candidates if col in df.columns]
    if preview_cols:
        print(tabulate(df[preview_cols].tail(5), headers='keys',
                       tablefmt='psql', showindex=False))
    else:
        print("  - 无法生成预览。")


def _select_volume_source(data_frames: dict):
    """Pick the frame used for the volume subplot.

    Prefers ``daily_none`` and falls back to ``daily_qfq``. A candidate is
    accepted only if it has a ``vol`` column AND a date column
    (``trade_date`` or an existing ``trade_date_dt``); without a date column
    the x axis cannot be built.

    Returns ``(name, frame)``, or ``("", None)`` when no candidate qualifies.
    """
    for candidate in ('daily_none', 'daily_qfq'):
        frame = data_frames.get(candidate)
        if frame is None or 'vol' not in frame.columns:
            continue
        if 'trade_date' in frame.columns or 'trade_date_dt' in frame.columns:
            return candidate, frame
    return "", None


def _plot_comparison(symbol: str, data_frames: dict) -> None:
    """Draw the close-price comparison and volume chart and show the figure.

    Adds a ``trade_date_dt`` column (parsed from ``trade_date`` using the
    ``%Y%m%d`` format) to the frames it plots.
    """
    print("\n[图表对比]")
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.03,
                        subplot_titles=('收盘价对比 (close)', '成交量 (vol)'), row_heights=[0.8, 0.2])

    # One close-price line per data type that has both a date and a close column.
    for name, df in data_frames.items():
        if 'trade_date' in df.columns and 'close' in df.columns:
            df['trade_date_dt'] = pd.to_datetime(df['trade_date'], format='%Y%m%d')
            fig.add_trace(go.Scatter(x=df['trade_date_dt'], y=df['close'], name=f'收盘价 ({name})'), row=1, col=1)

    volume_source_name, volume_source_df = _select_volume_source(data_frames)
    if volume_source_df is not None:
        # The price loop above may not have touched this frame (e.g. it has
        # no 'close' column), so build the datetime column here if needed.
        if 'trade_date_dt' not in volume_source_df.columns:
            volume_source_df['trade_date_dt'] = pd.to_datetime(volume_source_df['trade_date'], format='%Y%m%d')
        fig.add_trace(go.Scatter(x=volume_source_df['trade_date_dt'], y=volume_source_df['vol'], name=f'成交量 ({volume_source_name})',
                                 fill='tozeroy', mode='lines', line=dict(width=0.5, color='rgba(44, 160, 44, 0.5)')),
                      row=2, col=1)
        if volume_source_name != 'daily_none':
            print(f"  - 提示: 未找到'不复权'数据，成交量图表使用'{volume_source_name}'数据绘制。")

    fig.update_yaxes(title_text="价格", row=1, col=1)
    fig.update_yaxes(title_text="成交量", tickformat=".2s", row=2, col=1)
    fig.update_layout(title_text=f'股票 {symbol} - 数据对比',
                      xaxis_rangeslider_visible=False, height=700,
                      legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1))
    fig.show()


def deep_dive_stock(symbol: str) -> None:
    """Print and plot everything stored on disk for a single stock symbol.

    Looks for ``<BASE_PATH>/<data_type>/entity=<symbol>/data.parquet`` under
    every data-type directory, then for each file found prints summary
    statistics and a short preview, and finally shows a plotly figure with
    the close prices of all data types plus a volume subplot.

    Args:
        symbol: Symbol as it appears in the ``entity=<symbol>`` directory name.

    Returns:
        None. All results are printed or displayed. Returns early, after
        printing a message, when no directory or no data file is found.
    """
    print("\n" + "="*20, f"对股票 {symbol} 进行深度钻取", "="*20)

    # --- A. Load every data file that exists for this symbol ---
    print("\n[数据文件加载]")
    # Path.glob yields entries in filesystem order; sort so that the printed
    # sections and the chart legend come out in the same order on every run.
    symbol_dirs = sorted(BASE_PATH.glob(f"*/entity={symbol}"))
    if not symbol_dirs:
        print(f"未找到股票 {symbol} 的任何数据目录。")
        return

    data_frames = _load_symbol_frames(symbol_dirs)
    if not data_frames:
        print("\n未能加载任何数据文件，无法进行分析。")
        return

    # --- B. Schema-aware statistics and preview for each frame ---
    for name, df in data_frames.items():
        _print_frame_details(name, df)

    # --- C. Comparison charts ---
    _plot_comparison(symbol, data_frames)


# --- Run the check ---
# Symbol handed to deep_dive_stock; edit this value to inspect another one.
SYMBOL_TO_VERIFY = "600519.SH"
deep_dive_stock(symbol=SYMBOL_TO_VERIFY)

# [notebook cell output] ModuleNotFoundError: No module named 'downloader.cache'