# In [None]:
# ===================================================================
#           数据指挥中心仪表盘 (Data QA Dashboard) v2.4
# ===================================================================
#
# 目的: 修复因数据部分缺失导致的图表显示不全问题，并提供更清晰的诊断信息。
#
# -------------------------------------------------------------------

# 1. 导入必要的库
import pandas as pd
import yaml
from pathlib import Path
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from downloader.cache import cache
from tabulate import tabulate
import pprint
from datetime import datetime, timedelta

# 2. Load configuration and initialise the module-level settings
def expected_daily_types(config):
    """Return the ``daily_<adjust>`` data types of the enabled daily tasks.

    Args:
        config: Parsed ``config.yaml`` mapping. ``None`` (an empty YAML
            document) and a missing or empty ``tasks`` entry are treated as
            "no tasks".

    Returns:
        A list of data-type names in first-seen order, without duplicates.
        A task whose ``adjust`` is missing or falsy maps to ``daily_none``.
    """
    tasks = (config or {}).get("tasks") or []
    data_types = []
    for task in tasks:
        if task.get("type") != "daily" or not task.get("enabled", False):
            continue
        data_type = f"daily_{task.get('adjust') or 'none'}"
        # Two tasks sharing an adjust mode must not make every later check
        # run (and print) twice.
        if data_type not in data_types:
            data_types.append(data_type)
    return data_types


try:
    # Only opening/parsing the file belongs in the try body; it is the sole
    # source of the FileNotFoundError handled below.
    with open("config.yaml", "r", encoding="utf-8") as f:
        # An empty YAML document parses to None; treat it as an empty mapping.
        config = yaml.safe_load(f) or {}
except FileNotFoundError:
    print("❌ 未找到 config.yaml 文件。")
    BASE_PATH = Path("data")
    EXPECTED_DATA_TYPES = ["daily_qfq", "daily_none"]
else:
    configured_base_path = (config.get("storage") or {}).get("base_path")
    if configured_base_path:
        BASE_PATH = Path(configured_base_path)
    else:
        # Same default directory as the missing-file fallback, but reported
        # explicitly so a misconfiguration is not silently masked.
        print("⚠️ config.yaml 中缺少 storage.base_path，使用默认数据目录 'data'。")
        BASE_PATH = Path("data")

    EXPECTED_DATA_TYPES = expected_daily_types(config)

    print(f"✅ 成功加载配置，数据根目录: {BASE_PATH.resolve()}")
    print(f"   预期的日线数据类型: {EXPECTED_DATA_TYPES}")


# --- 核心校验函数 ---
def deep_dive_stock(symbol: str) -> None:
    """Print a diagnostic drill-down for one stock and show a comparison chart.

    Three stages run in order:

    1. For every expected data type, look up ``<data_type>_<symbol>`` in the
       cache and print its ``last_updated`` time and age in hours.
    2. Load ``BASE_PATH/<data_type>/symbol=<symbol>/data.parquet`` for every
       expected data type and print the last five rows of each file found.
    3. Plot the close price of every loaded data type plus one volume series.

    Returns early, without plotting, when no data file could be loaded.

    Args:
        symbol: Stock identifier used in the cache key and in the
            ``symbol=<symbol>`` directory name, e.g. ``"600519.SH"``.
    """
    print("\n" + "=" * 20, f"对股票 {symbol} 进行深度钻取", "=" * 20)

    _report_metadata(symbol)

    data_frames = _load_data_frames(symbol)
    if not data_frames:
        print("\n未能加载任何数据文件，无法进行分析。")
        return

    print("\n[图表对比]")
    _build_comparison_figure(symbol, data_frames).show()


def _parse_last_updated(metadata):
    """Return the ``last_updated`` datetime from *metadata*, or None if unusable."""
    if not metadata or "last_updated" not in metadata:
        return None
    raw = metadata["last_updated"]
    if isinstance(raw, datetime):
        return raw
    try:
        return datetime.fromisoformat(raw)
    except (TypeError, ValueError):
        # A malformed timestamp is reported as "no valid record" instead of
        # aborting the whole diagnostic run.
        return None


def _report_metadata(symbol: str) -> None:
    """Print the cached last-update time and its age for each expected data type."""
    print("\n[元数据与冷却期检查]")
    for data_type in sorted(EXPECTED_DATA_TYPES):
        entity_id = f"{data_type}_{symbol}"
        metadata = cache.get(entity_id)
        print(f"--- 实体: {entity_id} ---")

        last_updated = _parse_last_updated(metadata)
        if last_updated is None:
            print("  ❌ 未找到有效的更新记录。")
            continue

        # "now" must match the timestamp's awareness: subtracting a tz-aware
        # datetime from a naive one raises TypeError. tzinfo is None for naive
        # timestamps, which yields the usual naive local time.
        age = datetime.now(tz=last_updated.tzinfo) - last_updated
        print(
            f"  ✅ 上次更新: {last_updated.strftime('%Y-%m-%d %H:%M:%S')} ({age.total_seconds()/3600:.1f} 小时前)"
        )


def _load_data_frames(symbol: str) -> dict:
    """Load and preview every available parquet file; return them keyed by data type."""
    print("\n[数据文件加载与预览]")
    preview_columns = ["trade_date", "open", "high", "low", "close", "vol"]
    data_frames = {}
    for data_type in sorted(EXPECTED_DATA_TYPES):
        file_path = BASE_PATH / data_type / f"symbol={symbol}" / "data.parquet"
        print(f"\n--- 检查: {data_type} (文件: {file_path}) ---")
        if not file_path.exists():
            # Report the gap explicitly so a missing curve in the chart is explained.
            print("  ❌ 未找到数据文件。此类型的数据将不会显示在图表中。")
            continue

        df = pd.read_parquet(file_path)
        # Parsed copy of trade_date (read as YYYYMMDD) used as the plot's x axis.
        df["trade_date_dt"] = pd.to_datetime(df["trade_date"], format="%Y%m%d")
        data_frames[data_type] = df
        print(f"  ✅ 已加载 {len(df)} 条记录。最近5条:")
        print(
            tabulate(
                df[preview_columns].tail(5),
                headers="keys",
                tablefmt="psql",
                showindex=False,
            )
        )
    return data_frames


def _build_comparison_figure(symbol: str, data_frames: dict):
    """Build the two-row figure (close prices, volume) from non-empty *data_frames*."""
    fig = make_subplots(
        rows=2,
        cols=1,
        shared_xaxes=True,
        vertical_spacing=0.03,
        subplot_titles=("收盘价对比 (close)", "成交量 (vol)"),
        row_heights=[0.8, 0.2],
    )

    # One close-price curve per loaded data type.
    for name, df in data_frames.items():
        fig.add_trace(
            go.Scatter(x=df["trade_date_dt"], y=df["close"], name=f"收盘价 ({name})"),
            row=1,
            col=1,
        )

    # Volume prefers the unadjusted series; otherwise fall back to the first
    # loaded data type, drawn in a different colour to mark the substitution.
    using_fallback = "daily_none" not in data_frames
    if using_fallback:
        volume_source = next(iter(data_frames))
        volume_label = f"成交量 ({volume_source})"
        volume_color = "rgba(255, 127, 14, 0.5)"
    else:
        volume_source = "daily_none"
        volume_label = "成交量 (不复权)"
        volume_color = "rgba(44, 160, 44, 0.5)"

    volume_df = data_frames[volume_source]
    fig.add_trace(
        go.Scatter(
            x=volume_df["trade_date_dt"],
            y=volume_df["vol"],
            name=volume_label,
            fill="tozeroy",
            mode="lines",
            line=dict(width=0.5, color=volume_color),
        ),
        row=2,
        col=1,
    )
    if using_fallback:
        print(
            f"  - 提示: 未找到'不复权(daily_none)'数据，成交量图表使用'{volume_source}'数据进行绘制。"
        )

    fig.update_yaxes(title_text="价格", row=1, col=1)
    fig.update_yaxes(title_text="成交量", tickformat=".2s", row=2, col=1)
    fig.update_layout(
        title_text=f"股票 {symbol} - 不同复权类型数据对比",
        xaxis_rangeslider_visible=False,
        height=700,
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    )
    return fig


# --- Run the verification ---
# Symbol whose cached metadata and data files are inspected.
SYMBOL_TO_VERIFY = "600519.SH"
deep_dive_stock(symbol=SYMBOL_TO_VERIFY)