# Import Formatted Balance Sheet from DAI Data Platform

## 1. Library & Configuration

In [1]:
import dai
import pandas as pd
import numpy as np

In [2]:
# Display every row of a Series/DataFrame instead of truncating the output.
pd.set_option("display.max_rows", None)
# Render floats with thousands separators and two decimals, e.g. 1,234.50.
pd.set_option("display.float_format", "{:,.2f}".format)

## 2. Data Collection

In [3]:
# Lower / upper bound of the `date` filter handed to the query below.
sd, ed = '2023-01-01', '2026-01-01'

In [4]:
# Query parameters interpolated into the SQL text below.
company_code, report_date = '600010.SH', '2024-12-31'
# Inserted into the SQL without quotes, i.e. compared as a bare literal.
change_type = '0'

In [5]:
# Select the balance-sheet record(s) for one instrument, report date and
# change type. `change_type` is substituted unquoted; the two other
# parameters are wrapped in single quotes by the template itself.
sql = """

SELECT *
FROM cn_stock_financial_balance_general_pit
WHERE 1=1
AND change_type = {change_type}
AND instrument = '{company_code}'
AND report_date = '{report_date}'

""".format(
    change_type=change_type,
    company_code=company_code,
    report_date=report_date,
)

In [6]:
# Run the query and keep the raw result frame so the row count can be checked.
balance_rows = dai.query(sql, filters = {'date':[sd, ed]}).df()

# The rest of the notebook indexes `df` by column name, which only works when
# `df` is a single record (a Series). `.squeeze()` silently returns a DataFrame
# for 0 or >1 rows, which later fails with a misleading KeyError, so fail here
# with an explicit message instead.
if len(balance_rows) != 1:
    raise ValueError(
        f"Expected exactly one balance-sheet row for {company_code} "
        f"at {report_date} (change_type={change_type}), got {len(balance_rows)}"
    )

# Despite its name, `df` is a Series: field name -> value for the single row.
df = balance_rows.iloc[0]

# Display only the populated fields; `df` itself is left unchanged.
df.dropna()

date                                                 2025-04-19 00:00:00
instrument                                                     600010.SH
report_date                                          2024-12-31 00:00:00
fs_quarter_index                                                       4
change_type                                                            0
noncurr_liabilities_due_within_1y                      15,132,838,878.60
noncurr_assets_due_within_1y                              731,962,011.11
specific_reserve                                          308,253,111.20
tradable_fin_assets                                        23,205,500.00
right_of_use_assets                                        62,822,160.08
other_payables                                          9,350,099,622.81
other_payables_sum                                      9,353,891,842.28
other_receivables                                         320,321,177.44
other_receivables_sum                              

## 3. Balance Sheet Items Classification

In [7]:
# Nested mapping that classifies balance-sheet fields for presentation:
#   level 1 (statement side) -> level 2 -> level 3 (block) -> item label -> value.
# Every value is looked up eagerly with `df.loc[<column name>]`, so a column
# name that is absent from `df` raises KeyError when this cell runs.
# Blocks whose name ends in "合计" hold a single "合计" (total) entry taken from
# the reported total field rather than computed here.
# NOTE(review): assumes `df` is a one-record Series indexed by column name — confirm.
# NOTE(review): several column names look misspelled ("moneytary_assets",
# "settlment_reserves", "held_to_maturity_invesments",
# "deposits_from_banks_and_fin_instiutions"); presumably they mirror the source
# table schema — verify against the schema before changing them.
item_dict = {
    "资产": {
        "资产": {
            "流动资产": {
                # Cash and financial instruments
                "货币资金": df.loc["moneytary_assets"],
                "结算备付金": df.loc["settlment_reserves"],
                "拆出资金": df.loc["loans_to_banks_and_fin_institutions"],
                "交易性金融资产": df.loc["tradable_fin_assets"],
                "衍生金融资产": df.loc["derivatives_fin_assets"],
                "买入返售金融资产": df.loc["fin_assets_purchased_under_resale"],

                # Receivables
                "应收票据": df.loc["notes_receivable"],
                "应收账款": df.loc["accounts_receivable"],
                "应收款项融资": df.loc["receivables_financing"],
                "应收利息": df.loc["interest_receivable"],
                "应收股利": df.loc["dividends_receivable"],
                "其他应收款": df.loc["other_receivables"],

                # Insurance-specific receivables
                "应收保费": df.loc["premiums_receivable"],
                "应收分保账款": df.loc["reinsurance_receivables"],
                "应收分保合同准备金": df.loc["receivable_reinsurance_contract_reserve"],

                # Prepayments / inventories / contract assets
                "预付款项": df.loc["prepayments"],
                "存货": df.loc["inventories"],
                "合同资产": df.loc["contract_assets"],

                # Other current assets
                "持有待售资产": df.loc["assets_held_for_sale"],
                "一年内到期的非流动资产": df.loc["noncurr_assets_due_within_1y"],
                "其他流动资产": df.loc["other_current_assets"],
            },

            # Subtotal: taken from the reported field of the source table
            "流动资产合计": {
                "合计": df.loc["total_current_assets"],
            },

            "非流动资产": {
                # Long-term financial assets
                "发放贷款及垫款": df.loc["loans_and_advances"],
                "以摊余成本计量的金融资产": df.loc["fin_assets_by_amortized_cost"],
                "以公允价值计量且其变动计入其他综合收益的金融资产": df.loc["fin_assets_by_fair_value"],
                "可供出售金融资产": df.loc["available_for_sale_fin_assets"],
                "持有至到期投资": df.loc["held_to_maturity_invesments"],
                "债权投资": df.loc["debt_investments"],
                "其他债权投资": df.loc["other_debt_investments"],
                "长期股权投资": df.loc["longterm_equity_investments"],
                "其他权益工具投资": df.loc["other_equity_investments"],
                "其他非流动金融资产": df.loc["other_noncurr_fin_assets"],

                # Long-term receivables
                "长期应收款": df.loc["longterm_receivables"],

                # Physical and right-of-use assets
                "投资性房地产": df.loc["investment_property"],
                "固定资产": df.loc["fixed_assets"],
                "在建工程": df.loc["construction_in_progress"],
                "工程物资": df.loc["project_materials"],
                "固定资产清理": df.loc["fixed_assets_disposal"],
                "生产性生物资产": df.loc["productive_biological_assets"],
                "油气资产": df.loc["oil_and_gas_assets"],
                "使用权资产": df.loc["right_of_use_assets"],

                # Intangible and deferred items
                "无形资产": df.loc["intangible_assets"],
                "开发支出": df.loc["development_costs"],
                "商誉": df.loc["goodwill"],
                "长期待摊费用": df.loc["longterm_prepaid_expense"],
                "递延所得税资产": df.loc["deferred_tax_assets"],

                # Other
                "其他非流动资产": df.loc["other_noncurr_assets"],
            },

            # Subtotal: taken from the reported field of the source table
            "非流动资产合计": {
                "合计": df.loc["total_noncurr_assets"],
            },

            # Total: total assets
            "资产合计": {
                "合计": df.loc["total_assets"],
            },
        }
    },

    "负债和所有者权益": {
        "负债": {
            "流动负债": {
                # Financial liabilities
                "短期借款": df.loc["shortterm_borrowings"],
                "向中央银行借款": df.loc["borrowing_from_central_bank"],
                "吸收存款及同业存放": df.loc["deposits_from_banks_and_fin_instiutions"],
                "拆入资金": df.loc["loans_from_banks_and_fin_institutions"],
                "交易性金融负债": df.loc["tradable_fin_liabilities"],
                "衍生金融负债": df.loc["derivatives_fin_liabilities"],
                # NOTE(review): the label reads "sold under repurchase" while the
                # column name ends in "_resale" — confirm this is the intended field.
                "卖出回购金融资产款": df.loc["fin_assets_sold_under_resale"],
                "应付短期债券": df.loc["shortterm_bonds_payable"],

                # Payables
                "应付票据": df.loc["notes_payable"],
                "应付账款": df.loc["accounts_payable"],
                "预收款项": df.loc["advances"],
                "合同负债": df.loc["contract_liabilities"],
                "应付手续费及佣金": df.loc["fees_and_commissions_payable"],
                "应付职工薪酬": df.loc["employee_benefits_payable"],
                "应交税费": df.loc["taxes_and_levies_payable"],
                "应付利息": df.loc["interest_payable"],
                "应付股利": df.loc["dividends_payable"],
                "其他应付款": df.loc["other_payables"],

                # Insurance / securities brokerage
                "应付分保账款": df.loc["reinsurance_payables"],
                "保险合同准备金": df.loc["insurance_contract_reserves"],
                "代理买卖证券款": df.loc["acting_trading_payables"],
                "代理承销证券款": df.loc["underwriting_payables"],

                # Other
                "持有待售负债": df.loc["liabilities_held_for_sale"],
                "一年内到期的非流动负债": df.loc["noncurr_liabilities_due_within_1y"],
                "递延收益-流动负债": df.loc["deferred_income_current_liabilities"],
                "其他流动负债": df.loc["other_current_liabilities"],
            },

            # Subtotal: taken from the reported field of the source table
            "流动负债合计": {
                "合计": df.loc["total_current_liabilities"],
            },

            "非流动负债": {
                # Long-term financial liabilities
                "长期借款": df.loc["longterm_borrowings"],
                "应付债券": df.loc["bonds_payable"],
                "永续债": df.loc["perpetual_bonds"],
                "优先股": df.loc["preference_shares"],
                "租赁负债": df.loc["lease_liabilities"],

                # Long-term payables
                "长期应付款": df.loc["longterm_payables"],
                "长期应付职工薪酬": df.loc["longterm_employee_benefits"],
                "专项应付款": df.loc["specific_payables"],

                # Provisions and deferred items
                "预计负债": df.loc["provisions"],
                "递延所得税负债": df.loc["deferred_tax_liabilities"],
                "递延收益-非流动负债": df.loc["deferred_income_noncurr_liabilities"],

                # Other
                "其他非流动负债": df.loc["other_noncurr_liabilities"],
            },

            # Subtotal: taken from the reported field of the source table
            "非流动负债合计": {
                "合计": df.loc["total_noncurr_liabilities"],
            },

            # Total: total liabilities
            "负债合计": {
                "合计": df.loc["total_liabilities"],
            },
        },

        "所有者权益": {
            "所有者权益": {
                "实收资本或股本": df.loc["share_capital"],
                "资本公积": df.loc["capital_reserves"],
                "库存股": df.loc["treasury_shares"],
                "其他综合收益": df.loc["balance_othcom_income"],
                "其他权益工具": df.loc["other_equity_instruments"],
                "其中优先股": df.loc["preference_of_other_equity_instruments"],
                "专项储备": df.loc["specific_reserve"],
                "盈余公积": df.loc["surplus_reserve"],
                "一般风险准备": df.loc["general_reserve"],
                "未分配利润": df.loc["undistributed_profit"],
                "外币报表折算差额": df.loc["balance_translation_diff_of_foreign_currency"],
                "少数股东权益": df.loc["minority_interests"],
            },

            "所有者权益合计": {
                "合计": df.loc["total_owner_equity"],
            },
        },

        "负债和所有者权益": {
            "负债和所有者权益合计": {
                "合计": df.loc["total_liabilities_and_owner_equity"],
            }
        }
    }
}

## 4. Balance Sheet Generation

In [8]:
def get_df_bs_from_item_dict(item_dict, df):
    """Flatten a nested balance-sheet mapping into a formatted DataFrame.

    Parameters
    ----------
    item_dict : dict
        Mapping ``level1 -> level2 -> level3 -> item label -> value``. Values
        may be scalars, or a Series/DataFrame (the first element is used).
        Items whose value is missing or not convertible to ``float`` are
        dropped. Three-level paths not listed in ``blocks`` are still emitted,
        after the known blocks, in their original order.
    df : pandas.Series
        Source record. Only ``instrument``, ``report_date`` and ``date`` are
        read, to build the four header rows.

    Returns
    -------
    pandas.DataFrame
        Indexed by (一级分类, 二级分类, 三级分类, 项目) with two columns:
        ``原表数值`` holds the reported value (strings on the header rows) and
        ``计算合计数值`` holds the total recomputed from the detail items. The
        latter is filled only on the "合计" row of each total block and is NaN
        when no detail item is available. When no numeric item is found at
        all, an empty frame with just these two columns is returned.

    Raises
    ------
    KeyError
        If ``df`` lacks ``instrument``, ``report_date`` or ``date``.
    """
    value_col = "原表数值"
    calc_col = "计算合计数值"
    total_label = "合计"
    index_names = ["一级分类", "二级分类", "三级分类", "项目"]

    # Treasury shares are presented as a deduction inside owners' equity, so
    # they are subtracted when the equity total is recomputed.
    # Assumes the source reports them as a positive amount.
    contra_items = {"库存股"}
    # "Of which" memo lines are already contained in their parent item
    # (其他权益工具) and must not be counted a second time.
    memo_items = {"其中优先股"}

    # Presentation order of the level-3 blocks.
    blocks = [
        ("资产", "资产", "流动资产"),
        ("资产", "资产", "流动资产合计"),
        ("资产", "资产", "非流动资产"),
        ("资产", "资产", "非流动资产合计"),
        ("资产", "资产", "资产合计"),

        ("负债和所有者权益", "负债", "流动负债"),
        ("负债和所有者权益", "负债", "流动负债合计"),
        ("负债和所有者权益", "负债", "非流动负债"),
        ("负债和所有者权益", "负债", "非流动负债合计"),
        ("负债和所有者权益", "负债", "负债合计"),

        ("负债和所有者权益", "所有者权益", "所有者权益"),
        ("负债和所有者权益", "所有者权益", "所有者权益合计"),

        ("负债和所有者权益", "负债和所有者权益", "负债和所有者权益合计"),
    ]
    block_order = {b: i for i, b in enumerate(blocks)}

    def to_number(x):
        """Return ``x`` as a float, or None when it is missing or non-numeric."""
        # Series / DataFrame inputs are reduced to their first element.
        if isinstance(x, pd.DataFrame):
            if x.empty:
                return None
            x = x.iloc[0, 0]
        elif isinstance(x, pd.Series):
            if x.size == 0:
                return None
            x = x.iloc[0]
        if x is None:
            return None
        try:
            v = float(x)
        except (TypeError, ValueError):
            # Only conversion failures mean "no value"; anything else should surface.
            return None
        return None if pd.isna(v) else v

    # Flat list of ((level1, level2, level3, item), value) in traversal order.
    records = []

    def walk(node, path):
        """Collect numeric leaves that sit exactly below a three-level path."""
        if not isinstance(node, dict):
            return
        for key, child in node.items():
            if not isinstance(child, dict):
                continue
            child_path = path + (key,)
            if len(child_path) < 3:
                walk(child, child_path)
                continue
            for item_name, item_val in child.items():
                num = to_number(item_val)
                if num is not None:
                    records.append((child_path + (item_name,), num))

    walk(item_dict, ())

    if not records:
        return pd.DataFrame(columns=[value_col, calc_col])

    # list.sort is stable, so items keep their traversal order inside a block
    # and unknown blocks sink to the end.
    records.sort(key=lambda rec: block_order.get(rec[0][:3], len(blocks)))

    def sum_detail(l1, l2, l3):
        """Signed sum of the detail items of one block, or None if it has none."""
        signed = []
        for (a, b, c, item), value in records:
            if (a, b, c) != (l1, l2, l3):
                continue
            if item == total_label or item in memo_items:
                continue
            signed.append(-value if item in contra_items else value)
        return float(np.sum(signed)) if signed else None

    def combine(*parts):
        """Add the available parts; None when every part is missing."""
        present = [p for p in parts if p is not None]
        return sum(present) if present else None

    ca = sum_detail("资产", "资产", "流动资产")
    nca = sum_detail("资产", "资产", "非流动资产")
    cl = sum_detail("负债和所有者权益", "负债", "流动负债")
    ncl = sum_detail("负债和所有者权益", "负债", "非流动负债")
    eq = sum_detail("负债和所有者权益", "所有者权益", "所有者权益")
    liab = combine(cl, ncl)

    computed = {
        ("资产", "资产", "流动资产合计"): ca,
        ("资产", "资产", "非流动资产合计"): nca,
        ("资产", "资产", "资产合计"): combine(ca, nca),
        ("负债和所有者权益", "负债", "流动负债合计"): cl,
        ("负债和所有者权益", "负债", "非流动负债合计"): ncl,
        ("负债和所有者权益", "负债", "负债合计"): liab,
        ("负债和所有者权益", "所有者权益", "所有者权益合计"): eq,
        ("负债和所有者权益", "负债和所有者权益", "负债和所有者权益合计"): combine(liab, eq),
    }

    # The recomputed total is shown only on the "合计" row of a total block.
    calc_values = []
    for path, _ in records:
        total = computed.get(path[:3]) if path[3] == total_label else None
        calc_values.append(np.nan if total is None else total)

    base = pd.DataFrame(
        {
            value_col: [value for _, value in records],
            calc_col: calc_values,
        },
        index=pd.MultiIndex.from_tuples(
            [path for path, _ in records],
            names=index_names
        )
    )

    header_idx = pd.MultiIndex.from_tuples(
        [
            ("基本信息", "", "", "报表类型"),
            ("基本信息", "", "", "公司代码"),
            ("基本信息", "", "", "报表日期"),
            ("基本信息", "", "", "公告日期"),
        ],
        names=index_names
    )

    header_df = pd.DataFrame(
        {
            value_col: [
                "资产负债表",
                str(df.loc["instrument"]),
                str(pd.to_datetime(df.loc["report_date"]).date()),
                str(pd.to_datetime(df.loc["date"]).date()),
            ],
            calc_col: [np.nan, np.nan, np.nan, np.nan],
        },
        index=header_idx
    )

    # Header rows first, then the statement body.
    return pd.concat([header_df, base])

In [9]:
# Build the formatted statement from the classified items and the source record.
df_bs = get_df_bs_from_item_dict(item_dict=item_dict, df=df)
# Last expression of the cell: rendered as the cell output.
df_bs

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,原表数值,计算合计数值
一级分类,二级分类,三级分类,项目,Unnamed: 4_level_1,Unnamed: 5_level_1
基本信息,,,报表类型,资产负债表,
基本信息,,,公司代码,600010.SH,
基本信息,,,报表日期,2024-12-31,
基本信息,,,公告日期,2025-04-19,
资产,资产,流动资产,货币资金,14088316070.69,
资产,资产,流动资产,交易性金融资产,23205500.00,
资产,资产,流动资产,应收票据,1111063404.09,
资产,资产,流动资产,应收账款,3855735508.90,
资产,资产,流动资产,应收款项融资,4655648927.49,
资产,资产,流动资产,应收股利,9495237.00,


In [10]:
# Write the statement to the working directory; the file name embeds the
# company code and the report date.
# NOTE(review): the labels are Chinese and the file is written with pandas'
# default encoding — consider encoding="utf-8-sig" if it is opened in Excel.
csv_path = f"Balance Sheet {company_code} {report_date}.csv"
df_bs.to_csv(csv_path)