# Import Formatted Income Statement from DAI Data Platform

## 1. Library & Configuration

In [1]:
import dai
import pandas as pd
import numpy as np

In [2]:
# Print every row of a frame/series, and render floats with thousands
# separators and two decimal places.
pd.options.display.max_rows = None
pd.options.display.float_format = "{:,.2f}".format

## 2. Data Collection

In [3]:
# Bounds passed as the `date` filter when the query is executed.
sd = "2023-01-01"
ed = "2026-01-01"

In [4]:
# Values spliced into the WHERE clause of the query.
company_code = "600010.SH"   # compared against the `instrument` column
report_date = "2024-12-31"   # compared against the `report_date` column
change_type = "0"            # compared against `change_type`; meaning of 0 is defined by the data source

In [5]:
# Query text: every column of cn_stock_financial_income_general_pit for one
# instrument and one report date. `change_type` is interpolated without quotes,
# while the instrument code and report date are wrapped in single quotes.
# NOTE(review): the values are spliced into the SQL text as-is. That is fine for
# the constants set in this notebook, but do not route untrusted input through
# this f-string.
sql = f"""

SELECT *
FROM cn_stock_financial_income_general_pit
WHERE 1=1
AND change_type = {change_type}
AND instrument = '{company_code}'
AND report_date = '{report_date}'

"""

In [6]:
# Run the query and collapse the result with squeeze(): a single-row frame
# becomes a Series indexed by column name, which the df.loc["..."] lookups
# further down rely on.
# presumably `filters` restricts the `date` column to [sd, ed] — confirm against the dai docs
df = dai.query(sql, filters = {'date':[sd, ed]}).df().squeeze()
# Result is not assigned: this only displays the non-null fields as the cell
# output; `df` itself keeps its NaN entries.
df.dropna()

date                                                 2025-04-19 00:00:00
instrument                                                     600010.SH
report_date                                          2024-12-31 00:00:00
fs_quarter_index                                                       4
change_type                                                            0
continuing_operation_net_profit                            20,748,912.46
othcom_income_cannt_reclass                                -5,421,902.53
othcom_income_reclass                                         136,838.36
credit_impairment_loss                                      4,772,840.30
fair_value_chg_gain                                         3,852,550.00
other_income                                              610,432,924.30
other_equity_instruments_fair_value_chg                    -5,421,902.53
income_othcom_income                                       -5,153,592.02
net_profit                                         

## 3. Income Statement Items Classification

In [7]:
# Statement layout: section -> {line-item label -> column name in the raw record}.
# Insertion order here is the order the items are handed to the formatter.
item_columns = {
    "营业总收入": {
        "营业收入": "operating_revenue",
        "利息收入": "interest_income",
        "已赚保费": "insurance_premium_income",
        "手续费及佣金收入": "fee_and_commission_income",
    },
    "营业总收入合计": {
        "合计": "total_operating_revenue",
    },
    "营业总成本": {
        "营业成本": "operating_costs",
        "利息支出": "interest_costs",
        "手续费及佣金支出": "fee_and_commission_costs",
        "退保金": "surrenders",
        "赔付支出净额": "net_insurance_claims_paid",
        "提取保险合同准备金净额": "net_amount_of_insurance_reserve",
        "保单红利支出": "expense_on_policy_dividends",
        "分保费用": "reinsurance_premium_expense",
        "税金及附加": "taxes_and_levies",
        # Spelled "epense" on purpose: this is the label the record is read with.
        "销售费用": "selling_epense",
        "管理费用": "administrative_expense",
        "研发费用": "research_and_development_expense",
        "财务费用": "finance_expense",
        "资产减值损失": "asset_impairment_loss",
        "信用减值损失": "credit_impairment_loss",
    },
    "营业总成本合计": {
        "合计": "total_operating_costs",
    },
    "其他经营收益": {
        "公允价值变动收益": "fair_value_chg_gain",
        "投资收益": "invest_income",
        "对联营企业和合营企业的投资收益": "invest_income_of_jv_and_associates",
        "资产处置收益": "asset_disposal_income",
        "其他收益": "other_income",
    },
    "营业利润合计": {
        "合计": "operating_profit",
    },
    "营业外收支": {
        "营业外收入": "nonoperating_income",
        "营业外支出": "nonoperating_costs",
    },
    "利润总额合计": {
        "合计": "total_profit",
    },
    "所得税费用": {
        "所得税费用": "income_tax_expense",
    },
    "净利润": {
        "净利润": "net_profit",
        "归属于母公司所有者的净利润": "net_profit_to_parent_shareholders",
        "少数股东损益": "net_profit_to_minority",
    },
    "其他综合收益": {
        "其他综合收益": "income_othcom_income",
        "归属于母公司所有者的其他综合收益": "othcom_income_to_parent_shareholders",
        "归属于少数股东的其他综合收益": "othcom_income_to_minority",
    },
    "综合收益总额": {
        "综合收益总额": "total_comprehensive_income",
        "归属于母公司股东的综合收益总额": "total_comprehensive_income_to_parent_shareholders",
    },
}

# Resolve every column against the raw record `df` by label.
item_dict = {
    section: {label: df.loc[column] for label, column in columns.items()}
    for section, columns in item_columns.items()
}


## 4. Income Statement Generation

In [8]:
def get_df_is_from_item_dict(item_dict_is, df):
    """Build a formatted income-statement table from classified line items.

    Parameters
    ----------
    item_dict_is : dict
        ``{category: {item label: value}}``. A value may be a scalar, a
        Series or a DataFrame (the first element is used). Values that are
        ``None``, NaN or not convertible to ``float`` are skipped, as are
        categories whose value is not a dict.
    df : pandas.Series or one-row pandas.DataFrame
        Raw record; only the labels ``"instrument"``, ``"report_date"`` and
        ``"date"`` are read, to fill the header rows.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(分类, 项目)`` with the columns ``加法数值`` (amounts that
        add), ``减法数值`` (amounts that subtract, stored as absolute values)
        and ``计算值`` (values recomputed from other rows for cross-checking).
        Four ``基本信息`` header rows come first, then the items ordered by the
        fixed section order below; unknown sections go last, keeping their
        input order. If no item has a usable number, an empty frame with only
        these three columns is returned.

    Raises
    ------
    KeyError
        If ``df`` lacks one of the three header labels (only checked when at
        least one item has a usable number).

    Notes
    -----
    Rules for ``计算值`` (each written only if the target row exists):

    * 营业总收入合计 / 营业总成本合计 = sum of the detail rows of 营业总收入 /
      营业总成本.
    * 营业利润合计 = reported total revenue - reported total cost + other
      operating gains.
    * 利润总额合计 = reported operating profit + net non-operating amount.
    * 净利润 = reported total profit - income tax.
    * 其他综合收益 = parent share + minority share.
    * 综合收益总额 = reported net profit + reported OCI; the parent row is
      parent net profit + parent OCI.
    """
    blocks = [
        "基本信息",
        "营业总收入",
        "营业总收入合计",
        "营业总成本",
        "营业总成本合计",
        "其他经营收益",
        "营业利润合计",
        "营业外收支",
        "利润总额合计",
        "所得税费用",
        "净利润",
        "其他综合收益",
        "综合收益总额",
    ]
    block_order = {name: pos for pos, name in enumerate(blocks)}
    out_columns = ["加法数值", "减法数值", "计算值"]
    # Substrings that mark a category or item as an amount to subtract.
    neg_kw = ("成本", "费用", "支出", "损失", "税", "减值")

    def to_number(x):
        """Return x (or its first element) as a float, or None if unusable."""
        if x is None:
            return None
        if isinstance(x, pd.Series):
            if x.size == 0:
                return None
            x = x.iloc[0]
        elif isinstance(x, pd.DataFrame):
            if x.empty:
                return None
            x = x.iloc[0, 0]
        # One conversion path for scalars and extracted elements, so a
        # non-numeric element is skipped the same way a non-numeric scalar is.
        try:
            num = float(x)
        except (TypeError, ValueError, OverflowError):
            return None
        return None if pd.isna(num) else num

    # Collect ((category, item), number) pairs in input order.
    entries = []
    for cat, items in item_dict_is.items():
        if not isinstance(items, dict):
            continue
        for item_name, item_val in items.items():
            num = to_number(item_val)
            if num is not None:
                entries.append(((cat, item_name), num))

    if not entries:
        return pd.DataFrame(columns=out_columns)

    # When total investment income is present, its JV/associates row is
    # dropped so the same amount is not listed twice.
    parent = ("其他经营收益", "投资收益")
    child = ("其他经营收益", "对联营企业和合营企业的投资收益")
    if any(key == parent for key, _ in entries):
        entries = [entry for entry in entries if entry[0] != child]

    # list.sort is stable: rows keep their input order within a section.
    entries.sort(key=lambda entry: block_order.get(entry[0][0], 10**9))

    base = pd.DataFrame(
        {"原表数值": [num for _, num in entries], "计算值": np.nan},
        index=pd.MultiIndex.from_tuples(
            [key for key, _ in entries], names=["分类", "项目"]
        ),
    )

    def v(cat, item):
        """Reported value of a row, or None if the row is absent."""
        idx = (cat, item)
        if idx not in base.index:
            return None
        x = base.loc[idx, "原表数值"]
        return None if pd.isna(x) else float(x)

    def set_calc(cat, item, val):
        """Store a recomputed value, but only on rows that already exist."""
        idx = (cat, item)
        if idx in base.index and val is not None and not pd.isna(val):
            base.loc[idx, "计算值"] = float(val)

    def sum_cat(cat, exclude_item="合计"):
        """Sum the reported values of a section, leaving out `exclude_item`."""
        if cat not in base.index.get_level_values("分类"):
            return None
        part = base.xs(cat, level="分类", drop_level=False)
        detail = part[part.index.get_level_values("项目") != exclude_item]
        if detail.empty:
            return None
        s = detail["原表数值"].sum()
        return None if pd.isna(s) else float(s)

    def sum_other_gain_dedup():
        """Sum the known 其他经营收益 items; None if none of them is present."""
        cat = "其他经营收益"
        if cat not in base.index.get_level_values("分类"):
            return None
        inv = v(cat, "投资收益")
        parts = [
            v(cat, "公允价值变动收益"),
            # Fall back to the JV/associates figure only without a total.
            inv if inv is not None else v(cat, "对联营企业和合营企业的投资收益"),
            v(cat, "摊余成本金融资产终止确认收益"),
            v(cat, "净敞口套期收益"),
            v(cat, "汇兑收益"),
            v(cat, "资产处置收益"),
            v(cat, "其他收益"),
        ]
        present = [p for p in parts if p is not None]
        if not present:
            return None
        total = 0.0
        for p in present:
            total += p
        return total

    def nonop_net():
        """Non-operating income minus expense; None if the section is absent."""
        cat = "营业外收支"
        if cat not in base.index.get_level_values("分类"):
            return None
        part = base.xs(cat, level="分类", drop_level=False)
        s_in, s_out, ok = 0.0, 0.0, False
        for (_, item), x in part["原表数值"].items():
            if pd.isna(x):
                continue
            ok = True
            x = float(x)
            is_income = ("收入" in item) or ("利得" in item)
            is_expense = ("支出" in item) or ("损失" in item)
            # Income keywords win; anything unrecognised counts as income.
            if is_expense and not is_income:
                s_out += x
            else:
                s_in += x
        return (s_in - s_out) if ok else None

    set_calc("营业总收入合计", "合计", sum_cat("营业总收入"))
    set_calc("营业总成本合计", "合计", sum_cat("营业总成本"))

    other_gain = sum_other_gain_dedup()
    total_rev = v("营业总收入合计", "合计")
    total_cost = v("营业总成本合计", "合计")
    if total_rev is not None and total_cost is not None:
        set_calc(
            "营业利润合计",
            "合计",
            total_rev - total_cost + (0.0 if other_gain is None else other_gain),
        )

    op_profit_orig = v("营业利润合计", "合计")
    net_nonop = nonop_net()
    if op_profit_orig is not None:
        set_calc(
            "利润总额合计",
            "合计",
            op_profit_orig + (0.0 if net_nonop is None else net_nonop),
        )

    tp = v("利润总额合计", "合计")
    tax = v("所得税费用", "所得税费用")
    if tp is not None and tax is not None:
        set_calc("净利润", "净利润", tp - tax)

    oci_parent = v("其他综合收益", "归属于母公司所有者的其他综合收益")
    oci_min = v("其他综合收益", "归属于少数股东的其他综合收益")
    if oci_parent is not None and oci_min is not None:
        set_calc("其他综合收益", "其他综合收益", oci_parent + oci_min)

    np_total = v("净利润", "净利润")
    oci_total = v("其他综合收益", "其他综合收益")
    if np_total is not None and oci_total is not None:
        set_calc("综合收益总额", "综合收益总额", np_total + oci_total)

    np_parent = v("净利润", "归属于母公司所有者的净利润")
    if np_parent is not None and oci_parent is not None:
        set_calc("综合收益总额", "归属于母公司股东的综合收益总额", np_parent + oci_parent)

    # Accept a one-row frame as well as a Series: a query result that was not
    # squeezed would otherwise fail on the label lookups below.
    meta = df
    if isinstance(df, pd.DataFrame) and len(df) == 1:
        meta = df.iloc[0]

    instrument = str(meta.loc["instrument"])
    report_date = str(pd.to_datetime(meta.loc["report_date"]).date())
    announce_date = str(pd.to_datetime(meta.loc["date"]).date())

    header = pd.DataFrame(
        {"原表数值": ["利润表", instrument, report_date, announce_date],
         "计算值": [np.nan, np.nan, np.nan, np.nan]},
        index=pd.MultiIndex.from_tuples(
            [
                ("基本信息", "报表类型"),
                ("基本信息", "公司代码"),
                ("基本信息", "报表日期"),
                ("基本信息", "公告日期"),
            ],
            names=base.index.names
        )
    )

    out = pd.concat([header, base]).replace({pd.NA: np.nan})

    def is_subtractive(cat, item):
        """True if the category or item label contains a subtractive keyword."""
        if cat == "基本信息":
            return False
        return any(k in cat for k in neg_kw) or any(k in item for k in neg_kw)

    add_vals, sub_vals = [], []
    for (cat, item), val in out["原表数值"].items():
        if pd.isna(val):
            add, sub = np.nan, np.nan
        elif cat == "基本信息":
            # Header rows carry text and always sit in the additive column.
            add, sub = val, np.nan
        elif is_subtractive(cat, item):
            # NOTE(review): abs() drops the sign, so a negative expense (e.g.
            # net finance income) shows up as a positive amount to subtract —
            # confirm this is intended.
            add, sub = np.nan, abs(float(val))
        else:
            add, sub = float(val), np.nan
        add_vals.append(add)
        sub_vals.append(sub)

    out["加法数值"] = add_vals
    out["减法数值"] = sub_vals

    return out[out_columns]

In [9]:
# Assemble the formatted statement from the classified items and the raw
# record, then show it as the cell output.
df_is = get_df_is_from_item_dict(item_dict_is=item_dict, df=df)
df_is

Unnamed: 0_level_0,Unnamed: 1_level_0,加法数值,减法数值,计算值
分类,项目,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
基本信息,报表类型,利润表,,
基本信息,公司代码,600010.SH,,
基本信息,报表日期,2024-12-31,,
基本信息,公告日期,2025-04-19,,
营业总收入,营业收入,68089440629.67,,
营业总收入合计,合计,68089440629.67,,68089440629.67
营业总成本,营业成本,,62726063636.55,
营业总成本,税金及附加,,1482232703.79,
营业总成本,销售费用,,236254917.57,
营业总成本,管理费用,,1390661943.61,


In [10]:
# Write the statement to the working directory, named after the instrument
# code and the report date.
csv_name = f"Income Statement {company_code} {report_date}.csv"
df_is.to_csv(csv_name)