In [2]:
# Print the version of whichever `python3` the shell resolves (not necessarily the kernel's interpreter).
!python3 --version

Python 3.9.6


In [3]:
import sys
# Display the path and version string of the interpreter running this kernel.
sys.executable, sys.version

('/Library/Developer/CommandLineTools/usr/bin/python3',
 '3.9.6 (default, Dec  2 2025, 07:27:58) \n[Clang 17.0.0 (clang-1700.6.3.2)]')

In [4]:
# Step 1/4: 在当前 Notebook 内核中安装依赖（必须用 sys.executable）
import sys

print("Kernel python:", sys.executable)
print("Kernel version:", sys.version)

!{sys.executable} -m pip install -U pip
!{sys.executable} -m pip install akshare pandas openpyxl

Kernel python: /Library/Developer/CommandLineTools/usr/bin/python3
Kernel version: 3.9.6 (default, Dec  2 2025, 07:27:58) 
[Clang 17.0.0 (clang-1700.6.3.2)]
Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Downloading pip-25.3-py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 497 kB/s eta 0:00:01
[?25hInstalling collected packages: pip
Successfully installed pip-25.3
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Defaulting to user installation because normal site-packages is not writeable


In [12]:
import akshare as ak
import pandas as pd
from pathlib import Path
import numpy as np

# ====== Parameters ======
symbol = "SZ300673"           # change this to the symbol you want
include_half_year = "2025-06-30"
years_back = 10
class_filter = "按产品"       # options: 按行业 / 按地区 / 按产品

# ====== 1) Fetch data ======
# Work on a copy of the frame returned by akshare; unparseable report dates become NaT.
df = ak.stock_zygc_em(symbol=symbol).copy()
df["报告日期"] = pd.to_datetime(df["报告日期"], errors="coerce")

# ====== 2) Filter by classification ======
# Keep rows whose 分类类型 contains `class_filter`; missing values are treated as "".
# NOTE: str.contains interprets `class_filter` as a regular expression by default.
df = df[df["分类类型"].fillna("").str.contains(class_filter)].copy()

# ====== 3) Latest `years_back` year-end (12-31) reports + the specified `include_half_year` report ======
annual = df[(df["报告日期"].dt.month == 12) & (df["报告日期"].dt.day == 31)].copy()
# Distinct report years, newest first, truncated to the `years_back` most recent.
annual_years = sorted(annual["报告日期"].dt.year.dropna().unique(), reverse=True)[:years_back]
annual = annual[annual["报告日期"].dt.year.isin(annual_years)].copy()

# Rows whose report date equals `include_half_year`.
half = df[df["报告日期"] == pd.to_datetime(include_half_year)].copy()

# Stack the interim rows above the annual rows and drop exact duplicate rows.
df_sel = pd.concat([half, annual], ignore_index=True).drop_duplicates().copy()

# ====== 4) Numeric column handling ======
money_cols = ["主营收入", "主营成本", "主营利润"]
ratio_cols  = ["收入比例", "成本比例", "利润比例", "毛利率"]

# Coerce every amount / ratio column that is present to numbers (unparseable -> NaN).
for col_name in (*money_cols, *ratio_cols):
    if col_name not in df_sel.columns:
        continue
    df_sel[col_name] = pd.to_numeric(df_sel[col_name], errors="coerce")

# Amounts are kept as whole yuan.
for col_name in money_cols:
    if col_name not in df_sel.columns:
        continue
    df_sel[col_name] = df_sel[col_name].round(0)

# ====== 5) Build the three verification columns ======
# Verified revenue share = this row's revenue / total revenue of the same report date.
if "主营收入" not in df_sel.columns:
    df_sel["验证收入占比"] = np.nan
else:
    total_rev = df_sel.groupby("报告日期")["主营收入"].transform("sum")
    usable_total = total_rev.notna() & (total_rev != 0)
    df_sel["验证收入占比"] = np.where(usable_total, df_sel["主营收入"] / total_rev, np.nan)

# Verified gross margin = (revenue - cost) / revenue; when cost is missing, profit / revenue is used instead.
def calc_verify_gm(row):
    """Recompute one row's gross margin from its amount fields.

    `row` only needs a dict-style ``.get`` (a pandas row or a plain dict works).
    Returns NaN when revenue is missing or zero, or when neither cost nor
    profit is available.
    """
    revenue = row.get("主营收入", np.nan)
    if pd.isna(revenue) or revenue == 0:
        return np.nan

    cost_amt = row.get("主营成本", np.nan)
    if not pd.isna(cost_amt):
        return (revenue - cost_amt) / revenue

    profit_amt = row.get("主营利润", np.nan)
    return np.nan if pd.isna(profit_amt) else profit_amt / revenue

# Apply the row-wise gross-margin recomputation to every selected row.
df_sel["验证毛利率"] = df_sel.apply(calc_verify_gm, axis=1)

# Verified gross-profit share = this row's 主营利润 / sum of 主营利润 for the same report date
# (NaN when the profit column is absent or that total is zero).
if "主营利润" in df_sel.columns:
    total_gp = df_sel.groupby("报告日期")["主营利润"].transform("sum")
    df_sel["验证毛利占比"] = np.where(total_gp.notna() & (total_gp != 0), df_sel["主营利润"] / total_gp, np.nan)
else:
    df_sel["验证毛利占比"] = np.nan

# ====== 6) Sort ======
# Newest report date first; within a date, largest revenue first.
df_sel = df_sel.sort_values(["报告日期", "主营收入"], ascending=[False, False]).copy()

# ====== 7) De-duplicated date display (only the first row of each date shows it) ======
df_sel["报告日期_显示"] = df_sel["报告日期"].dt.strftime("%Y-%m-%d")
# A row repeats the date when its text equals the previous row's text; blank those cells.
mask_repeat = df_sel["报告日期_显示"].eq(df_sel["报告日期_显示"].shift(1))
df_sel.loc[mask_repeat, "报告日期_显示"] = ""

# ====== 8) Percentage column formatting (two decimals) ======
pct_cols = ["收入比例", "成本比例", "利润比例", "毛利率", "验证收入占比", "验证毛利率", "验证毛利占比"]

def to_pct(x):
    """Format a fraction as a percentage string with two decimals; missing values give ""."""
    return "" if pd.isna(x) else f"{x*100:.2f}%"

# Turn every percentage column that exists into display strings.
for pct_name in (name for name in pct_cols if name in df_sel.columns):
    df_sel[pct_name] = df_sel[pct_name].map(to_pct)

# ====== 9) Output column order ======
# Preferred order; columns missing from the frame are skipped.
cols = [
    name
    for name in (
        "股票代码", "报告日期_显示", "分类类型", "主营构成",
        "主营收入", "收入比例", "验证收入占比",
        "主营成本", "成本比例",
        "主营利润", "利润比例", "验证毛利占比",
        "毛利率", "验证毛利率",
    )
    if name in df_sel.columns
]
out = df_sel.loc[:, cols].copy()

# ====== 10) Export to Excel only (no head() preview, no CSV) ======
out_dir = Path.home().joinpath("Downloads", "akshare_exports")
out_dir.mkdir(parents=True, exist_ok=True)

xlsx_name = f"{symbol}_主营构成_{class_filter}_最近{years_back}年报+{include_half_year}_元_百分比+校验_去重日期.xlsx"
xlsx_path = out_dir / xlsx_name
out.to_excel(xlsx_path, index=False)

print("rows:", len(out))
print("Saved xlsx:", xlsx_path)

rows: 45
Saved xlsx: /Users/sam/Downloads/akshare_exports/SZ300673_主营构成_按产品_最近10年报+2025-06-30_元_百分比+校验_去重日期.xlsx


In [13]:
import akshare as ak
import pandas as pd
from pathlib import Path
import numpy as np

# ====== Parameters ======
symbol = "SZ300673"           # change this to the symbol you want
include_half_year = "2025-06-30"
years_back = 10
class_filter = "按产品"       # options: 按行业 / 按地区 / 按产品

# ====== 1) Fetch data ======
df = ak.stock_zygc_em(symbol=symbol).copy()
df["报告日期"] = pd.to_datetime(df["报告日期"], errors="coerce")

# ====== 2) Filter by classification ======
matches_class = df["分类类型"].fillna("").str.contains(class_filter)
df = df[matches_class].copy()

# ====== 3) Latest `years_back` year-end (12-31) reports + the specified interim report ======
report_date = df["报告日期"]
is_year_end = (report_date.dt.month == 12) & (report_date.dt.day == 31)
annual = df[is_year_end].copy()

# Newest report years first, keep at most `years_back` of them.
annual_years = sorted(annual["报告日期"].dt.year.dropna().unique(), reverse=True)[:years_back]
in_recent_years = annual["报告日期"].dt.year.isin(annual_years)
annual = annual[in_recent_years].copy()

half_date = pd.to_datetime(include_half_year)
half = df[df["报告日期"] == half_date].copy()

# Interim rows first, then annual rows; exact duplicate rows are removed.
df_sel = pd.concat([half, annual], ignore_index=True).drop_duplicates().copy()

# ====== 4) Numeric column handling ======
money_cols = ["主营收入", "主营成本", "主营利润"]
ratio_cols  = ["收入比例", "成本比例", "利润比例", "毛利率"]

# Coerce amount and ratio columns to numbers; values that cannot be parsed become NaN.
for c in money_cols + ratio_cols:
    if c in df_sel.columns:
        df_sel[c] = pd.to_numeric(df_sel[c], errors="coerce")

# Round amounts to whole yuan.
for c in money_cols:
    if c in df_sel.columns:
        df_sel[c] = df_sel[c].round(0)

# ====== 5) Build the three verification columns (numeric here; formatted as percentage strings later) ======
# Verified revenue share = this row's revenue / total revenue of the same report date.
# NaN where that total is zero, or when the revenue column is absent.
if "主营收入" in df_sel.columns:
    total_rev = df_sel.groupby("报告日期")["主营收入"].transform("sum")
    df_sel["验证收入占比"] = np.where(total_rev.notna() & (total_rev != 0), df_sel["主营收入"] / total_rev, np.nan)
else:
    df_sel["验证收入占比"] = np.nan

def calc_verify_gm(row):
    """Recompute a row's gross margin: (revenue - cost) / revenue, else profit / revenue.

    `row` is read through ``.get`` so absent fields count as missing.
    Returns NaN when revenue is missing or zero, or when both cost and profit are missing.
    """
    revenue = row.get("主营收入", np.nan)
    cost = row.get("主营成本", np.nan)
    profit = row.get("主营利润", np.nan)

    revenue_usable = pd.notna(revenue) and revenue != 0
    if revenue_usable and pd.notna(cost):
        return (revenue - cost) / revenue
    if revenue_usable and pd.notna(profit):
        return profit / revenue
    return np.nan

# Recompute each row's gross margin from its own amounts.
df_sel["验证毛利率"] = df_sel.apply(calc_verify_gm, axis=1)

# Verified gross-profit share = row profit / total profit of the same report date.
if "主营利润" not in df_sel.columns:
    df_sel["验证毛利占比"] = np.nan
else:
    total_gp = df_sel.groupby("报告日期")["主营利润"].transform("sum")
    gp_share = df_sel["主营利润"] / total_gp
    df_sel["验证毛利占比"] = np.where(total_gp.notna() & (total_gp != 0), gp_share, np.nan)

# ====== 6) Build the "合计" (total) rows: one per report date and classification ======
# Total row: 主营构成 = "合计"; amount columns are summed; share columns are 100%.
group_keys = ["报告日期", "分类类型"]
sum_cols = [c for c in money_cols if c in df_sel.columns]

# min_count=1 keeps a total as NaN when every row of the group lacks that amount.
# A plain "sum" would report 0 instead, and a gross margin derived from
# (total revenue - 0) / total revenue would read 100% for periods without cost data.
# Groups where only some rows lack the amount are still summed over the available rows.
totals = df_sel.groupby(group_keys, as_index=False)[sum_cols].sum(min_count=1)

# .iloc[0] raises IndexError on an empty selection, so check for rows first.
has_code = "股票代码" in df_sel.columns and not df_sel.empty
totals["股票代码"] = df_sel["股票代码"].iloc[0] if has_code else ""
totals["主营构成"] = "合计"

# A 100% share is only meaningful when the amount it is based on has a usable total;
# otherwise leave it NaN, the same rule the detail rows' verification shares follow.
share_basis = {
    "收入比例": "主营收入",
    "成本比例": "主营成本",
    "利润比例": "主营利润",
    "验证收入占比": "主营收入",
    "验证毛利占比": "主营利润",
}
for share_col, amount_col in share_basis.items():
    if amount_col in totals.columns:
        usable = totals[amount_col].notna() & (totals[amount_col] != 0)
        totals[share_col] = np.where(usable, 1.0, np.nan)
    else:
        totals[share_col] = np.nan
# Total gross margin: prefer (total revenue - total cost) / total revenue, otherwise total profit / total revenue.
def calc_total_gm(row):
    """Gross margin of a total row computed from its summed amounts.

    Returns NaN when revenue is missing or zero, or when neither cost nor
    profit is available.
    """
    rev = row.get("主营收入", np.nan)
    if pd.isna(rev) or rev == 0:
        return np.nan

    cost = row.get("主营成本", np.nan)
    # Numerator: revenue minus cost when cost is known, else the profit figure.
    gross = rev - cost if pd.notna(cost) else row.get("主营利润", np.nan)
    return np.nan if pd.isna(gross) else gross / rev

# Gross margin of each total row, computed from its summed amounts; the verification column mirrors it.
totals["毛利率"] = totals.apply(calc_total_gm, axis=1)
totals["验证毛利率"] = totals["毛利率"]

# ====== 7) Combine detail and total rows, then sort (total row last within each date) ======
# Marker column used as a sort key: 0 = detail row, 1 = total row.
df_sel["__is_total__"] = 0
totals["__is_total__"] = 1

df_all = pd.concat([df_sel, totals], ignore_index=True)

# Within a date: detail rows first (revenue descending), total row last.
df_all = df_all.sort_values(
    ["报告日期", "__is_total__", "主营收入"],
    ascending=[False, True, False]
).copy()

# ====== 8) De-duplicated date display (only the first row of a date shows it; total rows do not repeat it either) ======
df_all["报告日期_显示"] = df_all["报告日期"].dt.strftime("%Y-%m-%d")
# A row repeats the date when its text equals the previous row's text; blank those cells.
mask_repeat = df_all["报告日期_显示"].eq(df_all["报告日期_显示"].shift(1))
df_all.loc[mask_repeat, "报告日期_显示"] = ""

# ====== 9) Percentage column formatting (two decimals) ======
pct_cols = ["收入比例", "成本比例", "利润比例", "毛利率", "验证收入占比", "验证毛利率", "验证毛利占比"]

def to_pct(x):
    """Return `x` (a fraction) as a two-decimal percentage string, or "" when `x` is missing."""
    missing = pd.isna(x)
    return "" if missing else f"{x*100:.2f}%"

# Replace every percentage column that exists with its formatted string form.
for c in pct_cols:
    if c in df_all.columns:
        df_all[c] = df_all[c].map(to_pct)

# ====== 10) Output column order ======
# Preferred order; names missing from the frame are dropped by the filter below.
# The helper column "__is_total__" is not listed, so it is not exported.
cols = [
    "股票代码", "报告日期_显示", "分类类型", "主营构成",
    "主营收入", "收入比例", "验证收入占比",
    "主营成本", "成本比例",
    "主营利润", "利润比例", "验证毛利占比",
    "毛利率", "验证毛利率",
]
cols = [c for c in cols if c in df_all.columns]
out = df_all[cols].copy()

# ====== 11) Export to Excel only ======
# Target folder under the user's home directory; created if it does not exist.
out_dir = Path.home() / "Downloads" / "akshare_exports"
out_dir.mkdir(parents=True, exist_ok=True)

xlsx_path = out_dir / f"{symbol}_主营构成_{class_filter}_最近{years_back}年报+{include_half_year}_元_百分比+校验_每期合计_去重日期.xlsx"
out.to_excel(xlsx_path, index=False)

print("rows:", len(out))
print("Saved xlsx:", xlsx_path)

rows: 56
Saved xlsx: /Users/sam/Downloads/akshare_exports/SZ300673_主营构成_按产品_最近10年报+2025-06-30_元_百分比+校验_每期合计_去重日期.xlsx


In [14]:
import akshare as ak
import pandas as pd
import numpy as np
from pathlib import Path
from openpyxl.utils import get_column_letter
from openpyxl.styles import Font
from openpyxl.worksheet.table import Table, TableStyleInfo

# ====== Parameters ======
symbol = "SZ300673"          # change as needed
include_half_year = "2025-06-30"
years_back = 10
class_filter = "按产品"      # 按产品 / 按行业 / 按地区
# Export folder under the user's home directory; created if it does not exist.
out_dir = Path.home() / "Downloads" / "akshare_exports"
out_dir.mkdir(parents=True, exist_ok=True)

# ====== 1) Fetch data + select the reporting periods ======
# Work on a copy of the frame returned by akshare; unparseable report dates become NaT.
df = ak.stock_zygc_em(symbol=symbol).copy()
df["报告日期"] = pd.to_datetime(df["报告日期"], errors="coerce")
# Keep rows whose 分类类型 contains `class_filter` (missing values are treated as "").
df = df[df["分类类型"].fillna("").str.contains(class_filter)].copy()

# Year-end (12-31) reports, limited to the `years_back` most recent report years.
annual = df[(df["报告日期"].dt.month == 12) & (df["报告日期"].dt.day == 31)].copy()
years = sorted(annual["报告日期"].dt.year.dropna().unique(), reverse=True)[:years_back]
annual = annual[annual["报告日期"].dt.year.isin(years)].copy()

# Rows dated `include_half_year`, stacked above the annual rows; exact duplicate rows are dropped.
half = df[df["报告日期"] == pd.to_datetime(include_half_year)].copy()
df = pd.concat([half, annual], ignore_index=True).drop_duplicates().copy()

# ====== 2) Numeric columns + verification columns (kept numeric at this stage) ======
money = ["主营收入", "主营成本", "主营利润"]
ratio = ["收入比例", "成本比例", "利润比例", "毛利率"]

# Coerce amount and ratio columns to numbers; values that cannot be parsed become NaN.
for c in money + ratio:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

# Round amounts to whole yuan.
for c in money:
    if c in df.columns:
        df[c] = df[c].round(0)

# Verification inputs: per-report-date totals of revenue and profit.
# A scalar NaN stands in when the amount column is absent.
tot_rev = df.groupby("报告日期")["主营收入"].transform("sum") if "主营收入" in df.columns else np.nan
tot_gp  = df.groupby("报告日期")["主营利润"].transform("sum") if "主营利润" in df.columns else np.nan

# Verified revenue share = row revenue / total revenue of the same report date.
# The column check must wrap the division itself: df["主营收入"] is evaluated eagerly
# and would raise KeyError when the column is absent, regardless of the NaN fallback above.
if "主营收入" in df.columns:
    df["验证收入占比"] = np.where(pd.notna(tot_rev) & (tot_rev != 0), df["主营收入"] / tot_rev, np.nan)
else:
    df["验证收入占比"] = np.nan

def gm(row):
    """Gross margin of one row: (revenue - cost) / revenue, else profit / revenue.

    `row` is read through ``.get`` so absent fields count as missing.
    Returns NaN when revenue is missing or zero, or when both cost and profit are missing.
    """
    rev = row.get("主营收入", np.nan)
    if pd.isna(rev) or rev == 0:
        return np.nan

    cost = row.get("主营成本", np.nan)
    if pd.notna(cost):
        return (rev - cost) / rev

    prof = row.get("主营利润", np.nan)
    if pd.notna(prof):
        return prof / rev

    return np.nan

# Row-wise recomputed gross margin.
df["验证毛利率"] = df.apply(gm, axis=1)

# Verified gross-profit share = row profit / total profit of the same report date.
# The column check wraps the division because df["主营利润"] is evaluated eagerly and
# would raise KeyError when the column is absent.
if "主营利润" in df.columns:
    df["验证毛利占比"] = np.where(pd.notna(tot_gp) & (tot_gp != 0), df["主营利润"] / tot_gp, np.nan)
else:
    df["验证毛利占比"] = np.nan

# ====== 3) One total ("合计") row per report date and classification ======
sum_cols = [c for c in money if c in df.columns]
# min_count=1 keeps a total as NaN when every row of the group lacks that amount.
# A plain "sum" would report 0 instead, and gm() would then return
# (revenue - 0) / revenue = 100% for periods that have no cost data at all.
tot = df.groupby(["报告日期", "分类类型"], as_index=False)[sum_cols].sum(min_count=1)

# .iloc[0] raises IndexError on an empty selection, so check for rows first.
has_code = "股票代码" in df.columns and not df.empty
tot["股票代码"] = df["股票代码"].iloc[0] if has_code else ""
tot["主营构成"] = "合计"

# A 100% share is only meaningful when the amount it is based on has a usable total;
# otherwise leave it NaN, the same rule the detail rows' verification shares follow.
share_basis = {
    "收入比例": "主营收入",
    "成本比例": "主营成本",
    "利润比例": "主营利润",
    "验证收入占比": "主营收入",
    "验证毛利占比": "主营利润",
}
for share_col, amount_col in share_basis.items():
    if amount_col in tot.columns:
        usable = tot[amount_col].notna() & (tot[amount_col] != 0)
        tot[share_col] = np.where(usable, 1.0, np.nan)
    else:
        tot[share_col] = np.nan

# Gross margin of the total rows comes from the summed amounts; the verification column mirrors it.
tot["毛利率"] = tot.apply(gm, axis=1)
tot["验证毛利率"] = tot["毛利率"]

# Marker column used as a sort key: 0 = detail row, 1 = total row.
df["__is_total__"] = 0
tot["__is_total__"] = 1
df = pd.concat([df, tot], ignore_index=True)

# ====== 4) Output columns + sorting (the date is shown on every row) ======
df["报告日期_显示"] = df["报告日期"].dt.strftime("%Y-%m-%d")

# Newest date first; within a date, detail rows by revenue descending, then the total row.
df = df.sort_values(["报告日期", "__is_total__", "主营收入"], ascending=[False, True, False]).copy()

# Percentage formatting (two decimals); missing values become empty strings.
pct_cols = ["收入比例", "成本比例", "利润比例", "毛利率", "验证收入占比", "验证毛利率", "验证毛利占比"]
for c in pct_cols:
    if c in df.columns:
        df[c] = df[c].map(lambda x: "" if pd.isna(x) else f"{x*100:.2f}%")

# Preferred column order; names missing from the frame are dropped by the filter below.
# The helper column "__is_total__" is not listed, so it is not exported.
cols = [
    "股票代码", "报告日期_显示", "分类类型", "主营构成",
    "主营收入", "收入比例", "验证收入占比",
    "主营成本", "成本比例",
    "主营利润", "利润比例", "验证毛利占比",
    "毛利率", "验证毛利率",
]
cols = [c for c in cols if c in df.columns]
out = df[cols].copy()

# ====== 5) Export to Excel and add header filters (an Excel Table provides them) ======
xlsx_path = out_dir / f"{symbol}_主营构成_{class_filter}_最近{years_back}年报+{include_half_year}_元_含合计_可筛选.xlsx"
sheet_name = "主营构成"

with pd.ExcelWriter(xlsx_path, engine="openpyxl") as writer:
    out.to_excel(writer, index=False, sheet_name=sheet_name)
    ws = writer.book[sheet_name]

    # Freeze the header row.
    ws.freeze_panes = "A2"

    # Rough column widths derived from the header text, clamped to 12..30.
    for i, col in enumerate(out.columns, 1):
        ws.column_dimensions[get_column_letter(i)].width = max(12, min(30, len(str(col)) + 6))

    # Convert the written range into an Excel Table (adds the filter arrows).
    # Only do so when there is at least one data row and one column: a header-only
    # range is not a valid table, and get_column_letter(0) raises ValueError.
    nrows, ncols = out.shape
    if nrows > 0 and ncols > 0:
        table_ref = f"A1:{get_column_letter(ncols)}{nrows+1}"
        tab = Table(displayName="ZYGCTable", ref=table_ref)
        style = TableStyleInfo(name="TableStyleMedium9", showFirstColumn=False,
                               showLastColumn=False, showRowStripes=True, showColumnStripes=False)
        tab.tableStyleInfo = style
        ws.add_table(tab)

print("rows:", len(out))
print("Saved xlsx:", xlsx_path)

rows: 56
Saved xlsx: /Users/sam/Downloads/akshare_exports/SZ300673_主营构成_按产品_最近10年报+2025-06-30_元_含合计_可筛选.xlsx
