In [1]:
import os
import time  # 新增请求间隔控制
from datetime import datetime

import pandas as pd
import requests


def get_fund_history(fund_code, start_date=None, end_date=None, timeout=10, request_interval=1):
    """Download the historical net-asset-value records of a fund from Eastmoney.

    The endpoint is paginated; pages are requested one after another until
    the number of records actually received reaches the ``TotalCount``
    reported by the server, or until a page comes back empty.

    Args:
        fund_code: Fund code, e.g. ``"512890"``.
        start_date: Optional inclusive lower bound as ``"YYYY-MM-DD"``.
        end_date: Optional inclusive upper bound as ``"YYYY-MM-DD"``.
        timeout: Seconds to wait for each HTTP request before giving up.
        request_interval: Seconds to pause between two page requests.

    Returns:
        A DataFrame with the columns ``净值日期`` (from ``FSRQ``, converted to
        datetime), ``单位净值`` (``DWJZ``), ``累计净值`` (``LJJZ``) and
        ``日增长率`` (``JZZZL``), sorted by date in ascending order. The three
        value columns are returned as received from the API (presumably
        strings - the caller converts them with ``pd.to_numeric``). When
        nothing could be fetched an empty DataFrame without columns is
        returned.

    Note:
        Errors are reported with ``print`` and end the download; records
        collected before the error are still returned (best effort).
    """
    base_url = "http://api.fund.eastmoney.com/f10/lsjz"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        "Referer": f"http://fundf10.eastmoney.com/jjjz_{fund_code}.html"
    }

    # Dates are sent to the API without dashes (YYYYMMDD).
    def format_date(date_str):
        return date_str.replace("-", "") if date_str else ""

    params = {
        "fundCode": fund_code,
        "pageIndex": 1,
        "pageSize": 100,
        "startDate": format_date(start_date),
        "endDate": format_date(end_date),
        "_": int(datetime.now().timestamp() * 1000)
    }

    all_data = []
    while True:
        try:
            # A timeout keeps a stalled connection from hanging the notebook forever.
            response = requests.get(base_url, headers=headers, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            # "Data" may be missing or null; both simply mean "no more records".
            current_page_data = (data.get("Data") or {}).get("LSJZList")
            if not current_page_data:
                break
            all_data.extend(current_page_data)

            # Compare against the rows actually received rather than
            # pageIndex * pageSize: if the server returns fewer rows per page
            # than requested, the product overshoots and the download would
            # stop before all records have been fetched.
            total_count = data["TotalCount"]
            if len(all_data) >= total_count:
                break

            params["pageIndex"] += 1
            if request_interval:
                time.sleep(request_interval)  # throttle to avoid anti-scraping limits

        except Exception as e:
            # Deliberate best effort: report the problem and keep what we have.
            print(f"请求失败: {e}")
            break

    df = pd.DataFrame(all_data)
    if not df.empty:
        df = df[['FSRQ', 'DWJZ', 'LJJZ', 'JZZZL']].copy()
        df.columns = ['净值日期', '单位净值', '累计净值', '日增长率']
        df['净值日期'] = pd.to_datetime(df['净值日期'])

        # Filter locally as well, in case the server ignored the date range.
        if start_date:
            df = df[df['净值日期'] >= pd.to_datetime(start_date)]
        if end_date:
            df = df[df['净值日期'] <= pd.to_datetime(end_date)]

        # Non in-place sort: the frame may be a filtered slice at this point.
        df = df.sort_values('净值日期')
    return df


# Usage example: crawl the fund history several times, merge the results and
# save them to an Excel workbook. `combined_df` stays a module-level name.
if __name__ == "__main__":
    fund_code = "512890"
    start_date = "2020-01-01"
    end_date = "2025-02-18"

    # Number of crawl passes whose results are merged afterwards
    max_attempts = 5
    history_dfs = []

    save_path = r"C:\Users\vincent.fei\Downloads"  # <- example of a custom output directory
    file_name = f"fund_{fund_code}_{start_date}_{end_date}_天天基金.xlsx"
    full_path = os.path.join(save_path, file_name)

    for attempt in range(max_attempts):
        print(f"正在执行第{attempt + 1}次爬取...")
        df = get_fund_history(fund_code, start_date, end_date)
        if not df.empty:
            history_dfs.append(df)
        else:
            print(f"第{attempt + 1}次爬取未获取到数据")
        # Pause between passes only; waiting after the final pass is wasted time.
        if attempt < max_attempts - 1:
            time.sleep(5)

    if history_dfs:
        # Merge all passes, keep the first record seen for every date and
        # restore chronological order.
        combined_df = (
            pd.concat(history_dfs, ignore_index=True)
            .drop_duplicates(subset='净值日期', keep='first')
            .sort_values('净值日期')
        )
        # Convert the value columns to numeric dtype
        value_columns = ['单位净值', '累计净值', '日增长率']
        combined_df[value_columns] = combined_df[value_columns].apply(pd.to_numeric)

        # Save to Excel
        with pd.ExcelWriter(full_path, engine='openpyxl') as writer:
            combined_df.to_excel(writer, index=False, sheet_name='历史净值')

        print(f"合并后保存{len(combined_df)}条数据到{full_path}")
    else:
        print("所有爬取尝试均未获取到数据")

正在执行第1次爬取...
正在执行第2次爬取...
正在执行第3次爬取...
正在执行第4次爬取...
正在执行第5次爬取...
合并后保存1000条数据到C:\Users\vincent.fei\Downloads\fund_512890_2020-01-01_2025-02-18_ttjj.xlsx


In [9]:
# Amount invested on every contribution date
principal = 1000


def simulate_investment(values_daily, frequency):
    """Simulate fixed-amount periodic investing and return an annualised return.

    For every resampling period the last available unit NAV is used to buy
    ``principal`` worth of shares. The accumulated shares are valued at the
    NAV of the final period.

    Args:
        values_daily: DataFrame with a datetime column ``净值日期`` and a
            numeric column ``单位净值``. Other columns are ignored.
        frequency: pandas resampling rule, e.g. ``'W'``.

    Returns:
        ``(final_value / total_invested) ** (1 / years) - 1`` as a float,
        where ``years`` is the number of days between the first and last
        period label divided by 365. Returns ``0.0`` when the data covers a
        single period only.

    Raises:
        ValueError: if no period has a usable ``单位净值``.

    Note:
        The formula annualises the ratio of final value to total
        contributions over the whole span, i.e. it treats every
        contribution as if it had been invested for the full period; it is
        not a money-weighted (IRR) return. ``principal`` cancels out of that
        ratio, so the result does not depend on its value.
    """
    # Only the unit NAV is needed. Selecting it before dropna() prevents a
    # missing value in an unrelated column from silently dropping a period.
    # Sorting first makes .last() pick the chronologically last NAV.
    nav_by_period = (
        values_daily.set_index('净值日期')['单位净值']
        .sort_index(kind='mergesort')
        .resample(frequency)
        .last()
        .dropna()
    )
    if nav_by_period.empty:
        raise ValueError("values_daily contains no usable '单位净值' values")

    # Shares bought in every period, summed over the whole simulation
    total_shares = (principal / nav_by_period).sum()
    # Value of all shares at the NAV of the final period
    final_value = total_shares * nav_by_period.iloc[-1]
    total_invested = principal * len(nav_by_period)
    # Investment span in years
    years = (nav_by_period.index[-1] - nav_by_period.index[0]).days / 365
    if years == 0:
        return 0.0
    return float((final_value / total_invested) ** (1 / years) - 1)

# The source NAV data is unadjusted, so the calculation starts from January 2022.
is_recent = combined_df['净值日期'] >= '2022-01-01'
combined_df = combined_df.loc[is_recent]

# Annualised return of investing once per week
weekly_return = simulate_investment(values_daily=combined_df, frequency='W')
# Annualised return of investing twice per month
biweekly_return = simulate_investment(values_daily=combined_df, frequency='SME')
# Annualised return of investing once per month
monthly_return = simulate_investment(values_daily=combined_df, frequency='ME')

# Report the three results, one per line
print(
    f'每周定投的年化收益率：{weekly_return * 100:.2f}%',
    f'每半月定投的年化收益率：{biweekly_return * 100:.2f}%',
    f'每月定投的年化收益率：{monthly_return * 100:.2f}%',
    sep='\n',
)

每周定投的年化收益率：5.39%
每半月定投的年化收益率：5.42%
每月定投的年化收益率：5.43%
