In [5]:
import random
import string
import time
import base64
from hashlib import sha256
from hmac import HMAC
import requests
import pandas as pd

# 配置API的APPID和SECRET
APPID = "tubmafwrzhpgfiuf"
SECRET = "eotpcqbvhycdshwscqnytiwzbgonposs"

def generate_nonce(length=32):
    """生成随机nonce"""
    return ''.join(random.choices(string.ascii_letters + string.digits, k=length))

def get_timestamp():
    """获取当前时间戳"""
    return int(time.time())

def build_sign_str(appid, nonce, timestamp):
    """构建需要签名的字符串"""
    return f'appid={appid}&nonce={nonce}&timestamp={timestamp}'

def calculate_signature(secret, message):
    """计算HMAC SHA-256签名"""
    return base64.urlsafe_b64encode(HMAC(secret.encode('utf-8'), message.encode('utf-8'), sha256).digest()).decode('utf-8')

def fetch_indicator_list(classify_id, page_size=25, current_index=1):
    """从分类目录获取所有指标列表"""
    nonce = generate_nonce()
    timestamp = get_timestamp()
    sign_str = build_sign_str(APPID, nonce, timestamp)
    signature = calculate_signature(SECRET, sign_str)

    headers = {
        'nonce': nonce,
        'timestamp': str(timestamp),
        'appid': APPID,
        'signature': signature,
        'Accept': "*/*",
        'Accept-Encoding': "gzip, deflate, br",
        'User-Agent': "PostmanRuntime-ApipostRuntime/1.1.0",
        'Connection': "keep-alive",
    }

    url = f"https://etahub.hzinsights.com/v1/edb/list?ClassifyId={classify_id}&PageSize={page_size}&CurrentIndex={current_index}"
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        return response.json().get('Data')
    else:
        print(f"Failed to fetch indicator list, status code: {response.status_code}")
        return None

def fetch_indicator_details(edb_code):
    """根据指标 EdbCode 获取时间序列数据"""
    nonce = generate_nonce()
    timestamp = get_timestamp()
    sign_str = build_sign_str(APPID, nonce, timestamp)
    signature = calculate_signature(SECRET, sign_str)

    headers = {
        'nonce': nonce,
        'timestamp': str(timestamp),
        'appid': APPID,
        'signature': signature,
        'Accept': "*/*",
        'Accept-Encoding': "gzip, deflate, br",
        'User-Agent': "PostmanRuntime-ApipostRuntime/1.1.0",
        'Connection': "keep-alive",
    }

    url = f"https://etahub.hzinsights.com/v1/edb/data?EdbCode={edb_code}"
    response = requests.get(url, headers=headers)

    if response.status_code == 200:
        return response.json().get('Data')
    else:
        print(f"Failed to fetch data for EdbCode {edb_code}, status code: {response.status_code}")
        return None

def get_all_data_from_list(classify_id):
    """根据分类 ID 获取所有 EdbCode，并请求数据"""
    all_data_frames = {}
    page_size = 25
    current_index = 1

    # Step 1: 获取分类目录中的所有 EdbCode
    all_edb_codes = []
    while True:
        indicator_list = fetch_indicator_list(classify_id, page_size, current_index)
        if not indicator_list:
            break

        # 打印 indicator_list 结构以便调试
        print(f"Fetched data for page {current_index}: {indicator_list}")

        for indicator in indicator_list:
            # 检查是否为字典类型
            if not isinstance(indicator, dict):
                print(f"Unexpected data type: {type(indicator)}, skipping: {indicator}")
                continue

            edb_code = indicator.get("EdbCode")
            edb_name = indicator.get("EdbName")
            if edb_code:
                all_edb_codes.append((edb_code, edb_name))

        # 判断是否需要翻页
        if len(indicator_list) < page_size:
            break

        current_index += 1

    # Step 2: 根据 EdbCode 获取时间序列数据
    for edb_code, edb_name in all_edb_codes:
        data = fetch_indicator_details(edb_code)
        if data:
            # 转换为 DataFrame
            df = pd.DataFrame(data)
            df['DataTime'] = pd.to_datetime(df['DataTime'])
            df.set_index('DataTime', inplace=True)
            df.sort_index(inplace=True)
            # 仅保留 'Value' 列，并用指标名称重命名列
            df = df[['Value']].rename(columns={'Value': edb_name})
            all_data_frames[edb_code] = df

    # Step 3: 合并所有数据
    if all_data_frames:
        result_df = pd.concat(all_data_frames.values(), axis=1)
        output_file = f"classify_{classify_id}_all_data.xlsx"
        result_df.to_excel(output_file)
        print(f"分类 {classify_id} 的所有数据已保存到 {output_file}")
    else:
        print(f"未获取到分类 {classify_id} 的数据。")

if __name__ == "__main__":
    # 示例分类ID，修改为你需要的分类ID
    classify_id = 554  # 替换为你的分类ID
    get_all_data_from_list(classify_id)


Fetched data for page 1: {'List': [], 'Paging': {'IsStart': True, 'IsEnd': True, 'PreviousIndex': 0, 'NextIndex': 0, 'CurrentIndex': 1, 'Pages': 0, 'Totals': 0, 'PageSize': 25}}
Unexpected data type: <class 'str'>, skipping: List
Unexpected data type: <class 'str'>, skipping: Paging
未获取到分类 554 的数据。
