In [6]:
import requests
import pandas as pd
import math
from functools import lru_cache
import numpy as np
from datetime import datetime, timedelta
import akshare as ak

In [7]:
import time
import functools

def retry(
    max_retries=3,
    delay=1,
    backoff=2,
    exceptions=(Exception,),
    logger=None
):
    """
    通用重试装饰器

    :param max_retries: 最大重试次数
    :param delay: 初始重试间隔（秒）
    :param backoff: 每次重试间隔的倍数（指数退避）
    :param exceptions: 需要捕获的异常类型
    :param logger: 日志记录器（可选）
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            _delay = delay
            for attempt in range(1, max_retries + 1):
                try:
                    return func(*args, **kwargs)
                except exceptions as e:
                    if logger:
                        logger.warning(f"第{attempt}次调用{func.__name__}失败: {e}")
                    else:
                        print(f"第{attempt}次调用{func.__name__}失败: {e}")
                    if attempt == max_retries:
                        raise
                    time.sleep(_delay)
                    _delay *= backoff
        return wrapper
    return decorator

In [8]:
def is_a_stock(code):
    # 上证A股  # 深证A股
    return code.startswith(('600', '601', '603', '605', '000', '001', '002', '003', '300', '301'))

def is_not_st(name):
    return not name.startswith(('*ST', 'ST'))

def not_new_stock(date):
    today = datetime.today()
    one_year_ago = today - timedelta(days=365)
    return date < one_year_ago

# 过滤价格，如果没有基本上是退市了。
def is_open(price):
    return not np.isnan(price)

def is_open_with_line(price):
    return price != '-'

In [9]:
def stock_zh_a_spot_em() -> pd.DataFrame:
    """
    东方财富网-沪深京 A 股-实时行情
    https://quote.eastmoney.com/center/gridlist.html#hs_a_board
    :return: 实时行情
    :rtype: pandas.DataFrame
    """
    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
    page_size = 50
    page_current = 1
    params = {
        "pn": page_current,
        "pz": page_size,
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f12",
        "fs": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
        "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
        "_": "1623833739532",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    data = data_json["data"]["diff"]
    if not data:
        return pd.DataFrame()

    data_count = data_json["data"]["total"]
    page_count = math.ceil(data_count/page_size)
    while page_count > 1:
        page_current = page_current + 1
        params["pn"] = page_current
        r = requests.get(url, params=params)
        data_json = r.json()
        _data = data_json["data"]["diff"]
        data.extend(_data)
        page_count =page_count - 1

    temp_df = pd.DataFrame(data)
    temp_df.columns = [
        "最新价",
        "涨跌幅",
        "涨跌额",
        "成交量",
        "成交额",
        "振幅",
        "换手率",
        "市盈率动",
        "量比",
        "5分钟涨跌",
        "代码",
        "名称",
        "最高",
        "最低",
        "今开",
        "昨收",
        "总市值",
        "流通市值",
        "涨速",
        "市净率",
        "60日涨跌幅",
        "年初至今涨跌幅",
        "上市时间",
        "加权净资产收益率",
        "总股本",
        "已流通股份",
        "营业收入",
        "营业收入同比增长",
        "归属净利润",
        "归属净利润同比增长",
        "每股未分配利润",
        "毛利率",
        "资产负债率",
        "每股公积金",
        "所处行业",
        "每股收益",
        "每股净资产",
        "市盈率静",
        "市盈率TTM",
        "报告期"
    ]
    temp_df = temp_df[
        [
            "代码",
            "名称",
            "最新价",
            "涨跌幅",
            "涨跌额",
            "成交量",
            "成交额",
            "振幅",
            "换手率",
            "量比",
            "今开",
            "最高",
            "最低",
            "昨收",
            "涨速",
            "5分钟涨跌",
            "60日涨跌幅",
            "年初至今涨跌幅",
            "市盈率动",
            "市盈率TTM",
            "市盈率静",
            "市净率",
            "每股收益",
            "每股净资产",
            "每股公积金",
            "每股未分配利润",
            "加权净资产收益率",
            "毛利率",
            "资产负债率",
            "营业收入",
            "营业收入同比增长",
            "归属净利润",
            "归属净利润同比增长",
            "报告期",
            "总股本",
            "已流通股份",
            "总市值",
            "流通市值",
            "所处行业",
            "上市时间"
        ]
    ]
    temp_df["最新价"] = pd.to_numeric(temp_df["最新价"], errors="coerce")
    temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"], errors="coerce")
    temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"], errors="coerce")
    temp_df["成交量"] = pd.to_numeric(temp_df["成交量"], errors="coerce")
    temp_df["成交额"] = pd.to_numeric(temp_df["成交额"], errors="coerce")
    temp_df["振幅"] = pd.to_numeric(temp_df["振幅"], errors="coerce")
    temp_df["量比"] = pd.to_numeric(temp_df["量比"], errors="coerce")
    temp_df["换手率"] = pd.to_numeric(temp_df["换手率"], errors="coerce")
    temp_df["最高"] = pd.to_numeric(temp_df["最高"], errors="coerce")
    temp_df["最低"] = pd.to_numeric(temp_df["最低"], errors="coerce")
    temp_df["今开"] = pd.to_numeric(temp_df["今开"], errors="coerce")
    temp_df["昨收"] = pd.to_numeric(temp_df["昨收"], errors="coerce")
    temp_df["涨速"] = pd.to_numeric(temp_df["涨速"], errors="coerce")
    temp_df["5分钟涨跌"] = pd.to_numeric(temp_df["5分钟涨跌"], errors="coerce")
    temp_df["60日涨跌幅"] = pd.to_numeric(temp_df["60日涨跌幅"], errors="coerce")
    temp_df["年初至今涨跌幅"] = pd.to_numeric(temp_df["年初至今涨跌幅"], errors="coerce")
    temp_df["市盈率动"] = pd.to_numeric(temp_df["市盈率动"], errors="coerce")
    temp_df["市盈率TTM"] = pd.to_numeric(temp_df["市盈率TTM"], errors="coerce")
    temp_df["市盈率静"] = pd.to_numeric(temp_df["市盈率静"], errors="coerce")
    temp_df["市净率"] = pd.to_numeric(temp_df["市净率"], errors="coerce")
    temp_df["每股收益"] = pd.to_numeric(temp_df["每股收益"], errors="coerce")
    temp_df["每股净资产"] = pd.to_numeric(temp_df["每股净资产"], errors="coerce")
    temp_df["每股公积金"] = pd.to_numeric(temp_df["每股公积金"], errors="coerce")
    temp_df["每股未分配利润"] = pd.to_numeric(temp_df["每股未分配利润"], errors="coerce")
    temp_df["加权净资产收益率"] = pd.to_numeric(temp_df["加权净资产收益率"], errors="coerce")
    temp_df["毛利率"] = pd.to_numeric(temp_df["毛利率"], errors="coerce")
    temp_df["资产负债率"] = pd.to_numeric(temp_df["资产负债率"], errors="coerce")
    temp_df["营业收入"] = pd.to_numeric(temp_df["营业收入"], errors="coerce")
    temp_df["营业收入同比增长"] = pd.to_numeric(temp_df["营业收入同比增长"], errors="coerce")
    temp_df["归属净利润"] = pd.to_numeric(temp_df["归属净利润"], errors="coerce")
    temp_df["归属净利润同比增长"] = pd.to_numeric(temp_df["归属净利润同比增长"], errors="coerce")
    temp_df["报告期"] = pd.to_datetime(temp_df["报告期"], format='%Y%m%d', errors="coerce")
    temp_df["总股本"] = pd.to_numeric(temp_df["总股本"], errors="coerce")
    temp_df["已流通股份"] = pd.to_numeric(temp_df["已流通股份"], errors="coerce")
    temp_df["总市值"] = pd.to_numeric(temp_df["总市值"], errors="coerce")
    temp_df["流通市值"] = pd.to_numeric(temp_df["流通市值"], errors="coerce")
    temp_df["上市时间"] = pd.to_datetime(temp_df["上市时间"], format='%Y%m%d', errors="coerce")

    return temp_df

In [34]:
stock_zh_a_hist_min_em(start_date = '2025-07-25', end_date = '2025-07-25',symbol = '300077',period = "1")

Unnamed: 0,时间,开盘,收盘,最高,最低,成交量,成交额,最新价
0,2025-07-25 09:30:00,0.0,25.54,25.54,25.54,788,2012552.0,25.540
1,2025-07-25 09:31:00,0.0,25.51,25.57,25.42,7063,18019989.0,25.516
2,2025-07-25 09:32:00,0.0,25.58,25.62,25.50,3977,10172631.0,25.537
3,2025-07-25 09:33:00,0.0,25.88,25.88,25.56,6687,17205976.0,25.607
4,2025-07-25 09:34:00,0.0,25.78,25.90,25.71,6977,17999915.0,25.660
...,...,...,...,...,...,...,...,...
236,2025-07-25 14:56:00,0.0,25.79,25.79,25.78,5012,12924896.0,25.607
237,2025-07-25 14:57:00,0.0,25.79,25.80,25.79,6875,17731473.0,25.610
238,2025-07-25 14:58:00,0.0,25.79,25.79,25.79,210,541604.0,25.610
239,2025-07-25 14:59:00,0.0,25.79,25.79,25.79,0,0.0,25.610


In [None]:
import time


def stock_zh_a_hist_min_em(
    symbol: str = "000001",
    start_date: str = "1979-09-01 09:32:00",
    end_date: str = "2222-01-01 09:32:00",
    period: str = "5",
    adjust: str = "",
) -> pd.DataFrame:
    """
    东方财富网-行情首页-沪深京 A 股-每日分时行情
    https://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: 股票代码
    :type symbol: str
    :param start_date: 开始日期
    :type start_date: str
    :param end_date: 结束
    
    //日期
    :type end_date: str
    :param period: choice of {'1', '5', '15', '30', '60'}
    :type period: str
    :param adjust: choice of {'', 'qfq', 'hfq'}
    :type adjust: str
    :return: 每日分时行情
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    adjust_map = {
        "": "0",
        "qfq": "1",
        "hfq": "2",
    }
    if period == "1":
        url = "https://push2his.eastmoney.com/api/qt/stock/trends2/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f13",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "ndays": "5",
            "iscr": "0",
            "secid": f"{code_id_dict[symbol]}.{symbol}",
            "_": "1623766962675",
        }
        r = requests.get(url, params=params)
        data_json = r.json()
        temp_df = pd.DataFrame(
            [item.split(",") for item in data_json["data"]["trends"]]
        )
        temp_df.columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "最新价",
        ]
        temp_df.index = pd.to_datetime(temp_df["时间"])
        temp_df = temp_df[start_date:end_date]
        temp_df.reset_index(drop=True, inplace=True)
        temp_df["开盘"] = pd.to_numeric(temp_df["开盘"])
        temp_df["收盘"] = pd.to_numeric(temp_df["收盘"])
        temp_df["最高"] = pd.to_numeric(temp_df["最高"])
        temp_df["最低"] = pd.to_numeric(temp_df["最低"])
        temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
        temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
        temp_df["最新价"] = pd.to_numeric(temp_df["最新价"])
        temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
        return temp_df
    else:
        url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
        params = {
            "fields1": "f1,f2,f3,f4,f5,f6",
            "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61",
            "ut": "7eea3edcaed734bea9cbfc24409ed989",
            "klt": period,
            "fqt": adjust_map[adjust],
            "secid": f"{code_id_dict[symbol]}.{symbol}",
            "beg": "0",
            "end": "20500000",
            "_": "1630930917857",
        }
        r = requests.get(url, params=params)
        data_json = r.json()
        temp_df = pd.DataFrame(
            [item.split(",") for item in data_json["data"]["klines"]]
        )
        temp_df.columns = [
            "时间",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "振幅",
            "涨跌幅",
            "涨跌额",
            "换手率",
        ]
        temp_df.index = pd.to_datetime(temp_df["时间"])
        temp_df = temp_df[start_date:end_date]
        temp_df.reset_index(drop=True, inplace=True)
        temp_df["开盘"] = pd.to_numeric(temp_df["开盘"])
        temp_df["收盘"] = pd.to_numeric(temp_df["收盘"])
        temp_df["最高"] = pd.to_numeric(temp_df["最高"])
        temp_df["最低"] = pd.to_numeric(temp_df["最低"])
        temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
        temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
        temp_df["振幅"] = pd.to_numeric(temp_df["振幅"])
        temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"])
        temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"])
        temp_df["换手率"] = pd.to_numeric(temp_df["换手率"])
        temp_df["时间"] = pd.to_datetime(temp_df["时间"]).astype(str)
        temp_df = temp_df[
            [
                "时间",
                "开盘",
                "收盘",
                "最高",
                "最低",
                "涨跌幅",
                "涨跌额",
                "成交量",
                "成交额",
                "振幅",
                "换手率",
            ]
        ]
        return temp_df

@retry(max_retries=3, delay=5)    
def stock_zh_a_hist(
    symbol: str = "000001",
    period: str = "daily",
    start_date: str = "19700101",
    end_date: str = "20500101",
    adjust: str = "",
) -> pd.DataFrame:
    """
    东方财富网-行情首页-沪深京 A 股-每日行情
    https://quote.eastmoney.com/concept/sh603777.html?from=classic
    :param symbol: 股票代码
    :type symbol: str
    :param period: choice of {'daily', 'weekly', 'monthly'}
    :type period: str
    :param start_date: 开始日期
    :type start_date: str
    :param end_date: 结束日期
    :type end_date: str
    :param adjust: choice of {"qfq": "前复权", "hfq": "后复权", "": "不复权"}
    :type adjust: str
    :return: 每日行情
    :rtype: pandas.DataFrame
    """
    code_id_dict = code_id_map_em()
    adjust_dict = {"qfq": "1", "hfq": "2", "": "0"}
    period_dict = {"daily": "101", "weekly": "102", "monthly": "103"}
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0'
    ]
    params = {
        "fields1": "f1,f2,f3,f4,f5,f6",
        "fields2": "f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f116",
        "ut": "7eea3edcaed734bea9cbfc24409ed989",
        "klt": period_dict[period],
        "fqt": adjust_dict[adjust],
        "secid": f"{code_id_dict[symbol]}.{symbol}",
        "beg": start_date,
        "end": end_date,
        # "_": int(time.time() * 1000),
        # "_": 1623766962675,
    }
    for i, ua in enumerate(user_agents, 1):
        headers = {
            'User-Agent': ua,
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Referer': 'http://quote.eastmoney.com/',
            'Host': 'push2his.eastmoney.com',
            'Cache-Control': 'no-cache',
            'Pragma': 'no-cache',
            'Sec-Fetch-Dest': 'empty',
            'Sec-Fetch-Mode': 'cors',
            'Sec-Fetch-Site': 'same-site'
        }
        # 添加延迟
        time.sleep(random.uniform(1, 2))
        r = requests.get(url, params=params, headers=headers)
        if r.status_code != 200:
            r.raise_for_status()
        data_json = r.json()
        # print(data_json)
        if not (data_json["data"] and data_json["data"]["klines"]):
            return pd.DataFrame()
        temp_df = pd.DataFrame(
            [item.split(",") for item in data_json["data"]["klines"]]
        )
        temp_df.columns = [
            "日期",
            "开盘",
            "收盘",
            "最高",
            "最低",
            "成交量",
            "成交额",
            "振幅",
            "涨跌幅",
            "涨跌额",
            "换手率",
        ]
        temp_df.index = pd.to_datetime(temp_df["日期"])
        temp_df.reset_index(inplace=True, drop=True)

        temp_df["开盘"] = pd.to_numeric(temp_df["开盘"])
        temp_df["收盘"] = pd.to_numeric(temp_df["收盘"])
        temp_df["最高"] = pd.to_numeric(temp_df["最高"])
        temp_df["最低"] = pd.to_numeric(temp_df["最低"])
        temp_df["成交量"] = pd.to_numeric(temp_df["成交量"])
        temp_df["成交额"] = pd.to_numeric(temp_df["成交额"])
        temp_df["振幅"] = pd.to_numeric(temp_df["振幅"])
        temp_df["涨跌幅"] = pd.to_numeric(temp_df["涨跌幅"])
        temp_df["涨跌额"] = pd.to_numeric(temp_df["涨跌额"])
        temp_df["换手率"] = pd.to_numeric(temp_df["换手率"])

        return temp_df

@lru_cache()
def code_id_map_em() -> dict:
    """
    东方财富-股票和市场代码
    http://quote.eastmoney.com/center/gridlist.html#hs_a_board
    :return: 股票和市场代码
    :rtype: dict
    """
    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
    page_size = 50
    page_current = 1
    params = {
        "pn": page_current,
        "pz": page_size,
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f12",
        "fs": "m:1 t:2,m:1 t:23",
        "fields": "f12",
        "_": "1623833739532",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    data = data_json["data"]["diff"]
    if not data:
        return dict()

    data_count = data_json["data"]["total"]
    page_count = math.ceil(data_count/page_size)
    while page_count > 1:
        page_current = page_current + 1
        params["pn"] = page_current
        r = requests.get(url, params=params)
        data_json = r.json()
        _data = data_json["data"]["diff"]
        data.extend(_data)
        page_count =page_count - 1

    temp_df = pd.DataFrame(data)
    temp_df["market_id"] = 1
    temp_df.columns = ["sh_code", "sh_id"]
    code_id_dict = dict(zip(temp_df["sh_code"], temp_df["sh_id"]))
    page_current = 1
    params = {
        "pn": page_current,
        "pz": page_size,
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f12",
        "fs": "m:0 t:6,m:0 t:80",
        "fields": "f12",
        "_": "1623833739532",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    data = data_json["data"]["diff"]
    if not data:
        return dict()

    data_count = data_json["data"]["total"]
    page_count = math.ceil(data_count/page_size)
    while page_count > 1:
        page_current = page_current + 1
        params["pn"] = page_current
        r = requests.get(url, params=params)
        data_json = r.json()
        _data = data_json["data"]["diff"]
        data.extend(_data)
        page_count =page_count - 1

    temp_df_sz = pd.DataFrame(data)
    temp_df_sz["sz_id"] = 0
    code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["sz_id"])))
    page_current = 1
    params = {
        "pn": page_current,
        "pz": page_size,
        "po": "1",
        "np": "1",
        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
        "fltt": "2",
        "invt": "2",
        "fid": "f12",
        "fs": "m:0 t:81 s:2048",
        "fields": "f12",
        "_": "1623833739532",
    }
    r = requests.get(url, params=params)
    data_json = r.json()
    data = data_json["data"]["diff"]
    if not data:
        return dict()

    data_count = data_json["data"]["total"]
    page_count = math.ceil(data_count/page_size)
    while page_count > 1:
        page_current = page_current + 1
        params["pn"] = page_current
        r = requests.get(url, params=params)
        data_json = r.json()
        _data = data_json["data"]["diff"]
        data.extend(_data)
        page_count =page_count - 1

    temp_df_sz = pd.DataFrame(data)
    temp_df_sz["bj_id"] = 0
    code_id_dict.update(dict(zip(temp_df_sz["f12"], temp_df_sz["bj_id"])))
    return code_id_dict

In [13]:
import requests

url = 'https://push2his.eastmoney.com/api/qt/stock/kline/get'
params = {
    'secid': '0.300077',  # 深市平安银行
    'fields1': 'f1,f2,f3,f4,f5,f6',
    'fields2': 'f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61',
    'klt': '101',         # 日K
    'fqt': '1',           # 前复权
    'beg': '20250101',
    'end': '20250201'
}
resp = requests.get(url, params=params)
print(resp.json())

{'rc': 0, 'rt': 17, 'svr': 177617932, 'lt': 1, 'full': 0, 'dlmkts': '', 'data': {'code': '300077', 'market': 0, 'name': '国民技术', 'decimal': 2, 'dktotal': 3617, 'preKPrice': 23.84, 'klines': ['2025-01-02,23.83,22.88,23.87,22.56,398870,924447820.00,5.49,-4.03,-0.96,7.03', '2025-01-03,23.06,21.42,23.18,21.28,456340,1002487259.00,8.30,-6.38,-1.46,8.05', '2025-01-06,21.22,21.04,21.65,20.88,334222,707556368.00,3.59,-1.77,-0.38,5.89', '2025-01-07,21.32,22.12,22.28,21.32,508315,1115568440.00,4.56,5.13,1.08,8.96', '2025-01-08,21.85,22.05,22.36,21.12,448145,977808441.00,5.61,-0.32,-0.07,7.90', '2025-01-09,21.85,22.17,22.58,21.85,359301,800664997.00,3.31,0.54,0.12,6.34', '2025-01-10,22.05,21.30,22.71,21.28,406561,899361670.00,6.45,-3.92,-0.87,7.17', '2025-01-13,20.85,21.31,21.47,20.47,295395,620550661.00,4.69,0.05,0.01,5.21', '2025-01-14,21.45,23.73,24.03,21.23,779416,1781728976.00,13.14,11.36,2.42,13.75', '2025-01-15,23.75,23.40,23.91,23.21,506363,1188536378.00,2.95,-1.39,-0.33,8.93', '2025-01-16

In [None]:
import random


def test_different_user_agents():
    """测试不同的User-Agent"""
    
    user_agents = [
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0'
    ]
    
    url = "http://push2his.eastmoney.com/api/qt/stock/kline/get"
    params = {
        'secid': '0.000001',
        'fields1': 'f1,f2,f3,f4,f5,f6',
        'fields2': 'f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61',
        'klt': '101',
        'fqt': '1',
        'beg': '20240101',
        'end': '20240131'
    }
    
    for i, ua in enumerate(user_agents, 1):
        print(f"\n测试User-Agent {i}: {ua[:50]}...")
        
        headers = {
            'User-Agent': ua,
            'Accept': 'application/json, text/plain, */*',
            'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8',
            'Accept-Encoding': 'gzip, deflate',
            'Connection': 'keep-alive',
            'Referer': 'http://quote.eastmoney.com/',
            'Host': 'push2his.eastmoney.com'
        }
        
        try:
            response = requests.get(url, params=params, headers=headers, timeout=10)
            print(f"  状态码: {response.status_code}")
            
            if response.status_code == 200:
                data = response.json()
                if data.get('data') and data['data'].get('klines'):
                    print(f"  成功获取数据")
                    return True
                else:
                    print(f"  数据格式错误")
            else:
                print(f"  请求失败")
                
            # 添加延迟
            time.sleep(random.uniform(1, 2))
            
        except Exception as e:
            print(f"  请求异常: {e}")
    
    return False

In [None]:
test_different_user_agents()


测试User-Agent 1: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWeb...
  状态码: 200
  成功获取数据


True

In [None]:
import requests
import time
import ssl
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from urllib3.poolmanager import PoolManager

class SSLAdapter(HTTPAdapter):
    """自定义SSL适配器，模拟浏览器SSL握手行为"""
    def init_poolmanager(self, connections, maxsize, block=False):
        ctx = ssl.create_default_context()
        # 使用与现代浏览器匹配的密码套件
        ctx.set_ciphers('ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384')
        ctx.check_hostname = True
        ctx.verify_mode = ssl.CERT_REQUIRED
        self.poolmanager = PoolManager(
            num_pools=connections,
            maxsize=maxsize,
            block=block,
            ssl_context=ctx
        )

# 创建浏览器风格的持久会话
session = requests.Session()

# 配置重试策略，处理临时网络错误
retry_strategy = Retry(
    total=3,
    backoff_factor=1,
    status_forcelist=[429, 500, 502, 503, 504],
    method_whitelist=["GET", "POST"]
)

# 安装SSL适配器和重试机制
session.mount("https://", SSLAdapter(max_retries=retry_strategy))

url = 'https://push2his.eastmoney.com/api/qt/stock/kline/get'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    "Accept": "application/json, text/javascript, */*; q=0.01",
    "Referer": "https://quote.eastmoney.com/",
    "Connection": "keep-alive",
    "Accept-Language": "zh-CN,zh;q=0.9",
    "Cookie": "st_si=1234567890123456; st_asi=delete; qgqp_b_id=abcdef123456; em_hq_fls=js",
    "DNT": "1",
    "Cache-Control": "no-cache",
    "Sec-Fetch-Dest": "empty",
    "Sec-Fetch-Mode": "cors",
    "Sec-Fetch-Site": "same-site"
}
params = {
    'secid': '0.300077',  # 深市平安银行
    "ut": "7eea3edcaed734bea9cbfc24409ed989",
    'fields1': 'f1,f2,f3,f4,f5,f6',
    'fields2': 'f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61',
    'klt': '101',         # 日K
    'fqt': '1',           # 前复权
    'beg': '20230101',
    'end': '20230131',
    '_': int(time.time() * 1000),
}

session.headers.update(headers)
resp = session.get(url, params=params)
print(resp.json())

TypeError: Retry.__init__() got an unexpected keyword argument 'method_whitelist'

In [None]:
test_df = stock_zh_a_hist(symbol = '300077', period = 'daily',start_date = '20250301', end_date = '20250602')
print(test_df)

            日期     开盘     收盘     最高     最低     成交量           成交额     振幅   涨跌幅  \
0   2025-03-03  26.66  26.16  26.88  25.60  484130  1.272813e+09   4.87 -0.49   
1   2025-03-04  25.90  28.44  29.00  25.78  968419  2.677107e+09  12.31  8.72   
2   2025-03-05  28.20  27.59  28.51  27.25  681409  1.891739e+09   4.43 -2.99   
3   2025-03-06  27.81  27.79  28.28  27.50  610473  1.702471e+09   2.83  0.72   
4   2025-03-07  27.52  27.05  27.96  26.79  493624  1.349951e+09   4.21 -2.66   
..         ...    ...    ...    ...    ...     ...           ...    ...   ...   
56  2025-05-26  23.10  23.43  23.69  23.10  165498  3.864330e+08   2.56  1.65   
57  2025-05-27  23.39  23.21  23.39  23.06  132001  3.059440e+08   1.41 -0.94   
58  2025-05-28  23.35  23.57  24.47  23.05  301492  7.152333e+08   6.12  1.55   
59  2025-05-29  23.57  24.71  25.32  23.57  553061  1.362684e+09   7.42  4.84   
60  2025-05-30  24.26  23.78  24.29  23.71  305113  7.294103e+08   2.35 -3.76   

     涨跌额    换手率  
0  -0.13 

In [14]:
all_stocks = stock_zh_a_spot_em()
all_stocks = all_stocks.loc[(all_stocks['代码'].apply(is_a_stock)) & (all_stocks['名称'].apply(is_not_st)) & (all_stocks['最新价'].apply(is_open)) & (all_stocks['上市时间'].apply(not_new_stock))]
all_stocks

Unnamed: 0,代码,名称,最新价,涨跌幅,涨跌额,成交量,成交额,振幅,换手率,量比,...,营业收入同比增长,归属净利润,归属净利润同比增长,报告期,总股本,已流通股份,总市值,流通市值,所处行业,上市时间
848,605599,菜百股份,15.42,-0.90,-0.14,57600.0,8.893348e+07,2.06,0.74,1.13,...,30.178498,3.196672e+08,17.324494,2025-03-31,7.777778e+08,7.777778e+08,1.199333e+10,1.199333e+10,珠宝首饰,2021-09-09
849,605598,上海港湾,24.38,-5.72,-1.48,110868.0,2.713944e+08,3.75,4.56,1.04,...,29.248448,3.569670e+07,18.588085,2025-03-31,2.445848e+08,2.430168e+08,5.962978e+09,5.924750e+09,工程建设,2021-09-17
850,605589,圣泉集团,30.81,0.42,0.13,127146.0,3.916458e+08,2.35,1.63,0.63,...,15.140380,2.067124e+08,50.462526,2025-03-31,8.464020e+08,7.808933e+08,2.607765e+10,2.405932e+10,化学制品,2021-08-10
851,605588,冠石科技,46.42,2.56,1.16,22073.0,1.018334e+08,4.15,3.02,1.31,...,22.320285,-9.364254e+06,-189.549212,2025-03-31,7.346820e+07,7.309956e+07,3.410394e+09,3.393282e+09,半导体,2021-08-12
852,605580,恒盛能源,15.49,1.84,0.28,81229.0,1.258962e+08,3.29,2.90,1.29,...,21.668091,4.430988e+07,77.428781,2025-03-31,2.800000e+08,2.800000e+08,4.337200e+09,4.337200e+09,电力行业,2021-08-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5726,000008,神州高铁,2.90,0.69,0.02,528855.0,1.528273e+08,1.39,1.95,0.78,...,23.620107,-4.262589e+07,12.325628,2025-03-31,2.716378e+09,2.716234e+09,7.877495e+09,7.877078e+09,交运设备,1992-05-07
5727,000007,全新好,7.44,5.38,0.38,145650.0,1.065443e+08,6.37,4.20,2.75,...,120.411126,1.772379e+06,176.587917,2025-03-31,3.464480e+08,3.464480e+08,2.577573e+09,2.577573e+09,房地产服务,1992-04-13
5728,000006,深振业Ａ,6.27,0.97,0.06,284058.0,1.772163e+08,1.93,2.10,1.64,...,540.278331,-2.593602e+07,2.487580,2025-03-31,1.349995e+09,1.349987e+09,8.464469e+09,8.464421e+09,房地产开发,1992-04-27
5732,000002,万 科Ａ,6.81,1.34,0.09,1842330.0,1.250384e+09,2.53,1.90,1.76,...,-38.314514,-6.246209e+09,-1625.626843,2025-03-31,1.193071e+10,9.716813e+09,8.124813e+10,6.617150e+10,房地产开发,1991-01-29


In [15]:
from sklearn.preprocessing import LabelEncoder
industry_encoder = LabelEncoder()

In [16]:
code_name_map = all_stocks[['代码','名称','所处行业']].set_index('代码')[['名称', '所处行业']].apply(tuple, axis=1).to_dict()


In [17]:
len(code_name_map)

4309

In [26]:
import tqdm
import random


daily_stock_df = pd.DataFrame()
cnt = 0
for code in tqdm.tqdm(list(code_name_map.keys())[4000:5000]):
    # if cnt % 100 == 0 and cnt : print(cnt)
    #if cnt >= 10:break
    # print(code)
    try:
        tmp_df = ak.stock_zh_a_hist(symbol = code, period = 'daily',start_date = '20230701', end_date = '20250718')
        tmp_df.insert(0,'代码',code)
        if len(daily_stock_df.columns) == 0:
            daily_stock_df = pd.DataFrame(columns = tmp_df.columns)
        daily_stock_df = pd.concat([daily_stock_df, tmp_df], axis = 0,ignore_index=True)
        cnt += 1   
    except Exception as ex:
        print(ex)
    time.sleep(6)
    

  daily_stock_df = pd.concat([daily_stock_df, tmp_df], axis = 0,ignore_index=True)


100%|██████████| 309/309 [37:01<00:00,  7.19s/it] 


In [27]:
print(daily_stock_df)
daily_stock_df.to_csv('daily_stock_df_4000_5000.csv', index=None)

            代码          日期    股票代码     开盘     收盘     最高     最低      成交量  \
0       000878  2023-07-03  000878  11.08  11.26  11.38  11.07   274371   
1       000878  2023-07-04  000878  11.40  11.29  11.44  11.24   212409   
2       000878  2023-07-05  000878  11.25  11.40  11.45  11.20   256914   
3       000878  2023-07-06  000878  11.41  11.34  11.42  11.25   189938   
4       000878  2023-07-07  000878  11.27  11.34  11.44  11.16   174805   
...        ...         ...     ...    ...    ...    ...    ...      ...   
153299  000001  2025-07-14  000001  12.87  12.98  13.17  12.86  1872896   
153300  000001  2025-07-15  000001  12.99  12.73  13.04  12.71  1810531   
153301  000001  2025-07-16  000001  12.73  12.64  12.76  12.50  1927864   
153302  000001  2025-07-17  000001  12.61  12.59  12.70  12.54   944641   
153303  000001  2025-07-18  000001  12.62  12.70  12.75  12.60  1241206   

                 成交额    振幅   涨跌幅   涨跌额   换手率  
0       3.089837e+08  2.81  1.90  0.21  1.37  
1    

In [None]:
with open('code_ind_map.pkl', 'rb') as f:
    code_ind_map = pickle.load(f)

In [None]:
daily_stock_df['行业代码'] = daily_stock_df['代码'].map(code_ind_map)

In [None]:
daily_stock_df

Unnamed: 0,代码,日期,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率,行业代码
0,605599,2023-03-01,9.81,9.98,10.00,9.81,28890,2.869968e+07,1.93,1.22,0.12,0.49,49
1,605599,2023-03-02,9.97,9.94,10.00,9.90,16537,1.643643e+07,1.00,-0.40,-0.04,0.28,49
2,605599,2023-03-03,9.92,9.96,9.97,9.88,22056,2.190787e+07,0.91,0.20,0.02,0.38,49
3,605599,2023-03-06,9.99,10.04,10.04,9.93,34728,3.473102e+07,1.10,0.80,0.08,0.59,49
4,605599,2023-03-07,10.03,9.99,10.13,9.97,39024,3.919996e+07,1.59,-0.50,-0.05,0.67,49
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2141974,000001,2025-03-12,11.60,11.85,11.87,11.56,1877318,2.210904e+09,2.67,2.07,0.24,0.97,82
2141975,000001,2025-03-13,11.81,11.84,11.91,11.78,1312371,1.555123e+09,1.10,-0.08,-0.01,0.68,82
2141976,000001,2025-03-14,11.82,11.97,12.00,11.82,1722418,2.057970e+09,1.52,1.10,0.13,0.89,82
2141977,000001,2025-03-17,11.63,11.50,11.67,11.46,4603612,5.315523e+09,1.75,-3.93,-0.47,2.37,82


In [None]:
daily_stock_df.to_csv('csv_data/stock_2_years_0318.csv')

In [None]:
daily_stock_df['代码'] = daily_stock_df['代码'].astype(str)
daily_stock_df

Unnamed: 0,代码,日期,开盘,收盘,最高,最低,成交量,成交额,振幅,涨跌幅,涨跌额,换手率
0,605599,2024-12-02,11.48,11.61,11.66,11.46,85644,9.906744e+07,1.74,0.96,0.11,1.10
1,605599,2024-12-03,11.61,11.43,11.71,11.37,60301,6.942522e+07,2.93,-1.55,-0.18,0.78
2,605599,2024-12-04,11.41,11.25,11.47,11.19,51459,5.821320e+07,2.45,-1.57,-0.18,0.66
3,605599,2024-12-05,11.17,11.26,11.31,11.12,45908,5.149200e+07,1.69,0.09,0.01,0.59
4,605599,2024-12-06,11.26,11.36,11.36,11.17,41491,4.681981e+07,1.69,0.89,0.10,0.53
...,...,...,...,...,...,...,...,...,...,...,...,...
295793,000001,2025-03-10,11.66,11.59,11.67,11.55,663834,7.692614e+08,1.03,-0.69,-0.08,0.34
295794,000001,2025-03-11,11.54,11.61,11.61,11.52,608975,7.042405e+08,0.78,0.17,0.02,0.31
295795,000001,2025-03-12,11.60,11.85,11.87,11.56,1877318,2.210904e+09,2.67,2.07,0.24,0.97
295796,000001,2025-03-13,11.81,11.84,11.91,11.78,1312371,1.555123e+09,1.10,-0.08,-0.01,0.68


In [None]:
daily_stock_df.to_csv('csv_data/track_stock.csv',index=False)

In [None]:
import pickle
with open('code_name_map_ind.pkl', 'wb') as f:
    pickle.dump(code_name_map, f)

In [None]:
code_name_map

{'605599': '菜百股份',
 '605598': '上海港湾',
 '605589': '圣泉集团',
 '605588': '冠石科技',
 '605580': '恒盛能源',
 '605577': '龙版传媒',
 '605567': '春雪食品',
 '605566': '福莱蒽特',
 '605555': '德昌股份',
 '605507': '国邦医药',
 '605500': '森林包装',
 '605499': '东鹏饮料',
 '605488': '福莱新材',
 '605399': '晨光新材',
 '605398': '新炬网络',
 '605389': '长龄液压',
 '605388': '均瑶健康',
 '605378': '野马电池',
 '605377': '华旺科技',
 '605376': '博迁新材',
 '605369': '拱东医疗',
 '605368': '蓝天燃气',
 '605366': '宏柏新材',
 '605365': '立达信',
 '605358': '立昂微',
 '605339': '南侨食品',
 '605338': '巴比食品',
 '605337': '李子园',
 '605336': '帅丰电器',
 '605333': '沪光股份',
 '605319': '无锡振华',
 '605318': '法狮龙',
 '605305': '中际联合',
 '605303': '园林股份',
 '605300': '佳禾食品',
 '605299': '舒华体育',
 '605298': '必得科技',
 '605296': '神农集团',
 '605289': '罗曼股份',
 '605288': '凯迪股份',
 '605287': '德才股份',
 '605286': '同力日升',
 '605277': '新亚电子',
 '605268': '王力安防',
 '605266': '健之佳',
 '605259': '绿田机械',
 '605258': '协和电子',
 '605255': '天普股份',
 '605228': '神通科技',
 '605222': '起帆电缆',
 '605218': '伟时电子',
 '605208': '永茂泰',
 '605199': '葫芦娃',


In [None]:
import akshare as ak
#使用新浪的数据集
etf = ak.fund_etf_category_sina(symbol="ETF基金")
etf

Unnamed: 0,代码,名称,最新价,涨跌额,涨跌幅,买入,卖出,昨收,今开,最高,最低,成交量,成交额
0,sz159998,计算机ETF,0.918,-0.006,-0.649,0.918,0.919,0.924,0.925,0.925,0.913,98051300,89932149
1,sz159997,电子ETF,1.173,-0.002,-0.170,1.173,1.174,1.175,1.173,1.179,1.167,15212900,17844485
2,sz159996,家电ETF,1.394,0.007,0.505,1.394,1.395,1.387,1.382,1.394,1.381,45963461,63749606
3,sz159995,芯片ETF,1.225,0.003,0.245,1.224,1.225,1.222,1.222,1.232,1.217,530147900,648785781
4,sz159994,5GETF,1.085,-0.004,-0.367,1.084,1.085,1.089,1.084,1.110,1.077,43945860,48046632
...,...,...,...,...,...,...,...,...,...,...,...,...,...
932,sh510060,央企ETF,2.664,0.014,0.528,2.661,2.668,2.650,2.650,2.673,2.640,1234500,3277613
933,sh510050,上证50ETF,2.914,0.024,0.830,2.913,2.914,2.890,2.892,2.916,2.879,658976345,1911014128
934,sh510030,价值ETF,1.108,0.001,0.090,1.105,1.108,1.107,1.107,1.109,1.094,5342800,5899242
935,sh510020,超大盘ETF,3.396,0.032,0.951,3.386,3.396,3.364,3.349,3.399,3.349,363602,1229117


In [None]:
import tushare as ts


pro = ts.pro_api('2d884a7e7c0468f3af578b61146ddb764c2e12a0ccfaf8fbb6d63528')

#查询当前所有正常上市交易的股票列表

data = pro.stock_basic(exchange='', list_status='L', fields='ts_code,symbol,name,area,industry,list_date')

In [None]:
data

Unnamed: 0,ts_code,symbol,name,area,industry,list_date
0,000001.SZ,000001,平安银行,深圳,银行,19910403
1,000002.SZ,000002,万科A,深圳,全国地产,19910129
2,000004.SZ,000004,*ST国华,深圳,软件服务,19910114
3,000006.SZ,000006,深振业A,深圳,区域地产,19920427
4,000007.SZ,000007,全新好,深圳,其他商业,19920413
...,...,...,...,...,...,...
5413,920489.BJ,920489,佳先股份,安徽,化工原料,20200727
5414,920682.BJ,920682,球冠电缆,浙江,电气设备,20200727
5415,920799.BJ,920799,艾融软件,上海,软件服务,20200727
5416,920819.BJ,920819,颖泰生物,北京,农药化肥,20200727
