diff --git a/examples/data_runner/kdata_runner.py b/examples/data_runner/kdata_runner.py index fef6253a..6198bde2 100644 --- a/examples/data_runner/kdata_runner.py +++ b/examples/data_runner/kdata_runner.py @@ -19,6 +19,7 @@ Index, Index1dKdata, StockNews, + StockEvents, LimitUpInfo, BlockStock, ) @@ -46,6 +47,21 @@ def record_stock_news(data_provider="em"): ) +def record_stock_events(data_provider="em"): + normal_stock_ids = get_entity_ids_by_filter( + provider="em", ignore_delist=True, ignore_st=False, ignore_new_stock=False + ) + + run_data_recorder( + entity_ids=normal_stock_ids, + day_data=True, + domain=StockEvents, + data_provider=data_provider, + force_update=False, + sleeping_time=0.01, + ) + + def report_limit_up(): latest_data = LimitUpInfo.query_data(order=LimitUpInfo.timestamp.desc(), limit=1, return_type="domain") timestamp = latest_data[0].timestamp diff --git a/examples/reports/report_tops.py b/examples/reports/report_tops.py index 52e58a42..015c0981 100644 --- a/examples/reports/report_tops.py +++ b/examples/reports/report_tops.py @@ -3,10 +3,12 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.report_utils import report_top_entities +from examples.report_utils import report_top_entities, inform from zvt import init_log -from zvt.api import TopType +from zvt.contract import AdjustType +from zvt.api import TopType, get_latest_kdata_date from zvt.domain import Block, BlockCategory +from zvt.factors.top_stocks import compute_top_stocks, get_top_stocks from zvt.informer import EmailInformer logger = logging.getLogger(__name__) @@ -18,46 +20,35 @@ @sched.scheduled_job("cron", hour=17, minute=0, day_of_week="mon-fri") def report_top_stocks(): - - short_period = report_top_entities( - entity_type="stock", - entity_provider="em", - data_provider="em", - periods=[*range(1, 20)], - ignore_new_stock=False, - ignore_st=True, - adjust_type=None, - top_count=30, - turnover_threshold=0, - turnover_rate_threshold=0, - informer=email_informer, - title="短期最强", + compute_top_stocks() + provider = "em" + entity_type = "stock" + target_date = get_latest_kdata_date(provider=provider, entity_type=entity_type, adjust_type=AdjustType.hfq) + selected = get_top_stocks(target_date=target_date, return_type="short") + + inform( + email_informer, + entity_ids=selected, + target_date=target_date, + title=f"stock 短期最强({len(selected)})", + entity_provider=provider, + entity_type=entity_type, em_group="短期最强", em_group_over_write=True, em_group_over_write_tag=True, - return_type=TopType.positive, - include_limit_up=True, ) - - long_period_start = short_period + 1 - - report_top_entities( - entity_type="stock", - entity_provider="em", - data_provider="em", - periods=[*range(long_period_start, long_period_start + 30)], - ignore_new_stock=False, - ignore_st=True, - adjust_type=None, - top_count=30, - turnover_threshold=0, - turnover_rate_threshold=0, - informer=email_informer, - title="中期最强", + selected = get_top_stocks(target_date=target_date, return_type="long") + + inform( + email_informer, + entity_ids=selected, + target_date=target_date, + title=f"stock 中期最强({len(selected)})", + entity_provider=provider, + entity_type=entity_type, em_group="中期最强", em_group_over_write=True, em_group_over_write_tag=False, - return_type=TopType.positive, ) # report_top_entities( diff --git a/examples/reports/report_vol_up.py b/examples/reports/report_vol_up.py index c27c6cd1..ad24dded 100644 --- a/examples/reports/report_vol_up.py +++ b/examples/reports/report_vol_up.py @@ -3,12 +3,13 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.report_utils import report_targets +from examples.report_utils import report_targets, inform from zvt import init_log from zvt.api.kdata import get_latest_kdata_date from zvt.api.selector import get_mini_and_small_stock, get_middle_and_big_stock from zvt.contract import AdjustType from zvt.factors import VolumeUpMaFactor +from zvt.factors.top_stocks import get_top_stocks from zvt.informer import EmailInformer logger = logging.getLogger(__name__) @@ -20,50 +21,34 @@ @sched.scheduled_job("cron", hour=17, minute=0, day_of_week="mon-fri") def report_vol_up_stocks(): - target_date = get_latest_kdata_date(entity_type="stock", adjust_type=AdjustType.hfq, provider="em") - entity_ids = get_mini_and_small_stock(timestamp=target_date, provider="em") + provider = "em" + entity_type = "stock" + target_date = get_latest_kdata_date(provider=provider, entity_type=entity_type, adjust_type=AdjustType.hfq) + selected = get_top_stocks(target_date=target_date, return_type="small_vol_up") - report_targets( - factor_cls=VolumeUpMaFactor, - entity_provider="em", - data_provider="em", - informer=email_informer, + inform( + email_informer, + entity_ids=selected, + target_date=target_date, + title=f"stock 放量突破(半)年线小市值股票({len(selected)})", + entity_provider=provider, + entity_type=entity_type, em_group="年线股票", - title="放量突破(半)年线小市值股票", - entity_type="stock", em_group_over_write=True, - filter_by_volume=False, - adjust_type=AdjustType.hfq, - start_timestamp="2021-01-01", - # factor args - windows=[120, 250], - over_mode="or", - up_intervals=60, - turnover_threshold=300000000, - turnover_rate_threshold=0.02, - entity_ids=entity_ids, + em_group_over_write_tag=False, ) + selected = get_top_stocks(target_date=target_date, return_type="big_vol_up") - entity_ids = get_middle_and_big_stock(timestamp=target_date) - report_targets( - factor_cls=VolumeUpMaFactor, - entity_provider="em", - data_provider="em", - informer=email_informer, + inform( + email_informer, + entity_ids=selected, + target_date=target_date, + title=f"stock 放量突破(半)年线大市值股票({len(selected)})", + entity_provider=provider, + entity_type=entity_type, em_group="年线股票", - title="放量突破(半)年线大市值股票", - entity_type="stock", em_group_over_write=False, - filter_by_volume=False, - adjust_type=AdjustType.hfq, - start_timestamp="2021-01-01", - # factor args - windows=[120, 250], - over_mode="or", - up_intervals=60, - turnover_threshold=300000000, - turnover_rate_threshold=0.01, - entity_ids=entity_ids, + em_group_over_write_tag=False, ) diff --git a/examples/tag_utils.py b/examples/tag_utils.py index 7151dbe6..cdbaddbd 100644 --- a/examples/tag_utils.py +++ b/examples/tag_utils.py @@ -11,6 +11,14 @@ from zvt.utils import current_date, next_date, pd_is_not_null +def get_concept(code): + with open(os.path.join(os.path.dirname(__file__), "concept.json")) as f: + concept_map = json.load(f) + concepts = [item for sublist in concept_map.values() for item in sublist] + df = BlockStock.query_data(filters=[BlockStock.stock_code == code, BlockStock.name.in_(concepts)]) + return df["name"].tolist() + + def industry_to_tag(industry): if industry in ["风电设备", "电池", "光伏设备", "能源金属", "电源设备"]: return "赛道" @@ -30,7 +38,7 @@ def industry_to_tag(industry): return "消费电子" if industry in ["汽车零部件", "汽车服务", "汽车整车"]: return "汽车" - if industry in ["电机", "通用设备", "专用设备"]: + if industry in ["电机", "通用设备", "专用设备", "仪器仪表"]: return "智能机器" if industry in ["电网设备", "电力行业"]: return "电力" @@ -56,8 +64,6 @@ def industry_to_tag(industry): return "军工" if industry in ["专业服务"]: return "专业服务" - if industry in ["仪器仪表"]: - return "仪器仪表" def build_default_tags(codes, provider="em"): @@ -96,7 +102,7 @@ def get_main_line_hidden_tags(): return json.load(f) -def replace_tags(old_tag="次新股"): +def replace_tags(old_tag="仪器仪表"): with open(os.path.join(os.path.dirname(__file__), "stock_tags.json")) as f: stock_tags = json.load(f) for stock_tag in stock_tags: @@ -169,7 +175,7 @@ def get_core_tag(codes): if tags: code_tag_hidden_tag_list.append((code, tags[0], None)) else: - code_tag_hidden_tag_list.append((code, tags[0], get_hidden_code(code))) + code_tag_hidden_tag_list.append((code, "未知", get_hidden_code(code))) return code_tag_hidden_tag_list @@ -277,7 +283,8 @@ def refresh_hidden_tags(): if __name__ == "__main__": # build_stock_tags(block_name="化工原料", tag="化工", hidden_tag=None) # merge_tags(tags_file="stock_tags.json", hidden_tags_file="化工.json", result_file="result.json", force_update=False) - # replace_tags() - check_tags() + # replace_tags(old_tag="仪器仪表") + # check_tags() # complete_tags() # refresh_hidden_tags() + print(get_concept(code="688787")) diff --git a/requirements.txt b/requirements.txt index a29d89cb..5f2078fc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ requests==2.28.2 -SQLAlchemy == 1.4.46 +SQLAlchemy == 1.4.52 pandas==1.5.3 arrow==1.2.3 openpyxl==3.1.1 diff --git a/src/zvt/api/selector.py b/src/zvt/api/selector.py index 7b2f76d6..78413318 100644 --- a/src/zvt/api/selector.py +++ b/src/zvt/api/selector.py @@ -33,13 +33,16 @@ def get_entity_ids_by_filter( target_date=None, entity_schema=Stock, entity_ids=None, + ignore_bj=False, ): filters = [] + if not target_date: + target_date = current_date() if ignore_new_stock: - if not target_date: - target_date = current_date() pre_year = next_date(target_date, -365) filters += [entity_schema.timestamp <= pre_year] + else: + filters += [entity_schema.timestamp <= target_date] if ignore_delist: filters += [ entity_schema.name.not_like("%退%"), @@ -50,6 +53,8 @@ def get_entity_ids_by_filter( entity_schema.name.not_like("%ST%"), entity_schema.name.not_like("%*ST%"), ] + if ignore_bj: + filters += [entity_schema.exchange != "bj"] return get_entity_ids(provider=provider, entity_schema=entity_schema, filters=filters, entity_ids=entity_ids) diff --git a/src/zvt/domain/misc/__init__.py b/src/zvt/domain/misc/__init__.py index 4da05788..3589e20b 100644 --- a/src/zvt/domain/misc/__init__.py +++ b/src/zvt/domain/misc/__init__.py @@ -12,6 +12,12 @@ __all__ += _overall_all +# import all from submodule stock_events +from .stock_events import * +from .stock_events import __all__ as _stock_events_all + +__all__ += _stock_events_all + # import all from submodule money_flow from .money_flow import * from .money_flow import __all__ as _money_flow_all diff --git a/src/zvt/domain/misc/stock_events.py b/src/zvt/domain/misc/stock_events.py new file mode 100644 index 00000000..4b3b99a2 --- /dev/null +++ b/src/zvt/domain/misc/stock_events.py @@ -0,0 +1,22 @@ +# -*- coding: utf-8 -*- +from sqlalchemy import Column, String, DateTime +from sqlalchemy.orm import declarative_base + +from zvt.contract import Mixin +from zvt.contract.register import register_schema + +EventsBase = declarative_base() + + +class StockEvents(EventsBase, Mixin): + __tablename__ = "stock_events" + event_type = Column(String) + specific_event_type = Column(String) + notice_date = Column(DateTime) + level1_content = Column(String) + level2_content = Column(String) + + +register_schema(providers=["em"], db_name="stock_events", schema_base=EventsBase, entity_type="stock") +# the __all__ is generated +__all__ = ["StockEvents"] diff --git a/src/zvt/factors/top_stocks.py b/src/zvt/factors/top_stocks.py new file mode 100644 index 00000000..5b0502a4 --- /dev/null +++ b/src/zvt/factors/top_stocks.py @@ -0,0 +1,261 @@ +# -*- coding: utf-8 -*- +import json +from typing import List + +from sqlalchemy import Column, String +from sqlalchemy.orm import declarative_base +from sqlalchemy import Column, String, Integer, DateTime, Boolean, Float + +from zvt.api import get_latest_kdata_date, get_mini_and_small_stock, get_middle_and_big_stock, get_trade_dates +from zvt.api.selector import get_entity_ids_by_filter, get_limit_up_stocks +from zvt.api.stats import get_top_performance_entities_by_periods, TopType +from zvt.contract import Mixin, AdjustType +from zvt.contract.api import get_db_session +from zvt.contract.factor import TargetType +from zvt.contract.register import register_schema +from zvt.domain import Stock, Stock1dHfqKdata, LimitUpInfo +from zvt.factors import VolumeUpMaFactor +from zvt.utils import next_date, to_time_str, TIME_FORMAT_DAY, today, count_interval, to_pd_timestamp + +TopStocksBase = declarative_base() + + +class TopStocks(TopStocksBase, Mixin): + __tablename__ = "top_stocks" + + short_count = Column(Integer) + short_stocks = Column(String(length=2048)) + + long_count = Column(Integer) + long_stocks = Column(String(length=2048)) + + small_vol_up_count = Column(Integer) + small_vol_up_stocks = Column(String(length=2048)) + + big_vol_up_count = Column(Integer) + big_vol_up_stocks = Column(String(length=2048)) + + all_stocks_count = Column(Integer) + + +register_schema(providers=["zvt"], db_name="top_stocks", schema_base=TopStocksBase) + + +def get_vol_up_stocks(target_date, provider="em", stock_type="small", entity_ids=None): + if stock_type == "small": + current_entity_pool = get_mini_and_small_stock(timestamp=target_date, provider=provider) + turnover_threshold = 300000000 + turnover_rate_threshold = 0.02 + elif stock_type == "big": + current_entity_pool = get_middle_and_big_stock(timestamp=target_date, provider=provider) + turnover_threshold = 300000000 + turnover_rate_threshold = 0.01 + else: + assert False + + if entity_ids: + current_entity_pool = set(current_entity_pool) & set(entity_ids) + + kdata_schema = Stock1dHfqKdata + filters = [ + kdata_schema.timestamp == to_pd_timestamp(target_date), + kdata_schema.turnover >= turnover_threshold, + kdata_schema.turnover_rate >= turnover_rate_threshold, + ] + kdata_df = kdata_schema.query_data( + provider=provider, filters=filters, columns=["entity_id", "timestamp"], index="entity_id" + ) + if current_entity_pool: + current_entity_pool = set(current_entity_pool) & set(kdata_df.index.tolist()) + else: + current_entity_pool = kdata_df.index.tolist() + + factor = VolumeUpMaFactor( + entity_schema=Stock, + entity_provider=provider, + provider=provider, + entity_ids=current_entity_pool, + start_timestamp=next_date(target_date, -600), + end_timestamp=target_date, + adjust_type=AdjustType.hfq, + windows=[120, 250], + over_mode="or", + up_intervals=60, + turnover_threshold=turnover_threshold, + turnover_rate_threshold=turnover_rate_threshold, + ) + + stocks = factor.get_targets(timestamp=target_date, target_type=TargetType.positive) + return stocks + + +def update_with_limit_up(): + session = get_db_session(provider="zvt", data_schema=TopStocks) + + top_stocks: List[TopStocks] = TopStocks.query_data( + end_timestamp="2021-07-18", return_type="domain", session=session + ) + for top_stock in top_stocks: + limit_up_stocks = get_limit_up_stocks(timestamp=top_stock.timestamp) + short_stocks = json.loads(top_stock.short_stocks) + stock_list = list(set(short_stocks + limit_up_stocks)) + top_stock.short_stocks = json.dumps(stock_list, ensure_ascii=False) + top_stock.short_count = len(stock_list) + session.add_all(top_stocks) + session.commit() + + +def update_vol_up(): + session = get_db_session(provider="zvt", data_schema=TopStocks) + + top_stocks: List[TopStocks] = TopStocks.query_data( + return_type="domain", start_timestamp="2019-03-27", session=session + ) + for top_stock in top_stocks: + target_date = top_stock.timestamp + count_bj = count_interval("2023-09-01", target_date) + ignore_bj = count_bj < 0 + + entity_ids = get_entity_ids_by_filter( + target_date=target_date, + provider="em", + ignore_delist=False, + ignore_st=False, + ignore_new_stock=False, + ignore_bj=ignore_bj, + ) + small_vol_up_stocks = get_vol_up_stocks( + target_date=target_date, provider="em", stock_type="small", entity_ids=entity_ids + ) + top_stock.small_vol_up_count = len(small_vol_up_stocks) + top_stock.small_vol_up_stocks = json.dumps(small_vol_up_stocks, ensure_ascii=False) + + big_vol_up_stocks = get_vol_up_stocks( + target_date=target_date, provider="em", stock_type="big", entity_ids=entity_ids + ) + top_stock.big_vol_up_count = len(big_vol_up_stocks) + top_stock.big_vol_up_stocks = json.dumps(big_vol_up_stocks, ensure_ascii=False) + session.add(top_stock) + session.commit() + print(f"finish {target_date}") + + +def compute_top_stocks(provider="em"): + latest = TopStocks.query_data(limit=1, order=TopStocks.timestamp.desc(), return_type="domain") + if latest: + start = next_date(to_time_str(latest[0].timestamp, fmt=TIME_FORMAT_DAY)) + else: + start = "2018-01-01" + trade_days = get_trade_dates(start=start, end=today()) + + for target_date in trade_days: + print(f"to {target_date}") + session = get_db_session(provider="zvt", data_schema=TopStocks) + top_stocks = TopStocks( + id=f"block_zvt_000001_{target_date}", entity_id="block_zvt_000001", timestamp=target_date + ) + + count_bj = count_interval("2023-09-01", target_date) + ignore_bj = count_bj < 0 + + entity_ids = get_entity_ids_by_filter( + target_date=target_date, + provider=provider, + ignore_delist=False, + ignore_st=False, + ignore_new_stock=False, + ignore_bj=ignore_bj, + ) + + short_selected, short_period = get_top_performance_entities_by_periods( + entity_provider=provider, + data_provider=provider, + target_date=target_date, + periods=[*range(1, 20)], + ignore_new_stock=False, + ignore_st=False, + entity_ids=entity_ids, + entity_type="stock", + adjust_type=None, + top_count=30, + turnover_threshold=0, + turnover_rate_threshold=0, + return_type=TopType.positive, + ) + limit_up_stocks = get_limit_up_stocks(timestamp=target_date) + short_selected = list(set(short_selected + limit_up_stocks)) + top_stocks.short_count = len(short_selected) + top_stocks.short_stocks = json.dumps(short_selected, ensure_ascii=False) + + long_period_start = short_period + 1 + long_selected, long_period = get_top_performance_entities_by_periods( + entity_provider=provider, + data_provider=provider, + target_date=target_date, + periods=[*range(long_period_start, long_period_start + 30)], + ignore_new_stock=False, + ignore_st=False, + entity_ids=entity_ids, + entity_type="stock", + adjust_type=None, + top_count=30, + turnover_threshold=0, + turnover_rate_threshold=0, + return_type=TopType.positive, + ) + top_stocks.long_count = len(long_selected) + top_stocks.long_stocks = json.dumps(long_selected, ensure_ascii=False) + + small_vol_up_stocks = get_vol_up_stocks( + target_date=target_date, provider=provider, stock_type="small", entity_ids=entity_ids + ) + top_stocks.small_vol_up_count = len(small_vol_up_stocks) + top_stocks.small_vol_up_stocks = json.dumps(small_vol_up_stocks, ensure_ascii=False) + + big_vol_up_stocks = get_vol_up_stocks( + target_date=target_date, provider=provider, stock_type="big", entity_ids=entity_ids + ) + top_stocks.big_vol_up_count = len(big_vol_up_stocks) + top_stocks.big_vol_up_stocks = json.dumps(big_vol_up_stocks, ensure_ascii=False) + + top_stocks.all_stocks_count = len(entity_ids) + + print(top_stocks) + session.add(top_stocks) + session.commit() + + +def get_top_stocks(target_date, return_type="short"): + datas: List[TopStocks] = TopStocks.query_data( + start_timestamp=target_date, end_timestamp=target_date, return_type="domain" + ) + stocks = [] + if datas: + assert len(datas) == 1 + top_stock = datas[0] + if return_type == "short": + stocks = json.loads(top_stock.short_stocks) + elif return_type == "long": + stocks = json.loads(top_stock.long_stocks) + elif return_type == "small_vol_up": + stocks = json.loads(top_stock.small_vol_up_stocks) + elif return_type == "big_vol_up": + stocks = json.loads(top_stock.big_vol_up_stocks) + else: + assert False + return stocks + + +if __name__ == "__main__": + compute_top_stocks() + # update_with_limit_up() + # update_vol_up() + # target_date = "2024-03-06" + # stocks = get_top_stocks(target_date=target_date, return_type="short") + # print(stocks) + # stocks = get_top_stocks(target_date=target_date, return_type="long") + # print(stocks) + # stocks = get_top_stocks(target_date=target_date, return_type="small_vol_up") + # print(stocks) + # stocks = get_top_stocks(target_date=target_date, return_type="big_vol_up") + # print(stocks) diff --git a/src/zvt/fill_project.py b/src/zvt/fill_project.py index 6e92d78c..cf73967f 100644 --- a/src/zvt/fill_project.py +++ b/src/zvt/fill_project.py @@ -96,4 +96,4 @@ def gen_kdata_schemas(): # gen_exports("ml") # gen_kdata_schemas() gen_exports("recorders") - gen_exports("domain") + # gen_exports("domain") diff --git a/src/zvt/recorders/__init__.py b/src/zvt/recorders/__init__.py index a0b017ae..0402dd96 100644 --- a/src/zvt/recorders/__init__.py +++ b/src/zvt/recorders/__init__.py @@ -114,6 +114,12 @@ def init_main_index(provider="exchange"): __all__ += _eastmoney_all +# import all from submodule qmt +from .qmt import * +from .qmt import __all__ as _qmt_all + +__all__ += _qmt_all + # import all from submodule sina from .sina import * from .sina import __all__ as _sina_all diff --git a/src/zvt/recorders/em/__init__.py b/src/zvt/recorders/em/__init__.py index a2a845a0..5e83e4b4 100644 --- a/src/zvt/recorders/em/__init__.py +++ b/src/zvt/recorders/em/__init__.py @@ -6,6 +6,12 @@ # common code of the package # export interface in __all__ which contains __all__ of its sub modules +# import all from submodule misc +from .misc import * +from .misc import __all__ as _misc_all + +__all__ += _misc_all + # import all from submodule quotes from .quotes import * from .quotes import __all__ as _quotes_all @@ -24,12 +30,6 @@ __all__ += _macro_all -# import all from submodule news -from .news import * -from .news import __all__ as _news_all - -__all__ += _news_all - # import all from submodule em_api from .em_api import * from .em_api import __all__ as _em_api_all diff --git a/src/zvt/recorders/em/em_api.py b/src/zvt/recorders/em/em_api.py index 7c296260..4abef240 100644 --- a/src/zvt/recorders/em/em_api.py +++ b/src/zvt/recorders/em/em_api.py @@ -20,7 +20,10 @@ to_time_str, now_pd_timestamp, current_date, + flatten_list, + is_same_date, ) +from zvt.utils.utils import to_str logger = logging.getLogger(__name__) @@ -29,6 +32,7 @@ def get_treasury_yield(pn=1, ps=2000, fetch_all=True): results = get_em_data( request_type="RPTA_WEB_TREASURYYIELD", + source=None, fields="ALL", sort_by="SOLAR_DATE", sort="desc", @@ -197,7 +201,7 @@ def get_url(type, sty, source="SECURITIES", filters=None, order_by="", order="as sr = _order_param(order=order) v = random.randint(1000000000000000, 9000000000000000) - if filters: + if filters or source: url = f"https://datacenter.eastmoney.com/securities/api/data/get?type={type}&sty={sty}&filter={filters}&client=APP&source={source}&p={pn}&ps={ps}&sr={sr}&st={order_by}&v=0{v}" else: url = f"https://datacenter.eastmoney.com/api/data/get?type={type}&sty={sty}&st={order_by}&sr={sr}&p={pn}&ps={ps}&_={now_timestamp()}" @@ -209,8 +213,11 @@ def get_url(type, sty, source="SECURITIES", filters=None, order_by="", order="as def get_exchange(code): - if code >= "333333": + code_ = int(code) + if 800000 >= code_ >= 600000: return "SH" + elif code_ >= 400000: + return "BJ" else: return "SZ" @@ -264,6 +271,7 @@ def get_em_data( pn=1, ps=2000, fetch_all=True, + fetch_count=1, params=None, ): url = get_url( @@ -277,14 +285,24 @@ def get_em_data( ps=ps, params=params, ) + print(f"current url: {url}") resp = requests.get(url) if resp.status_code == 200: json_result = resp.json() resp.close() - if json_result and json_result["result"]: - data: list = json_result["result"]["data"] - if fetch_all: - if pn < json_result["result"]["pages"]: + + if json_result: + if "result" in json_result: + data: list = json_result["result"]["data"] + need_next = pn < json_result["result"]["pages"] + elif "data" in json_result: + data: list = json_result["data"] + need_next = json_result["hasNext"] == 1 + else: + data = [] + need_next = False + if fetch_all or fetch_count - 1 > 0: + if need_next: next_data = get_em_data( request_type=request_type, fields=fields, @@ -295,6 +313,8 @@ def get_em_data( pn=pn + 1, ps=ps, fetch_all=fetch_all, + fetch_count=fetch_count - 1, + params=params, ) if next_data: data = data + next_data @@ -490,7 +510,7 @@ def get_tradable_list( if entity_type == TradableType.index: if exchange == Exchange.sh: - entity_flag = "fs=i:1.000001,i:1.000002,i:1.000003,i:1.000009,i:1.000010,i:1.000011,i:1.000012,i:1.000016,i:1.000300,i:1.000903,i:1.000905,i:1.000906,i:1.000688" + entity_flag = "fs=i:1.000001,i:1.000002,i:1.000003,i:1.000009,i:1.000010,i:1.000011,i:1.000012,i:1.000016,i:1.000300,i:1.000903,i:1.000905,i:1.000906,i:1.000688,i:1.000852,i:2.932000" if exchange == Exchange.sz: entity_flag = "fs=i:0.399001,i:0.399002,i:0.399003,i:0.399004,i:0.399005,i:0.399006,i:0.399100,i:0.399106,i:0.399305,i:0.399550" elif entity_type == TradableType.currency: @@ -597,6 +617,47 @@ def get_block_stocks(block_id, name=""): return the_list +def get_events(entity_id, fetch_count=2000): + _, _, code = decode_entity_id(entity_id) + + datas = get_em_data( + fields=None, + request_type="RTP_F10_DETAIL", + source="SECURITIES", + params=f"{code}.{get_exchange(code)}", + fetch_all=False, + fetch_count=fetch_count, + ) + if not datas: + return None + datas = flatten_list(datas) + stock_events = [] + checking_date = None + index = 0 + for item in datas: + the_date = item["NOTICE_DATE"] + event_type = item["EVENT_TYPE"] + if checking_date == the_date: + index = index + 1 + the_id = f"{entity_id}_{the_date}_{event_type}_{index}" + else: + checking_date = the_date + index = 0 + the_id = f"{entity_id}_{the_date}_{event_type}" + event = { + "id": the_id, + "entity_id": entity_id, + "timestamp": to_pd_timestamp(the_date), + "event_type": event_type, + "specific_event_type": item["SPECIFIC_EVENTTYPE"], + "notice_date": to_pd_timestamp(the_date), + "level1_content": to_str(item["LEVEL1_CONTENT"]), + "level2_content": to_str(item["LEVEL2_CONTENT"]), + } + stock_events.append(event) + return stock_events + + def get_news(entity_id, ps=200, index=1, start_timestamp=None): sec_id = to_em_sec_id(entity_id=entity_id) url = f"https://np-listapi.eastmoney.com/comm/wap/getListInfo?cb=callback&client=wap&type=1&mTypeAndCode={sec_id}&pageSize={ps}&pageIndex={index}&callback=jQuery1830017478247906740352_{now_timestamp() - 1}&_={now_timestamp()}" @@ -625,7 +686,7 @@ def get_news(entity_id, ps=200, index=1, start_timestamp=None): "timestamp": to_pd_timestamp(item["Art_ShowTime"]), "news_title": item["Art_Title"], } - for item in json_result + for index, item in enumerate(json_result) if not start_timestamp or (to_pd_timestamp(item["Art_ShowTime"]) >= start_timestamp) ] if len(news) < len(json_result): @@ -788,11 +849,13 @@ def to_zvt_code(code): # # df_delist = df[df["name"].str.contains("退")] # print(df_delist[["id", "name"]].values.tolist()) # print(get_block_stocks(block_id="block_cn_BK1144")) - # df = get_tradable_list(entity_type="stock", exchange=Exchange.bj) + # df = get_tradable_list(entity_type="index") # print(df) # df = get_kdata(entity_id="stock_bj_873693", level="1d") # print(df) - print(get_controlling_shareholder(code="000338")) + # print(get_controlling_shareholder(code="000338")) + events = get_events(entity_id="stock_sz_300684") + print(events) # the __all__ is generated __all__ = [ @@ -805,6 +868,7 @@ def to_zvt_code(code): "get_ii_holder", "get_ii_summary", "get_free_holders", + "get_controlling_shareholder", "get_holders", "get_url", "get_exchange", @@ -815,6 +879,8 @@ def to_zvt_code(code): "get_basic_info", "get_future_list", "get_tradable_list", + "get_block_stocks", + "get_events", "get_news", "to_em_fc", "to_em_entity_flag", diff --git a/src/zvt/recorders/em/news/__init__.py b/src/zvt/recorders/em/misc/__init__.py similarity index 64% rename from src/zvt/recorders/em/news/__init__.py rename to src/zvt/recorders/em/misc/__init__.py index be419f46..fa240ff0 100644 --- a/src/zvt/recorders/em/news/__init__.py +++ b/src/zvt/recorders/em/misc/__init__.py @@ -11,3 +11,9 @@ from .em_stock_news_recorder import __all__ as _em_stock_news_recorder_all __all__ += _em_stock_news_recorder_all + +# import all from submodule em_stock_events_recorder +from .em_stock_events_recorder import * +from .em_stock_events_recorder import __all__ as _em_stock_events_recorder_all + +__all__ += _em_stock_events_recorder_all diff --git a/src/zvt/recorders/em/misc/em_stock_events_recorder.py b/src/zvt/recorders/em/misc/em_stock_events_recorder.py new file mode 100644 index 00000000..c062f44d --- /dev/null +++ b/src/zvt/recorders/em/misc/em_stock_events_recorder.py @@ -0,0 +1,48 @@ +# -*- coding: utf-8 -*- +import pandas as pd + +from zvt.contract.api import df_to_db +from zvt.contract.recorder import FixedCycleDataRecorder +from zvt.domain import Stock +from zvt.domain.misc.stock_events import StockEvents +from zvt.domain.misc.stock_news import StockNews +from zvt.recorders.em import em_api +from zvt.utils import to_pd_timestamp, count_interval, now_pd_timestamp + + +class EMStockEventsRecorder(FixedCycleDataRecorder): + original_page_url = ( + "https://emh5.eastmoney.com/html/detail.html?fc=300684.SZ&shareFlag=1&color=w&appfenxiang=1#/gsds" + ) + url = "https://datacenter.eastmoney.com/securities/api/data/get?type=RTP_F10_DETAIL¶ms=300684.SZ&source=SECURITIES&client=APP&p=1&v=05132741154833669" + + entity_schema = Stock + data_schema = StockEvents + entity_provider = "em" + provider = "em" + + def record(self, entity, start, end, size, timestamps): + if not start: + start = to_pd_timestamp("2005-01-01") + days = count_interval(start, now_pd_timestamp()) + if days < 0: + fetch_count = 1 + elif days <= 10: + fetch_count = 3 + elif days <= 30: + fetch_count = 5 + else: + fetch_count = 2000 + + stock_events = em_api.get_events(entity_id=entity.id, fetch_count=fetch_count) + if stock_events: + df = pd.DataFrame.from_records(stock_events) + self.logger.info(df.head()) + df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update) + + +if __name__ == "__main__": + r = EMStockEventsRecorder(entity_ids=["stock_sz_000338"], sleeping_time=0) + r.run() +# the __all__ is generated +__all__ = ["EMStockEventsRecorder"] diff --git a/src/zvt/recorders/em/news/em_stock_news_recorder.py b/src/zvt/recorders/em/misc/em_stock_news_recorder.py similarity index 100% rename from src/zvt/recorders/em/news/em_stock_news_recorder.py rename to src/zvt/recorders/em/misc/em_stock_news_recorder.py diff --git a/src/zvt/recorders/qmt/__init__.py b/src/zvt/recorders/qmt/__init__.py index 40a96afc..b8724466 100644 --- a/src/zvt/recorders/qmt/__init__.py +++ b/src/zvt/recorders/qmt/__init__.py @@ -1 +1,13 @@ # -*- coding: utf-8 -*- +# the __all__ is generated +__all__ = [] + +# __init__.py structure: +# common code of the package +# export interface in __all__ which contains __all__ of its sub modules + +# import all from submodule quotes +from .quotes import * +from .quotes import __all__ as _quotes_all + +__all__ += _quotes_all diff --git a/src/zvt/recorders/qmt/quotes/__init__.py b/src/zvt/recorders/qmt/quotes/__init__.py index 40a96afc..5e91b1db 100644 --- a/src/zvt/recorders/qmt/quotes/__init__.py +++ b/src/zvt/recorders/qmt/quotes/__init__.py @@ -1 +1,13 @@ # -*- coding: utf-8 -*- +# the __all__ is generated +__all__ = [] + +# __init__.py structure: +# common code of the package +# export interface in __all__ which contains __all__ of its sub modules + +# import all from submodule qmt_kdata_recorder +# from .qmt_kdata_recorder import * +# from .qmt_kdata_recorder import __all__ as _qmt_kdata_recorder_all +# +# __all__ += _qmt_kdata_recorder_all diff --git a/src/zvt/recorders/qmt/quotes/qmt_kdata_recorder.py b/src/zvt/recorders/qmt/quotes/qmt_kdata_recorder.py index 36a390f4..28ffb54b 100644 --- a/src/zvt/recorders/qmt/quotes/qmt_kdata_recorder.py +++ b/src/zvt/recorders/qmt/quotes/qmt_kdata_recorder.py @@ -97,3 +97,7 @@ def record(self, entity, start, end, size, timestamps): class EMStockKdataRecorder(BaseEMStockKdataRecorder): entity_schema = Stock data_schema = StockKdataCommon + + +# the __all__ is generated +__all__ = ["BaseEMStockKdataRecorder", "EMStockKdataRecorder"] diff --git a/src/zvt/utils/utils.py b/src/zvt/utils/utils.py index ef13a550..cebc7331 100644 --- a/src/zvt/utils/utils.py +++ b/src/zvt/utils/utils.py @@ -219,6 +219,30 @@ def set_one_and_only_one(**kwargs): return True +def flatten_list(input_list): + if not input_list: + return input_list + result = [] + for item in input_list: + if isinstance(item, list): + result.extend(item) + elif isinstance(item, dict): + result.append(item) + else: + result.append(item) + return result + + +def to_str(str_or_list): + if not str_or_list: + return None + if isinstance(str_or_list, str): + return str_or_list + if isinstance(str_or_list, list): + str_list = [str(item) for item in str_or_list] + return ";".join(str_list) + + if __name__ == "__main__": url = url_unquote( "https://datacenter.eastmoney.com/securities/api/data/get?type=RPT_DAILYBILLBOARD_DETAILS&sty=ALL&source=DataCenter&client=WAP&p=1&ps=20&sr=-1,1&st=TRADE_DATE,SECURITY_CODE&filter=(TRADE_DATE%3E=%272022-04-01%27)(TRADE_DATE%3C=%272022-04-29%27)(MARKET=%22SH%22)&?v=05160638952989893" @@ -246,4 +270,5 @@ def set_one_and_only_one(**kwargs): "iterate_with_step", "url_unquote", "parse_url_params", + "flatten_list", ] diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 780c2467..8d870960 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from zvt.contract.api import get_entities -from zvt.utils.utils import iterate_with_step +from zvt.utils.utils import iterate_with_step, to_str def test_iterate_with_step(): @@ -30,3 +30,11 @@ def test_iterate_entities(): assert len(first) == 100 assert len(last) <= 100 + + +def test_to_str(): + assert to_str(None) is None + assert to_str("") is None + assert to_str("a") == "a" + assert to_str(["a", "b"]) == "a;b" + assert to_str([1, 2]) == "1;2"