In [1]:
import tushare as ts
import csv
import time
import pandas as pd
pro = ts.pro_api('20240220161230-78b00d30-7dc1-4ea5-99e2-924e60d68931')
pro._DataApi__http_url = 'http://tsapi.majors.ltd:7000'

In [17]:
start_date = '20200101'
end_date = '20240220'
ts_code = '000002.SZ'
year='2020'

### 1. 股票数据获取（Tushare）

#### 1.1 股票基本信息

In [2]:
stock_basic = pro.stock_basic(list_status='L', fields='ts_code, symbol, name, industry')

# 重命名字段（便于后续导入neo4j）
# 保存为stock_basic.csv
stock_basic.to_csv('./financial_data/stock_basic.csv', encoding='utf-8', index=False)

stock_basic.head()

Unnamed: 0,ts_code,symbol,name,industry
0,000001.SZ,1,平安银行,银行
1,000002.SZ,2,万科A,全国地产
2,000004.SZ,4,国华网安,软件服务
3,000005.SZ,5,ST星源,环境保护
4,000006.SZ,6,深振业A,区域地产


In [15]:
import logging
from datetime import datetime

In [5]:
logging.basicConfig(filename='log.txt', level=logging.INFO, format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S')


In [6]:
graph.run("""
LOAD CSV WITH HEADERS FROM 'https://raw.githubusercontent.com/wuqicyber/Financial-Knowledge-Graphs/master/code/financial_data/stock_basic.csv' AS row
CREATE (company:Company)
SET company = row
""")

#### 1.2 股票Top10股东信息

In [13]:
ts_codes = stock_basic['ts_code'].tolist()
ts_codes

['000001.SZ',
 '000002.SZ',
 '000004.SZ',
 '000005.SZ',
 '000006.SZ',
 '000007.SZ',
 '000008.SZ',
 '000009.SZ',
 '000010.SZ',
 '000011.SZ',
 '000012.SZ',
 '000014.SZ',
 '000016.SZ',
 '000017.SZ',
 '000019.SZ',
 '000020.SZ',
 '000021.SZ',
 '000023.SZ',
 '000025.SZ',
 '000026.SZ',
 '000027.SZ',
 '000028.SZ',
 '000029.SZ',
 '000030.SZ',
 '000031.SZ',
 '000032.SZ',
 '000034.SZ',
 '000035.SZ',
 '000036.SZ',
 '000037.SZ',
 '000039.SZ',
 '000040.SZ',
 '000042.SZ',
 '000045.SZ',
 '000048.SZ',
 '000049.SZ',
 '000050.SZ',
 '000055.SZ',
 '000056.SZ',
 '000058.SZ',
 '000059.SZ',
 '000060.SZ',
 '000061.SZ',
 '000062.SZ',
 '000063.SZ',
 '000065.SZ',
 '000066.SZ',
 '000068.SZ',
 '000069.SZ',
 '000070.SZ',
 '000078.SZ',
 '000088.SZ',
 '000089.SZ',
 '000090.SZ',
 '000096.SZ',
 '000099.SZ',
 '000100.SZ',
 '000151.SZ',
 '000153.SZ',
 '000155.SZ',
 '000156.SZ',
 '000157.SZ',
 '000158.SZ',
 '000159.SZ',
 '000166.SZ',
 '000301.SZ',
 '000333.SZ',
 '000338.SZ',
 '000400.SZ',
 '000401.SZ',
 '000402.SZ',
 '0004

In [None]:
for ts_code in ts_codes:
    logging.info(f'Start importing holders for {ts_code}')
    start_time = datetime.now()

    top10_holders = pro.top10_holders(ts_code=ts_code, start_date=start_date, end_date=end_date)
    for index, row in top10_holders.iterrows():
        # 查找或创建Company节点
        company = Node("Company", ts_code=row['ts_code'])
        graph.merge(company, "Company", "ts_code")

        # 查找或创建Holder节点
        holder = Node("Holder", name=row['holder_name'])
        graph.merge(holder, "Holder", "name")

        # 创建或更新HOLDS关系，并添加关系属性
        holds_relation = Relationship(holder, "HOLDS", company,
                                      ann_date=row['ann_date'],
                                      end_date=row['end_date'],
                                      hold_amount=row['hold_amount'],
                                      hold_ratio=row['hold_ratio'])
        graph.merge(holds_relation)

    end_time = datetime.now()
    logging.info(f'Finished importing holders for {ts_code}. Duration: {end_time - start_time}')


In [11]:
holders = pd.DataFrame(columns=('ts_code', 'ann_date', 'end_date', 'holder_name', 'hold_amount', 'hold_ratio'))

top10_holders = pro.top10_holders(ts_code=ts_code, start_date=start_date, end_date=end_date)

top10_holders

Unnamed: 0,ts_code,ann_date,end_date,holder_name,hold_amount,hold_ratio,hold_float_ratio,hold_change,holder_type
0,000001.SZ,20231025,20230930,全国社保基金一零一组合,5.847760e+07,0.3013,0.3013,400000.0,社保基金、社保机构
1,000001.SZ,20231025,20230930,交通银行股份有限公司-易方达上证50指数增强型证券投资基金,6.608347e+07,0.3405,0.3405,7000000.0,开放式投资基金
2,000001.SZ,20231025,20230930,中国平安人寿保险股份有限公司-自有资金,1.186100e+09,6.1121,6.1122,0.0,金融机构—保险公司
3,000001.SZ,20231025,20230930,中国平安人寿保险股份有限公司-传统-普通保险产品,4.404787e+08,2.2698,2.2699,0.0,保险投资组合
4,000001.SZ,20231025,20230930,中国工商银行股份有限公司-华泰柏瑞沪深300交易型开放式指数证券投资基金,6.345764e+07,0.3270,0.3270,,开放式投资基金
...,...,...,...,...,...,...,...,...,...
145,000001.SZ,20200421,20200331,中央汇金资产管理有限责任公司,2.162130e+08,1.1142,1.1142,0.0,资产管理公司
146,000001.SZ,20200421,20200331,深圳中电投资股份有限公司,1.625233e+08,0.8375,0.8375,0.0,投资、咨询公司
147,000001.SZ,20200421,20200331,河南鸿宝企业管理有限公司,1.027358e+08,0.5294,0.5294,0.0,一般企业
148,000001.SZ,20200421,20200331,全国社保基金一一七组合,9.502952e+07,0.4897,0.4897,29999936.0,社保基金、社保机构


#### 1.3 股票概念信息

In [8]:
concept_details = pd.DataFrame(columns=('id', 'concept_name', 'ts_code', 'name'))

# concept_detail = pro.concept_detail(id='TS0')  # 该接口 TOKEN 受限
    
# concept_detail.head()

#### 1.4 股票公告信息

In [12]:
# notices = pro.anns(ts_code=ts_code, start_date=start_date, end_date=end_date, year=year)  # 该接口 TOKEN 受限

# notices.to_csv("financial_data\\notices\\"+str(code)+".csv",encoding='utf_8_sig',index=False)

# notices.head()

#### 1.5 财经新闻信息

In [7]:
news = pro.news(src='sina', start_date='2023-11-21 09:00:00', end_date='2024-11-22 10:10:00')
news.head()

Unnamed: 0,datetime,content,title
0,2024-02-20 17:07:36,消息人士：印度将邀请私营企业投资260亿美元用于核能发电。,
1,2024-02-20 17:05:29,【香港联交：对新明中国3名现任董事采取纪律行动】香港联交所(香港交易及结算所有限公司全资附属...,
2,2024-02-20 17:03:46,【深交所：对影响市场正常交易秩序、损害投资者合法权益的违法违规行为，始终保持严的基调和“零容...,
3,2024-02-20 17:02:41,【富宝资讯：场内多见低价抛售，碳酸锂期货大幅下跌】富宝分析，锂矿端：锂矿市场询货问价频次增加...,
4,2024-02-20 17:02:25,【深交所对宁波灵均限制交易并启动公开谴责程序】2月19日，深交所在交易监控中发现，9:30:...,


#### 1.6 概念信息

In [19]:
# concept = pro.concept()  # 该接口 TOKEN 受限

# concept.head()

#### 1.7 沪股通成分和深股通成分信息 

In [20]:
sh = pro.hs_const(hs_type='SH')  # 获取沪股通成分

sz = pro.hs_const(hs_type='SZ')  # 获取深股通成分

sh.head()

Unnamed: 0,ts_code,hs_type,in_date,out_date,is_new
0,601628.SH,SH,20141117,,1
1,601099.SH,SH,20141117,,1
2,601808.SH,SH,20141117,,1
3,601107.SH,SH,20141117,,1
4,601880.SH,SH,20141117,,1


#### 1.8 股票价格信息

In [29]:
end_date = '20200201'  # 使用更短数据目的是1.9绘制走势图数值不重叠

price = pro.query('daily', ts_code=ts_code, start_date=start_date, end_date=end_date)

price.head()

Unnamed: 0,ts_code,trade_date,open,high,low,close,pre_close,change,pct_chg,vol,amount
0,000001.SZ,20200123,15.92,15.92,15.39,15.54,16.09,-0.55,-3.4183,1100592.07,1723394.336
1,000001.SZ,20200122,15.92,16.16,15.71,16.09,16.0,0.09,0.5625,719464.91,1150933.398
2,000001.SZ,20200121,16.34,16.34,15.93,16.0,16.45,-0.45,-2.7356,896603.1,1442171.431
3,000001.SZ,20200120,16.43,16.61,16.35,16.45,16.39,0.06,0.3661,746074.75,1226464.649
4,000001.SZ,20200117,16.38,16.55,16.35,16.39,16.33,0.06,0.3674,605436.69,995909.007


#### 1.9 股票价格走势图

In [34]:
from pyecharts.charts import Line
from pyecharts import options as opts
import numpy as np

price = pro.query('daily', ts_code=ts_code, start_date=start_date, end_date=end_date)
(
    Line()
    .add_xaxis(xaxis_data=list(price['trade_date'])[::-1])
    .add_yaxis(series_name="收盘价",y_axis=list(price['close'])[::-1],symbol="circle")
#     .add_yaxis(series_name="开盘价",y_axis=list(price['open'])[::-1],symbol="circle")
#     .add_yaxis(series_name="最高价",y_axis=list(price['high'])[::-1],symbol="circle")
#     .add_yaxis(series_name="最低价",y_axis=list(price['low'])[::-1],symbol="circle")
    .set_global_opts(title_opts=opts.TitleOpts(title="价格走势图"))
    .render_notebook()
)

#### 1.10 基本面数据

In [110]:
stock_company = pro.stock_company(exchange='SZSE', fields='ts_code,chairman,manager,secretary,reg_capital,setup_date,province')
stock_company.head()

Unnamed: 0,ts_code,chairman,manager,secretary,reg_capital,setup_date,province
0,300952.SZ,王咸华,王咸华,张武芬,14492.7653,20040415,江苏
1,300268.SZ,汤捷,汤捷,杨振刚,17420.0,20030508,湖南
2,300447.SZ,陈祥楼,何亮,孙璐,29106.9255,20010929,江苏
3,300451.SZ,葛航,张吕峥,胡燕,119260.8104,19971210,浙江
4,002531.SZ,严俊旭,严俊旭,朱彬,180250.9062,20050118,江苏


In [113]:
import tushare as ts
ts.get_notices("000001").head()

Unnamed: 0,title,type,date,url
0,平安银行：平安银行股份有限公司2020年年度权益分派实施公告2021-05-07,临时公告,2021-05-07,http://vip.stock.finance.sina.com.cn/corp/view...
1,平安银行：2021年第一季度报告全文,一季度报告,2021-04-21,http://vip.stock.finance.sina.com.cn/corp/view...
2,平安银行：一季报监事会决议公告,临时公告,2021-04-21,http://vip.stock.finance.sina.com.cn/corp/view...
3,平安银行：一季报董事会决议公告,临时公告,2021-04-21,http://vip.stock.finance.sina.com.cn/corp/view...
4,平安银行：2021年第一季度报告正文,一季度报告（摘要）,2021-04-21,http://vip.stock.finance.sina.com.cn/corp/view...


In [114]:
ts.guba_sina().head()

Unnamed: 0,title,ptime,rcounts
0,机构减仓或在战役性转移,05月07日 14:57,360.0
1,170家电子行业公司发布年度业绩预告,,0.0
2,券商开户专属通道：新客专享理财福利多多,,0.0
3,热门股票如何选择盘中介入点,05月07日 07:40,419.0
4,节后综合症再现 依然不乏结构性机会,05月07日 08:03,507.0


In [115]:
ts.get_hist_data("000001").head()

本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


Unnamed: 0_level_0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-05-07,23.67,24.3,24.05,23.39,802214.25,0.55,2.34,23.556,23.294,22.153,614633.11,766603.84,700531.86,0.41
2021-05-06,23.1,23.7,23.5,23.1,500295.19,0.21,0.9,23.334,23.058,22.034,548276.55,769859.69,680476.29,0.26
2021-04-30,23.35,23.49,23.29,23.01,561981.31,-0.3,-1.27,23.222,22.823,21.934,622701.05,831058.44,684515.71,0.29
2021-04-29,23.34,23.71,23.59,23.11,614836.88,0.24,1.03,23.222,22.52,21.859,674950.82,847860.13,683639.09,0.32
2021-04-28,23.29,23.45,23.35,22.78,593837.94,0.41,1.79,23.1,22.197,21.78,717922.23,859962.95,691748.65,0.31


In [116]:
ts.get_hist_data("000001",ktype="w").head()

本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


Unnamed: 0_level_0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-05-07,23.1,24.3,24.05,23.1,1302509.5,0.76,3.26,22.438,21.912,21.823,2882347.97,3893787.24,4866394.11,0.67
2021-04-30,23.87,24.23,23.29,22.78,3113505.25,0.0,0.0,21.928,21.645,21.538,3307222.47,4560954.59,5004209.48,1.6
2021-04-23,20.03,23.65,23.29,19.91,5197079.0,3.03,14.96,21.498,21.701,21.3,3544311.92,4492385.46,5036850.99,2.68
2021-04-16,21.51,21.51,20.26,19.81,3097949.0,-1.04,-4.88,20.934,21.754,21.1,3492326.92,4367200.06,5044014.37,1.6
2021-04-09,21.55,22.09,21.3,21.08,1700697.12,-0.2,-0.93,21.178,22.221,21.072,3922926.32,4865955.41,5078637.83,0.88


In [117]:
ts.get_hist_data("000001",ktype="m").head()

本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


Unnamed: 0_level_0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-05-07,23.1,24.3,24.05,23.1,1302509.5,0.76,3.26,22.764,20.09,17.241,17859144.9,20308849.85,20748658.73,0.67
2021-04-30,22.08,24.23,23.29,19.81,14234763.0,1.28,5.82,21.822,19.019,16.818,21521885.8,24854576.1,21952860.65,7.34
2021-03-31,21.54,23.49,22.01,20.28,23400600.0,0.63,2.95,21.112,17.97,16.361,22789929.6,25258599.6,22737767.7,12.06
2021-02-26,23.0,25.31,21.38,21.21,22432726.0,-1.71,-7.41,20.26,17.069,15.967,22277667.2,24135001.5,22393905.6,11.56
2021-01-29,19.1,23.54,23.09,17.8,27925126.0,3.75,19.39,19.018,16.324,15.587,22094764.0,23598847.1,22103348.45,14.39


In [118]:
ts.get_hist_data("000001",ktype="5").head()  # 5min (15,30,60)

本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


Unnamed: 0_level_0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20,turnover
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2021-05-07 15:00:00,24.01,24.03,24.01,23.98,14157.8,0.0,0.0,24.028,23.998,24.0555,13524.9,12686.8,11663.7,0.01
2021-05-07 14:55:00,24.03,24.04,24.01,23.96,14808.0,-0.02,-0.08,24.024,24.002,24.063,12897.5,13060.2,11526.3,0.01
2021-05-07 14:50:00,24.08,24.1,24.02,23.96,20615.3,-0.06,-0.25,23.998,24.011,24.067,12849.7,13332.0,11336.4,0.01
2021-05-07 14:45:00,24.01,24.08,24.08,24.01,9387.44,0.07,0.29,23.99,24.012,24.068,10501.1,12905.3,11421.0,0.01
2021-05-07 14:40:00,24.0,24.03,24.02,23.95,8656.08,0.02,0.08,23.978,24.012,24.0655,11153.4,12820.4,11752.8,0.01


In [119]:
# sh上证指数； sz深圳成指； hs300沪深300； sz50上证50； zxb中小板指数； cyb创业板指数
ts.get_hist_data("cyb").head()

本接口即将停止更新，请尽快使用Pro版接口：https://tushare.pro/document/2


Unnamed: 0_level_0,open,high,close,low,volume,price_change,p_change,ma5,ma10,ma20,v_ma5,v_ma10,v_ma20
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2021-05-07,3021.87,3026.85,2910.41,2910.41,14489343.0,-104.4,-3.46,3023.724,2992.49,2898.924,16444300.8,16648376.3,15426893.45
2021-05-06,3059.58,3059.58,3014.81,2961.99,16552269.0,-76.59,-2.48,3038.848,2991.06,2894.984,16974533.4,16827430.9,15346704.55
2021-04-30,3040.65,3110.16,3091.4,3037.31,16606502.0,39.98,1.31,3029.766,2979.452,2886.855,17752532.8,17050663.6,15277921.7
2021-04-29,3053.74,3068.6,3051.42,3014.55,16647988.0,0.84,0.03,3010.384,2948.649,2873.055,17834241.6,16717391.8,15101499.0
2021-04-28,2990.7,3052.04,3050.58,2980.65,17925402.0,64.55,2.16,2987.978,2922.571,2858.409,17635713.4,16185039.8,14900826.45


### 2.股票数据获取（巨宽数据源）

In [40]:
from jqdatasdk import *

auth('18280180192', 'Tencent123')  # 请自行前往 https://www.joinquant.com/ 免费申请TOKEN

security = '000300.XSHG'
start_date = '2020-01-01'
end_date = '2020-03-01'

In [42]:
# security 股票代码
# frequency 时间粒度（1d=日）
# skip_paused 是否跳过缺失交易数据时间点
stock_price = get_price(security=security, start_date=start_date, end_date=end_date, frequency='1d',skip_paused=False)

stock_price.head()

Unnamed: 0,open,close,high,low,volume,money
2020-01-02,4121.35,4152.24,4172.66,4121.35,18211680000.0,270105500000.0
2020-01-03,4161.22,4144.96,4164.3,4131.86,14282620000.0,215216300000.0
2020-01-06,4120.52,4129.3,4170.64,4102.38,17531000000.0,250182100000.0
2020-01-07,4137.4,4160.23,4161.25,4135.1,13948900000.0,196389100000.0
2020-01-08,4139.63,4112.32,4149.81,4101.98,16758580000.0,212406300000.0
