- 20190620
    - 制作stock标签，并存入MySQL
    - 基本面标签：业绩、营运、盈利、成长、偿债、现金
    - 分类标签：行业、地域、概念；是否属于某个类别，如上证50、退市等
    - 投资参考标签：分配预案、业绩、新股、融资融券    
    - 宏观经济数据：GDP、PPI、CPI、存款、货币
    - 龙虎榜数据：

# Basic set

In [1]:
import tushare as ts
import datetime
from sqlalchemy.types import VARCHAR

In [2]:
import pandas as pd
import numpy as np
pd.set_option('display.float_format', lambda x:'%.3f' %x) # 不采用科学计数法显示

import warnings  #  -----------------
warnings.filterwarnings('ignore') # 为了整洁，去除弹出的warnings

In [3]:
import seaborn as sns

from pylab import mpl
mpl.rcParams['font.sans-serif'] = ['FangSong']  # 指定默认字体
mpl.rcParams['axes.unicode_minus'] = False  # 解决保存图像是负号'-'显示为方块的问题

import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# 自动重新加载更改的模块  ----------------------
%load_ext autoreload
%autoreload 2

## Functions

# Connect to Mysql

In [5]:
from sqlalchemy import create_engine
from pandas.io import sql

In [6]:
DB_CON_STR = 'mysql+pymysql://root:123456@localhost/stocks?charset=utf8'  
engine = create_engine(DB_CON_STR, echo=False) 

In [7]:
now = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
now

'2019-06-20 13:22:59'

# 基本面信息 Fundamental Information
    一、股票列表：get_stock_basics
    二、业绩报告（主表）：get_report_data
    三、盈利能力：get_profit_data
    四、营运能力：get_operation_data
    五、成长能力：get_growth_data
    六、偿债能力：get_growth_data
    七、现金流量：get_cashflow_data

In [35]:
# classified = ts.get_report_data(2018, 4) # year, quarter
classified = ts.get_profit_data(2018, 4)
# classified = ts.get_operation_data()
# classified = ts.get_growth_data()
# classified = ts.get_growth_data()
# classified = ts.get_cashflow_data()

print(classified.shape)
print(classified.head())
print()

[Getting data:]#############################################################(3623, 9)
     code   name     roe  net_profit_ratio  gross_profit_rate  net_profits  \
0  600198   大唐电信 288.280            23.980             25.323      579.596   
1  000585  *ST东电 169.340            45.170             17.071       14.596   
2  000707   ST双环  72.840             3.600             -2.575      135.745   
3  000737   ST南风  70.090            14.610             24.754      267.072   
4  600247   ST成城  66.550            23.440             31.650        7.418   

    eps  business_income  bips  
0 0.657         2416.500 2.739  
1 0.017           32.312 0.037  
2 0.292         3769.946 8.122  
3 0.487         1827.271 3.330  
4 0.022           31.633 0.094  



## 股票列表：get_stock_basics
- 获取沪深上市公司基本情况。属性包括：
  -  code,代码
  -  name,名称
  -  industry,所属行业
  -  area,地区
  -  pe,市盈率
  -  outstanding,流通股本(亿)
  -  totals,总股本(亿)
  -  totalAssets,总资产(万)
  -  liquidAssets,流动资产
  -  fixedAssets,固定资产
  -  reserved,公积金
  -  reservedPerShare,每股公积金
  -  esp,每股收益
  -  bvps,每股净资
  -  pb,市净率
  -  timeToMarket,上市日期
  -  undp,未分利润
  -  perundp, 每股未分配
  -  rev,收入同比(%)
  -  profit,利润同比(%)
  -  gpr,毛利率(%)
  -  npr,净利润率(%)
  -  holders,股东人数-

In [8]:
stock_basics = ts.get_stock_basics()
stock_basics['create_time'] = now
stock_basics.to_sql('stock_basics', engine, schema='stocks', if_exists='replace',
                    dtype={'code':VARCHAR(stock_basics.index.get_level_values('code').str.len().max())}) 
print(stock_basics.shape)
stock_basics.sort_values(by = 'code')

(3636, 23)


Unnamed: 0_level_0,name,industry,area,pe,outstanding,totals,totalAssets,liquidAssets,fixedAssets,reserved,...,pb,timeToMarket,undp,perundp,rev,profit,gpr,npr,holders,create_time
code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
000001,平安银行,银行,深圳,7.530,171.700,171.700,353017984.000,0.000,1062300.000,5646500.000,...,0.970,19910403,10160900.000,5.920,15.880,12.900,0.000,22.930,354508.000,2019-06-20 13:22:59
000002,万 科Ａ,全国地产,深圳,69.920,97.150,113.020,155116640.000,129272768.000,1152238.630,796198.880,...,1.940,19910129,9284548.000,8.210,56.930,25.230,35.040,2.320,222979.000,2019-06-20 13:22:59
000004,国农科技,生物制药,深圳,84.210,0.830,0.840,16133.150,11622.100,96.660,78.230,...,15.730,19910114,1876.040,0.220,41.470,159.640,76.620,5.000,10044.000,2019-06-20 13:22:59
000005,世纪星源,环境保护,深圳,0.000,10.580,10.590,295217.880,162958.560,2774.750,68830.050,...,2.090,19901210,-31474.810,-0.300,-44.980,-209.930,25.620,-21.200,125079.000,2019-06-20 13:22:59
000006,深振业Ａ,区域地产,深圳,16.620,13.480,13.500,1401971.250,1084175.750,1077.410,48432.150,...,1.170,19920427,299037.190,2.220,-14.750,-61.260,39.270,26.820,73575.000,2019-06-20 13:22:59
000007,全新好,酒店餐饮,深圳,69.380,3.090,3.460,39623.120,13818.020,6573.870,15702.400,...,12.360,19920413,-31287.050,-0.900,-13.640,214.910,67.200,96.200,13946.000,2019-06-20 13:22:59
000008,神州高铁,运输设备,北京,225.020,25.990,27.810,1051398.750,568977.310,30335.430,277197.840,...,1.440,19920507,168112.170,0.600,47.710,11.550,53.070,3.800,91600.000,2019-06-20 13:22:59
000009,中国宝安,综合类,深圳,45.940,21.200,21.490,3001209.500,2006542.250,404901.160,82464.930,...,2.300,19910625,205483.310,0.960,10.830,87.570,35.770,2.950,163306.000,2019-06-20 13:22:59
000010,*ST美丽,建筑施工,深圳,245.400,5.220,8.200,343369.470,284357.000,6932.720,149537.160,...,6.700,19951027,-190160.270,-2.320,146.490,106.190,28.990,1.440,45100.000,2019-06-20 13:22:59
000011,深物业A,区域地产,深圳,17.910,1.760,5.960,871487.380,769887.310,3291.280,11893.800,...,1.640,19920330,240368.050,4.030,-29.100,46.720,48.850,20.320,45610.000,2019-06-20 13:22:59


In [9]:
stock_basics.columns

Index(['name', 'industry', 'area', 'pe', 'outstanding', 'totals',
       'totalAssets', 'liquidAssets', 'fixedAssets', 'reserved',
       'reservedPerShare', 'esp', 'bvps', 'pb', 'timeToMarket', 'undp',
       'perundp', 'rev', 'profit', 'gpr', 'npr', 'holders', 'create_time'],
      dtype='object')

### industry

In [11]:
industry = stock_basics['industry'].value_counts()
industry.to_excel('industry_counts.xlsx', now[:10])
print(industry.shape)
industry

(110,)


软件服务    175
元器件     171
电气设备    170
化工原料    157
汽车配件    133
专用机械    130
通信设备    114
化学制药     95
建筑施工     88
中成药      74
机械基件     73
医疗保健     71
环境保护     67
互联网      65
区域地产     64
食品       62
电器仪表     60
服饰       56
半导体      52
影视音像     47
百货       46
塑料       45
农药化肥     45
生物制药     45
证券       43
家用电器     42
纺织       41
广告包装     39
仓储物流     38
家居用品     38
       ... 
纺织机械     11
超市连锁     11
红黄药酒     10
轻工机械     10
新型电力      9
电信运营      9
公共交通      9
汽车服务      9
农用机械      9
酒店餐饮      9
房产服务      9
特种钢       9
摩托车       8
批发业       8
空运        8
石油加工      8
渔业        8
船舶        8
公路        7
保险        7
石油贸易      7
啤酒        7
焦炭加工      7
软饮料       6
陶瓷        6
铁路        4
林业        4
机场        4
商品城       3
电器连锁      2
Name: industry, Length: 110, dtype: int64

'2019-06-20'

### area

In [12]:
area = stock_basics['area'].value_counts()
area.to_excel('area_counts.xlsx', now[:10])
print(area.shape)
area

(32,)


浙江     440
江苏     410
北京     325
广东     310
上海     289
深圳     287
山东     201
福建     133
四川     124
安徽     104
湖南     104
湖北     103
河南      79
辽宁      74
新疆      55
河北      55
天津      51
陕西      50
重庆      50
吉林      41
江西      41
广西      38
山西      37
黑龙江     37
云南      36
甘肃      33
海南      31
贵州      29
内蒙      25
西藏      18
宁夏      14
青海      12
Name: area, dtype: int64

### industry & area

In [13]:
table = stock_basics.pivot_table(index = ['industry'], values = ['name'], columns = ['area'], 
                     aggfunc = [len], fill_value=None, margins=True, dropna=True, margins_name='All')
table.to_excel('industry_area_counts.xlsx', now[:10])
table

Unnamed: 0_level_0,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len,len
Unnamed: 0_level_1,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name,name
area,上海,云南,内蒙,北京,吉林,四川,天津,宁夏,安徽,山东,...,甘肃,福建,西藏,贵州,辽宁,重庆,陕西,青海,黑龙江,All
industry,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
专用机械,11.000,,,7.000,,6.000,1.000,,5.000,4.000,...,1.000,4.000,,,7.000,,1.000,,,130
中成药,3.000,3.000,,2.000,5.000,2.000,3.000,,,3.000,...,3.000,1.000,3.000,4.000,,1.000,3.000,1.000,2.000,74
乳制品,2.000,,1.000,1.000,,1.000,,,,,...,1.000,,,,,,,,,12
互联网,4.000,,,12.000,,1.000,,,1.000,3.000,...,,6.000,,,2.000,1.000,,1.000,,65
仓储物流,6.000,,,3.000,,2.000,1.000,,,2.000,...,,1.000,,,1.000,,,,1.000,38
供气供热,1.000,,,1.000,3.000,2.000,,,1.000,2.000,...,,,,1.000,3.000,1.000,1.000,,,28
保险,1.000,,1.000,3.000,,,,,,,...,,,,,,,,,,7
元器件,3.000,,,7.000,,5.000,1.000,,3.000,3.000,...,,11.000,,2.000,1.000,,2.000,,1.000,171
全国地产,4.000,1.000,,6.000,,,,,,,...,,2.000,,,1.000,2.000,1.000,,,37
公共交通,5.000,,,1.000,,,,,,,...,,,,,,,,,,9


## 业绩报告（主表）：get_report_data
- 按年度、季度获取业绩报表数据。数据获取需要一定的时间，网速取决于您的网速，请耐心等待。
   - code,代码
   - name,名称
   - esp,每股收益
   - eps_yoy,每股收益同比(%)
   - bvps,每股净资产
   - roe,净资产收益率(%)
   - epcf,每股现金流量(元)
   - net_profits,净利润(万元)
   - profits_yoy,净利润同比(%)
   - distrib,分配方案
   - report_date,发布日期

In [33]:
#获取2018年第4季度的业绩报表数据
classified = ts.get_report_data(2019, 1) # year, quarter
print(classified.shape)
classified.head()

[Getting data:]#############################################################(3621, 11)


Unnamed: 0,code,name,eps,eps_yoy,bvps,roe,epcf,net_profits,profits_yoy,distrib,report_date
0,2956,西麦食品,0.8,21.21,,8.78,,4796.27,20.61,,06-03
1,300780,德恩精工,0.12,20.0,4.93,2.44,0.03,1308.84,17.98,,05-30
2,300782,卓胜微,0.56,72.98,,8.43,,4177.28,72.99,,05-27
3,603327,福蓉科技,0.19,50.35,1.98,9.41,0.14,6761.52,50.31,,05-22
4,2955,鸿合科技,0.17,-10.53,9.0,1.95,-3.22,1789.51,-10.16,,05-22


In [46]:
year_list = [2015, 2016, 2017, 2018]
quarter_list = [1, 2, 3, 4]

report_data = pd.DataFrame()
for year in year_list:
    for quarter in quarter_list:
        tmp_data = ts.get_report_data(year, quarter) # year, quarter
        tmp_data['year'] = year
        tmp_data['quarter'] = quarter
        report_data = pd.concat([report_data, tmp_data], axis = 0)
        
report_data.to_sql('report_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(report_data.index.get_level_values('code').str.len().max())}
                  ) 

[Getting data:]#################################################[Getting data:]####################################################[Getting data:]####################################################[Getting data:]#############################################################[Getting data:]########################################################[Getting data:]##########################################################[Getting data:]##########################################################[Getting data:]#############################################################[Getting data:]###########################################################[Getting data:]############################################################[Getting data:]############################################################[Getting data:]#############################################################[Getting data:]#############################################################[Getting data:]##########################################

In [44]:
report_data_2019 = ts.get_report_data(2019, 1)
report_data_2019['year'] = 2019
report_data_2019['quarter'] = 1
report_data_2019.to_sql('report_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(report_data_2019.index.get_level_values('code').str.len().max())}
                       ) 

In [45]:
report_data_2019.head()

Unnamed: 0,code,name,eps,eps_yoy,bvps,roe,epcf,net_profits,profits_yoy,distrib,report_date,year,quarter
0,2956,西麦食品,0.8,21.21,,8.78,,4796.27,20.61,,06-03,2019,1
1,300780,德恩精工,0.12,20.0,4.93,2.44,0.03,1308.84,17.98,,05-30,2019,1
2,300782,卓胜微,0.56,72.98,,8.43,,4177.28,72.99,,05-27,2019,1
3,600145,*ST新亿,-0.0,512.5,,-0.0,,-7.32,-93.57,,05-22,2019,1
4,603327,福蓉科技,0.19,50.35,1.98,9.41,0.14,6761.52,50.31,,05-22,2019,1


## 盈利能力：get_profit_data
- 按年度、季度获取盈利能力数据，结果返回的数据属性说明如下：
   - code,代码
   - name,名称
   - roe,净资产收益率(%)
   - net_profit_ratio,净利率(%)
   - gross_profit_rate,毛利率(%)
   - net_profits,净利润(万元)
   - esp,每股收益
   - business_income,营业收入(百万元)
   - bips,每股主营业务收入(元)

In [48]:
year_list = [2015, 2016, 2017, 2018]
quarter_list = [1, 2, 3, 4]

profit_data = pd.DataFrame()
for year in year_list:
    for quarter in quarter_list:
        tmp_data = ts.get_profit_data(year, quarter) # year, quarter
        tmp_data['year'] = year
        tmp_data['quarter'] = quarter
        profit_data = pd.concat([profit_data, tmp_data], axis = 0)
        
profit_data.to_sql('profit_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(profit_data.index.get_level_values('code').str.len().max())}
                  ) 

[Getting data:]#################################################[Getting data:]####################################################[Getting data:]####################################################[Getting data:]#############################################################[Getting data:]########################################################[Getting data:]##########################################################[Getting data:]##########################################################[Getting data:]#############################################################[Getting data:]############################################################[Getting data:]############################################################[Getting data:]############################################################[Getting data:]#############################################################[Getting data:]#############################################################[Getting data:]#########################################

In [47]:
profit_data_2019 = ts.get_profit_data(2019, 1)
profit_data_2019['year'] = 2019
profit_data_2019['quarter'] = 1
profit_data_2019.to_sql('profit_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(profit_data_2019.index.get_level_values('code').str.len().max())}
                       ) 

[Getting data:]#############################################################

## 营运能力：get_operation_data
- 按年度、季度获取营运能力数据，结果返回的数据属性说明如下：
   - code,代码
   - name,名称
   - arturnover,应收账款周转率(次)
   - arturndays,应收账款周转天数(天)
   - inventory_turnover,存货周转率(次)
   - inventory_days,存货周转天数(天)
   - currentasset_turnover,流动资产周转率(次)
   - currentasset_days,流动资产周转天数(天)

In [53]:
year_list = [2015, 2016, 2017, 2018]
quarter_list = [1, 2, 3, 4]

operation_data = pd.DataFrame()
for year in year_list:
    for quarter in quarter_list:
        tmp_data = ts.get_operation_data(year, quarter) # year, quarter
        tmp_data['year'] = year
        tmp_data['quarter'] = quarter
        operation_data = pd.concat([operation_data, tmp_data], axis = 0)
        
operation_data.to_sql('operation_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(operation_data.index.get_level_values('code').str.len().max())}
                  ) 

[Getting data:]#################################################[Getting data:]####################################################[Getting data:]####################################################[Getting data:]#############################################################[Getting data:]########################################################[Getting data:]##########################################################[Getting data:]##########################################################[Getting data:]#############################################################[Getting data:]###########################################################[Getting data:]############################################################[Getting data:]############################################################[Getting data:]#############################################################[Getting data:]#############################################################[Getting data:]##########################################

In [49]:
operation_data_2019 = ts.get_operation_data(2019, 1)
operation_data_2019['year'] = 2019
operation_data_2019['quarter'] = 1
operation_data_2019.to_sql('operation_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(operation_data_2019.index.get_level_values('code').str.len().max())}
                       ) 

[Getting data:]#############################################################

## 成长能力：get_growth_data
- 按年度、季度获取成长能力数据，结果返回的数据属性说明如下：
   - code,代码
   - name,名称
   - mbrg,主营业务收入增长率(%)
   - nprg,净利润增长率(%)
   - nav,净资产增长率
   - targ,总资产增长率
   - epsg,每股收益增长率
   - seg,股东权益增长率

In [54]:
year_list = [2015, 2016, 2017, 2018]
quarter_list = [1, 2, 3, 4]

growth_data = pd.DataFrame()
for year in year_list:
    for quarter in quarter_list:
        tmp_data = ts.get_growth_data(year, quarter) # year, quarter
        tmp_data['year'] = year
        tmp_data['quarter'] = quarter
        growth_data = pd.concat([growth_data, tmp_data], axis = 0)
        
growth_data.to_sql('growth_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(growth_data.index.get_level_values('code').str.len().max())}
                  ) 

[Getting data:]##############################################[Getting data:]###############################################[Getting data:]###############################################[Getting data:]#############################################################[Getting data:]#################################################[Getting data:]###################################################[Getting data:]####################################################[Getting data:]#############################################################[Getting data:]#######################################################[Getting data:]##########################################################[Getting data:]##########################################################[Getting data:]#############################################################[Getting data:]###########################################################[Getting data:]############################################################[Getting data:]##########

In [50]:
growth_data_2019 = ts.get_growth_data(2019, 1)
growth_data_2019['year'] = 2019
growth_data_2019['quarter'] = 1
growth_data_2019.to_sql('growth_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(growth_data_2019.index.get_level_values('code').str.len().max())}
                       ) 

[Getting data:]#############################################################

## 偿债能力：get_debtpaying_data
- 按年度、季度获取偿债能力数据，结果返回的数据属性说明如下：
   - code,代码
   - name,名称
   - currentratio,流动比率
   - quickratio,速动比率
   - cashratio,现金比率
   - icratio,利息支付倍数
   - sheqratio,股东权益比率
   - adratio,股东权益增长率

In [None]:
year_list = [2015, 2016, 2017, 2018]
quarter_list = [1, 2, 3, 4]

debtpaying_data = pd.DataFrame()
for year in year_list:
    for quarter in quarter_list:
        tmp_data = ts.get_debtpaying_data(year, quarter) # year, quarter
        tmp_data['year'] = year
        tmp_data['quarter'] = quarter
        debtpaying_data = pd.concat([debtpaying_data, tmp_data], axis = 0)
        
debtpaying_data.to_sql('debtpaying_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(debtpaying_data.index.get_level_values('code').str.len().max())}
                  ) 

In [51]:
debtpaying_data_2019 = ts.get_debtpaying_data(2019, 1)
debtpaying_data_2019['year'] = 2019
debtpaying_data_2019['quarter'] = 1
debtpaying_data_2019.to_sql('debtpaying_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(debtpaying_data_2019.index.get_level_values('code').str.len().max())}
                       ) 

[Getting data:]#############################################################

## 现金流量：get_cashflow_data
- 按年度、季度获取现金流量数据，结果返回的数据属性说明如下：
   - code,代码
   - name,名称
   - cf_sales,经营现金净流量对销售收入比率
   - rateofreturn,资产的经营现金流量回报率
   - cf_nm,经营现金净流量与净利润的比率
   - cf_liabilities,经营现金净流量对负债比率
   - cashflowratio,现金流量比率

In [None]:
year_list = [2015, 2016, 2017, 2018]
quarter_list = [1, 2, 3, 4]

cashflow_data = pd.DataFrame()
for year in year_list:
    for quarter in quarter_list:
        tmp_data = ts.get_cashflow_data(year, quarter) # year, quarter
        tmp_data['year'] = year
        tmp_data['quarter'] = quarter
        cashflow_data = pd.concat([cashflow_data, tmp_data], axis = 0)
        
cashflow_data.to_sql('cashflow_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(cashflow_data.index.get_level_values('code').str.len().max())}
                  ) 

In [52]:
cashflow_data_2019 = ts.get_cashflow_data(2019, 1)
cashflow_data_2019['year'] = 2019
cashflow_data_2019['quarter'] = 1
cashflow_data_2019.to_sql('cashflow_data', engine, schema='stocks', if_exists='append',
                   # dtype={'code':VARCHAR(cashflow_data_2019.index.get_level_values('code').str.len().max())}
                       ) 

[Getting data:]#############################################################

# 股票分类 Classify stocks
    一、行业分类（sina财经）：get_industry_classified
    二、概念分类：get_concept_classified
    三、地域分类：get_area_classified
    四、中小板分类：get_sme_classified
    五、创业板分类：get_gem_classified
    六、风险警示板分类：get_st_classified
    七、沪深300成份及权重：get_hs300s
    八、上证50成份股：get_sz50s
    九、中证500成份股：get_zz500s
    十、终止上市股票列表：get_terminated
    十一、暂停上市股票列表：get_suspended

In [27]:
# list name
# classified = ts.get_industry_classified()
classified = ts.get_concept_classified()
# classified = ts.get_area_classified()
print(classified.shape)
print(classified.head())
print()

# list
classified = ts.get_sme_classified()
# classified = ts.get_gem_classified()
# classified = ts.get_st_classified()
# classified = ts.get_hs300s()
# classified = ts.get_sz50s()
# classified = ts.get_zz500s()
# classified = ts.get_terminated()
# classified = ts.get_suspended()

print(classified.shape)
classified.head()

(9442, 3)
     code  name c_name
0  600007  中国国贸   外资背景
1  600114  东睦股份   外资背景
2  600132  重庆啤酒   外资背景
3  600182   S佳通   外资背景
4  600595  中孚实业   外资背景

(934, 2)


Unnamed: 0,code,name
0,2001,新 和 成
1,2002,鸿达兴业
2,2003,伟星股份
3,2004,华邦健康
4,2005,*ST德豪


In [None]:
industry_classified = ts.get_industry_classified()
industry_classified.rename(columns={'c_name': 'industry_name'}, inplace=True) 
print(industry_classified.shape)
print(industry_classified.head())

industry = industry_classified['industry_name'].value_counts()
print(industry.shape)
industry

In [None]:
concept_classified = ts.get_concept_classified()
concept_classified.rename(columns={'c_name': 'concept_name'}, inplace=True) 
print(concept_classified.shape)
print(concept_classified.head())

concept = concept_classified['concept_name'].value_counts()
print(concept.shape)
concept

In [None]:
area_classified = ts.get_area_classified()
area_classified.rename(columns={'c_name': 'area_name'}, inplace=True) 
print(area_classified.shape)
print(area_classified.head())

area = area_classified['area_name'].value_counts()
print(area.shape)
area

In [None]:
industry_concept = pd.merge(industry_classified, concept_classified, on = 'code', how = 'outer')
print(industry_concept.shape)
industry_concept_area = pd.merge(industry_concept, area_classified, on = 'code', how = 'outer')
print(industry_concept_area.shape)
industry_concept_area.head()

# 投资参考 Investment Reference
    一、分配预案：profit_data
    二、业绩预告：forecast_data
    三、限售股解禁：xsg_data
    四、基金持股：fund_holdings
    五、新股数据：new_stocks
    六、融资融券（沪市）：sh_margins、sh_margin_details
        沪市融资融券汇总数据
        沪市融资融券明细数据
    七、融资融券（深市）
        深市融资融券汇总数据：sz_margins
        深市融资融券明细数据：sz_margin_details

# 宏观经济 Macroeconomic Information
    一、存款利率：get_deposit_rate
    二、贷款利率：get_loan_rate
    三、存款准备金率：get_rrr
    四、货币供应量：get_money_supply
    五、货币供应量(年底余额)：get_money_supply_bal
    六、国内生产总值(年度)：get_gdp_year
    七、国内生产总值(季度)：get_gdp_quarter
    八、三大需求对GDP贡献：get_gdp_for
    九、三大产业对GDP拉动：get_gdp_pull
    十、三大产业贡献率：get_gdp_contrib
    十一、居民消费价格指数：get_cpi
    十二、工业品出厂价格指数：get_ppi

# 龙虎榜数据 
    一、每日龙虎榜列表：top_list
    二、个股上榜统计：cap_tops
    三、营业部上榜统计：broker_tops
    四、机构席位追踪：inst_tops
    五、机构成交明细：inst_detail
    
- 沪深交易龙虎榜指每日两市中涨跌幅、换手率等由大到小的排名榜单，并从中可以看到龙虎榜单中的股票在哪个证券营业部的成交量较大。该数据有助于了解当日异动个股的资金进出情况，判断是游资所为还是机构所为。

# 股票黑名单 Black List

In [15]:
black_list = pd.read_excel('data/Black_list.xlsx')
print(black_list.shape)
black_list.sort_values(by = 'code')

(0, 3)


Unnamed: 0,code,name,reason


In [20]:
black_list.to_sql('black_list', engine, schema='stocks', if_exists='replace',
                    dtype={'code':VARCHAR(black_list.index.get_level_values('code').str.len().max())}) 

KeyError: 'Level code must be same as name (None)'