In [24]:
from data_tools.api import *
import pymysql
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from joblib import Parallel,delayed
import datetime

DB_INFO = dict(host='192.168.1.234',
               user='winduser',
               password='1qaz@WSX',
               db='wind')

conn = pymysql.connect(**DB_INFO, charset='utf8mb4', cursorclass=pymysql.cursors.DictCursor)

TODAY_DATE=datetime.datetime.now().date()

| 二级分类   | 简称             | 因子构建方式                                                                                                | 财报数据类型 | 是否哑变量 |
|--------|----------------|-------------------------------------------------------------------------------------------------------|--------|-------|
| 关联交易   | risk\_关联进货销货   | （向关联方采购产品金额合计\+向关联方销售产品金额合计）/净资产绝对值                                                                   | \-     | \-    |
| 关联交易   | risk\_关联融资余额   | 向关联方提供资金余额/净资产绝对值                                                                                     | \-     | \-    |
| 关联交易   | risk\_关联并购     | 3年内关联并购总金额/过去3年平均总资产                                                                                  | \-     | \-    |
| 关联交易   | risk\_关联交易次数   | 1年内公司关联交易记录次数                                                                                         | \-     | \-    |
| 分析师预期  | risk\_评级周下调    | 近一周分析师评级下调                                                                                            | \-     | \-    |
| 分析师预期  | risk\_目标价周下调   | 近一周分析师目标价下调                                                                                           | \-     | \-    |
| 分析师预期  | risk\_预期EPS周下调 | 近一周分析师预期每股收益下调（最近预测年度FY1）                                                                             | \-     | \-    |
| 减持     | risk\_大股东减持    | 近3个月大股东减持比例，减持比例=减持股票数/总股本                                                                            | \-     | \-    |
| 减持     | risk\_高管减持     | 近3个月高管减持比例，减持比例=减持股票数/总股本                                                                             | \-     | \-    |
| 重组失败   | risk\_重组失败     | 近6个月是否有重大资产重组失败记录，有为1分，无为0分                                                                           | \-     | 哑变量   |
| 限售股解禁  | risk\_限售股解禁    | 未来1个月内限售股解禁比例，解禁比例=解禁股票数/总股本                                                                          | \-     | \-    |
| 债券负面   | risk\_债券违约     | 近2年内是否有债券违约，有为1分，无为0分（担保违约、担保人代偿、兑付风险警示、未按时兑付本金...） | \-     | 哑变量   |
| 负面公告   | risk\_问询函      | 近一个月是否收到问询函，有为1分，无为0分                                                                                 | \-     | 哑变量   |
| 预快报绩效  | risk\_业绩预亏     | 关于下一期业绩的业绩预告是否显示了公司净利润为负，是为1分，不是为0分                                                                   | \-     | 哑变量   |
| 定期报告绩效 | risk\_审计意见     | 财报审计意见是否为标准无保留意见，不是记为1分，是记为0分                                                                         | \-     | 哑变量   |
| 定期报告绩效 | risk\_政府补助     | 最新一期政府补助比净利润                                                                                          | 最新     | \-    |
| 预快报绩效  | risk\_业绩预告大幅下滑 | 关于下一期业绩的业绩预告是否显示了公司未来及利润将大幅下滑（50%以上），上限与下限的平均，是为1分，不是为0分                                              | \-     | 哑变量   |
| 定期报告绩效 | risk\_业绩实亏     | 最新一期净利润是否为负，是为1分，不是为0分                                                                                | 最新     | 哑变量   |


In [None]:
#日期转化为str
def str_my(s):
    return str(s).replace("-","")

#公司代码和股票代码对照表
sql="SELECT S_INFO_WINDCODE,S_INFO_COMPCODE from WINDCUSTOMCODE where S_INFO_SECURITIESTYPES='A'"
df_codemap=pd.read_sql_query(sql,conn)

## 关联交易

In [325]:
def Get_Related_Trade_Info():
    ###输出为一个元组：（一年内关联交易购销信息的 DataFrame ，一年内关联交易融资信息的 DataFrame， 三年内关联交易并购信息的 DataFrame， 一年内关联交易次数的 DataFrame）
    
    #公司近三年资产平均值
    sql="""select S_INFO_WINDCODE,REPORT_PERIOD,TOT_ASSETS from ASHAREBALANCESHEET where STATEMENT_TYPE ='408004000'and REPORT_PERIOD>='20161231'"""
    df_Total_Asset=pd.read_sql(sql,conn)
    df_Total_Asset=df_Total_Asset.sort_values(["S_INFO_WINDCODE","REPORT_PERIOD"])
    df_Total_Asset["is_major_sec"]=df_Total_Asset["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_Total_Asset=df_Total_Asset[df_Total_Asset["is_major_sec"]]
    Three_year_avg_asset=df_Total_Asset.set_index(["S_INFO_WINDCODE","REPORT_PERIOD"]).unstack(level=1)["TOT_ASSETS"].mean(axis=1).to_frame("data")

    #关联交易销售商品、购买商品；
    sql="""select S_INFO_WINDCODE,ANN_DT,S_RELATEDTRADE_TRADETYPE,CRNCY_CODE,S_RELATEDTRADE_AMOUNT from ASHARERALATEDTRADE where ANN_DT>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=360)))
    df_Related_Trade=pd.read_sql(sql,conn)
    df_Related_Trade["S_RELATEDTRADE_AMOUNT"]=df_Related_Trade["S_RELATEDTRADE_AMOUNT"].fillna(0).apply(float)
    type_sell_buy=['销售商品', '购买商品', '向关联方销售产品和提供劳务', '向关联方采购产品和接受劳务', '采购', '销售',
                   '提供服务','接受服务','购买','购销','出售','销售,提供劳务','采购货物,接受劳务','购销商品','销售,提供']
    df_Related_sellbuy=df_Related_Trade[df_Related_Trade.S_RELATEDTRADE_TRADETYPE.isin(type_sell_buy)]
    df_Related_sellbuy["is_major_sec"]=df_Related_sellbuy["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_Related_sellbuy=df_Related_sellbuy[df_Related_sellbuy["is_major_sec"]]
    df_Related_sellbuy_sum=df_Related_sellbuy["S_RELATEDTRADE_AMOUNT"].groupby(df_Related_sellbuy["S_INFO_WINDCODE"]).apply(np.sum).to_frame("data")
    df_Related_sellbuy_to_asset=df_Related_sellbuy_sum/Three_year_avg_asset

    #关联交易融资
    sql="""select S_INFO_COMPCODE,ANN_DT,REPORT_PERIOD,ASSOCIATED_FUNDING_BALANCE from ASHARERELATEDCLAIMSDEBTS where ANN_DT>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=360)))
    df_Related_debts=pd.read_sql(sql,conn)
    df_Related_debts["ASSOCIATED_FUNDING_BALANCE"]=df_Related_debts["ASSOCIATED_FUNDING_BALANCE"].fillna(0)*10000
    df_Related_debts=pd.merge(df_Related_debts,df_codemap,on='S_INFO_COMPCODE')
    df_Related_debts["is_major_sec"]=df_Related_debts["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_Related_debts=df_Related_debts[df_Related_debts["is_major_sec"]]
    df_Related_debts_sum=df_Related_debts["ASSOCIATED_FUNDING_BALANCE"].groupby(df_Related_debts["S_INFO_WINDCODE"]).apply(np.sum).to_frame("data")
    df_Related_debts_to_asset=df_Related_debts_sum/Three_year_avg_asset

    #关联交易并购
    sql="""select EVENT_ID,ANN_DATE,IS_RELATED_PARTY_TRANSAC,CRNCY_CODE,TRADE_VALUE from MERGEREVENT where ANN_DATE>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=1080)))
    df_Merge_amount=pd.read_sql(sql,conn)
    df_Merge_amount=df_Merge_amount[df_Merge_amount["IS_RELATED_PARTY_TRANSAC"]==1]
    sql="""select EVENT_ID,S_INFO_WINDCODE,RELATIONSHIP,PARTY_TYPE_CODE,PARTY_ROLE_CODE from MERGERPARTICIPANT"""
    df_Merge_Participant=pd.read_sql(sql,conn)
    df_Merge_Participant=df_Merge_Participant[pd.notnull(df_Merge_Participant["S_INFO_WINDCODE"])]
    df_Merge_amount=pd.merge(df_Merge_amount,df_Merge_Participant,on="EVENT_ID",how="right")
    df_Merge_amount["TRADE_VALUE"]=df_Merge_amount["TRADE_VALUE"].fillna(0)*10000
    df_Merge_amount["is_major_sec"]=df_Merge_amount["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_Merge_amount=df_Merge_amount[df_Merge_amount["is_major_sec"]]
    df_Merge_amount_sum=df_Merge_amount["TRADE_VALUE"].groupby(df_Merge_amount["S_INFO_WINDCODE"]).apply(np.sum).to_frame("data")
    df_Merge_amount_to_asset=df_Merge_amount_sum/Three_year_avg_asset


    #关联交易次数
    df_Related_Trade["is_major_sec"]=df_Related_Trade["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_Related_Trade=df_Related_Trade[df_Related_Trade["is_major_sec"]]
    df_Related_trade_times=df_Related_Trade.groupby("S_INFO_WINDCODE").apply(len)
    
    return df_Related_sellbuy_to_asset,df_Related_debts_to_asset,df_Merge_amount_to_asset,df_Related_trade_times

In [326]:
Get_Related_Trade_Info()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


(                     data
 S_INFO_WINDCODE          
 000001.SZ             NaN
 000002.SZ             NaN
 000003.SZ             NaN
 000004.SZ             NaN
 000005.SZ             NaN
 000006.SZ             NaN
 000007.SZ             NaN
 000008.SZ        0.000015
 000009.SZ             NaN
 000010.SZ             NaN
 000011.SZ             NaN
 000012.SZ             NaN
 000014.SZ             NaN
 000015.SZ             NaN
 000016.SZ        0.003247
 000017.SZ             NaN
 000018.SZ             NaN
 000019.SZ             NaN
 000020.SZ        0.880573
 000021.SZ        0.004280
 000023.SZ        0.029676
 000025.SZ        0.010900
 000026.SZ             NaN
 000027.SZ             NaN
 000028.SZ        0.000000
 000029.SZ             NaN
 000030.SZ        0.861984
 000031.SZ        0.000000
 000032.SZ        0.055432
 000034.SZ        0.046621
 ...                   ...
 603966.SH        0.001706
 603968.SH        0.062345
 603969.SH        0.113006
 603970.SH             NaN
 

## 分析师预期

In [291]:
def Get_Analyst_Expect_Info():
    ###输出为一个元组： （ 分析师一致预期目标价一周同比 DataFrame， 分析师评级一周同比 DataFrame， 分析师一致预期每股收益同比 DataFrame）
    
    #分析师评级（绝对数值）、目标价下调（比例）、一致预期利润下调(比例)
    sql="""select S_INFO_WINDCODE,RATING_DT,S_WRATING_AVG,S_EST_PRICE from ASHARESTOCKRATINGCONSUS where RATING_DT>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=7)))
    df_stock_rating=pd.read_sql(sql,conn)
    df_stock_rating.sort_values("RATING_DT",inplace=True)

    sql="""select S_INFO_WINDCODE,EST_DT,EST_EPS from ASHARECONSENSUSROLLINGDATA where  ROLLING_TYPE='FY1' and EST_DT>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=7)))
    df_con_eps=pd.read_sql(sql,conn)
    df_con_eps.sort_values("EST_DT",inplace=True)


    df_1_week_rating_decre=df_stock_rating.groupby("S_INFO_WINDCODE").apply(lambda x:x["S_WRATING_AVG"].iloc[-1]-x["S_WRATING_AVG"].iloc[0])
    df_1_week_objprice_decre=df_stock_rating.groupby("S_INFO_WINDCODE").apply(lambda x:x["S_EST_PRICE"].iloc[-1]/x["S_EST_PRICE"].iloc[0]-1)
    df_1_week_eps_decre=df_con_eps.groupby("S_INFO_WINDCODE").apply(lambda x:x["EST_EPS"].iloc[-1]/x["EST_EPS"].iloc[0]-1)
    
    return df_1_week_objprice_decre,df_1_week_rating_decre,df_1_week_eps_decre

In [303]:
Get_Analyst_Expect_Info()

(S_INFO_WINDCODE
 000001.SZ   -0.040774
 000002.SZ    0.014047
 000008.SZ         NaN
 000012.SZ         NaN
 000026.SZ         NaN
 000027.SZ         NaN
 000028.SZ    0.000000
 000034.SZ    0.000000
 000035.SZ         NaN
 000043.SZ    0.008045
 000050.SZ    0.000000
 000059.SZ         NaN
 000062.SZ         NaN
 000063.SZ   -0.000588
 000089.SZ   -0.070881
 000157.SZ   -0.012239
 000338.SZ    0.000000
 000400.SZ    0.010733
 000401.SZ    0.150342
 000402.SZ    0.000000
 000403.SZ    0.000000
 000423.SZ    0.000000
 000488.SZ    0.005405
 000498.SZ    0.000000
 000513.SZ    0.112447
 000524.SZ    0.000000
 000525.SZ         NaN
 000546.SZ    0.179866
 000547.SZ    0.000000
 000550.SZ         NaN
                ...   
 603786.SH    0.000000
 603788.SH   -0.023184
 603799.SH    0.000000
 603806.SH    0.000000
 603808.SH    0.000000
 603818.SH    0.000000
 603826.SH    0.059447
 603848.SH   -0.129222
 603859.SH    0.000000
 603866.SH    0.000000
 603868.SH    0.000000
 603877.SH   -0.0

## 减持

In [292]:
def Get_Holder_Plan_Trade_Info():
    ### 输出为一个元组： （3月内高管减持比例 DataFrame， 3月内大股东减持比例 DataFrame）
    
    #大股东、高管减持（比例）
    sql="""select S_INFO_WINDCODE,ANN_DT,HOLDER_TYPE,HOLD_PROPORTION,PLAN_TRANSACT_MAX_RATIO,PLAN_TRANSACT_MIN_RATIO from ASAREPLANTRADE where ANN_DT>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=90)))
    df_holder_plan_trade=pd.read_sql(sql,conn)

    Manager_holder_plan_trade=df_holder_plan_trade[df_holder_plan_trade["HOLDER_TYPE"]=='3']
    Important_holder_plan_trade=df_holder_plan_trade[df_holder_plan_trade["HOLD_PROPORTION"]>=1]#大股东为占比1%以上的股东

    Manager_holder_plan_trade=Manager_holder_plan_trade.groupby("S_INFO_WINDCODE").apply(lambda x:np.sum(x["PLAN_TRANSACT_MAX_RATIO"]))
    Important_holder_plan_trade=Important_holder_plan_trade.groupby("S_INFO_WINDCODE").apply(lambda x:np.sum(x["PLAN_TRANSACT_MAX_RATIO"]))
    
    return Manager_holder_plan_trade,Important_holder_plan_trade

In [304]:
Get_Holder_Plan_Trade_Info()

(S_INFO_WINDCODE
 000034.SZ    2.0000
 000069.SZ    0.0100
 000070.SZ    0.0037
 000158.SZ    0.4263
 000595.SZ    0.0060
 000676.SZ    0.7500
 000810.SZ    0.0200
 000921.SZ    0.0150
 000925.SZ    0.4000
 000998.SZ    0.1500
 002007.SZ    0.1572
 002009.SZ    1.0130
 002017.SZ    0.2013
 002025.SZ    0.0028
 002051.SZ    0.0250
 002062.SZ    0.0270
 002081.SZ    0.0064
 002095.SZ    0.0200
 002106.SZ    0.0400
 002107.SZ    0.0300
 002119.SZ    0.6900
 002132.SZ    0.0139
 002154.SZ    0.1757
 002157.SZ    0.2911
 002213.SZ    3.0000
 002217.SZ    2.6300
 002234.SZ    6.0000
 002235.SZ    3.8609
 002241.SZ    2.8500
 002280.SZ    0.0316
               ...  
 603566.SH    1.5860
 603579.SH    0.1600
 603583.SH    3.0300
 603586.SH    0.9800
 603602.SH    0.0832
 603608.SH    0.0200
 603609.SH    1.1083
 603639.SH    0.0433
 603657.SH    0.0323
 603683.SH    0.4342
 603686.SH    1.9247
 603690.SH    0.7948
 603711.SH    0.0600
 603725.SH    0.1850
 603738.SH    0.5600
 603757.SH    0.5

## 重组失败

In [293]:
def Get_Merge_Failure():
    ###返回一个DataFrame，上市公司过去六月是否有重组失败情况
    
    
    #重组失败
    sql="""select EVENT_ID,UPDATE_DATE,IS_MAJORASSETRESTRUCTURE，PROGRESS_CODE from MERGEREVENT where UPDATE_DATE>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=180)))
    df_Merge_Event=pd.read_sql(sql,conn)
    
    sql="""select EVENT_ID,S_INFO_WINDCODE,RELATIONSHIP,PARTY_TYPE_CODE,PARTY_ROLE_CODE from MERGERPARTICIPANT"""
    df_Merge_Participant=pd.read_sql(sql,conn)
    df_Merge_Participant=df_Merge_Participant[pd.notnull(df_Merge_Participant["S_INFO_WINDCODE"])]
    
    df_Merge_Event=pd.merge(df_Merge_Event,df_Merge_Participant,on="EVENT_ID",how="right")
    df_Merge_Event["is_merge_failure"]=df_Merge_Event["PROGRESS_CODE"].apply(lambda x:x in [324005000,324005001,324005002,324005003,324005004,324005005])
    
    df_Merge_Event=df_Merge_Event[df_Merge_Event["IS_MAJORASSETRESTRUCTURE"]==1]
    df_Merge_Event["is_major_sec"]=df_Merge_Event["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_Merge_Event=df_Merge_Event[df_Merge_Event["is_major_sec"]]
    df_Merge_Event.sort_values("S_INFO_WINDCODE",inplace=True)
    
    df_Merge_Event=df_Merge_Event.groupby("S_INFO_WINDCODE").apply(lambda x:(x["is_merge_failure"].sum()>0))
    
    return df_Merge_Event

## 负面公告：问询函、债券违约

In [330]:
def Get_Major_Event_Info():   
    ###返回一个元组： （ 公司近1月是否收到问询函 DataFrame， 公司近两年是否发生债券违约 DataFrame）
    
    
    #问询函
    sql="""select S_INFO_WINDCODE,S_EVENT_ANNCEDATE,S_EVENT_CONTENT,S_EVENT_HAPDATE,S_EVENT_CATEGORYCODE from ASHAREMAJOREVENT where S_EVENT_ANNCEDATE>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=30)))
    df_major_event_1=pd.read_sql(sql,conn)
    def fun(x):
        try:
            return '问询函' in x
        except TypeError:
            return False
    df_major_event_1["receive_queryletter"]=df_major_event_1["S_EVENT_CONTENT"].apply(fun)
    df_major_event_1=df_major_event_1.groupby("S_INFO_WINDCODE").apply(lambda x:(x["receive_queryletter"].sum()>0))

    #债券违约
    sql="""select S_INFO_WINDCODE,S_EVENT_ANNCEDATE,S_EVENT_CONTENT,S_EVENT_HAPDATE,S_EVENT_CATEGORYCODE from ASHAREMAJOREVENT where S_EVENT_ANNCEDATE>={}""".format(str_my(TODAY_DATE-datetime.timedelta(days=720)))
    df_major_event_2=pd.read_sql(sql,conn)
    def fun(x):
        try:
            default_event_ls=['担保违约','担保人代偿','兑付风险警示','未按时兑付本金','未按时兑付利息','未按时兑付本息',\
                              '未按时兑付回售款', '未按时兑付回售款和利息', '提前到期未兑付','技术性违约']
            for default_event in default_event_ls:
                if default_event in x:
                    return True
            return '债券' in x and '违约' in x
        except TypeError:
            return False
    df_major_event_2["bond_default"]=df_major_event_2["S_EVENT_CONTENT"].apply(fun)
    df_major_event_2=df_major_event_2.groupby("S_INFO_WINDCODE").apply(lambda x:(x["bond_default"].sum()>0))
    
    return df_major_event_1,df_major_event_2

In [333]:
Get_Major_Event_Info()

(S_INFO_WINDCODE
 000001.SZ    False
 000002.SZ    False
 000003.SZ    False
 000004.SZ    False
 000005.SZ    False
 000006.SZ    False
 000007.SZ    False
 000008.SZ    False
 000010.SZ    False
 000011.SZ    False
 000012.SZ    False
 000014.SZ    False
 000016.SZ    False
 000017.SZ    False
 000018.SZ    False
 000019.SZ    False
 000020.SZ    False
 000021.SZ    False
 000023.SZ    False
 000025.SZ    False
 000026.SZ    False
 000027.SZ    False
 000029.SZ    False
 000030.SZ    False
 000031.SZ    False
 000033.SZ    False
 000034.SZ    False
 000035.SZ    False
 000036.SZ    False
 000038.SZ    False
              ...  
 688108.SH    False
 688111.SH    False
 688116.SH    False
 688118.SH    False
 688122.SH    False
 688128.SH    False
 688138.SH    False
 688166.SH    False
 688168.SH    False
 688188.SH    False
 688196.SH    False
 688198.SH    False
 688199.SH    False
 688202.SH    False
 688218.SH    False
 688258.SH    False
 688288.SH    False
 688299.SH    False
 68

## 限售解禁

In [20]:
def Get_Share_Float_Info():
    ### 返回一个DataFrame ，公司未来一月股份解禁流通比例
    
    
    #限售股解禁（比例）
    sql="""select S_INFO_WINDCODE,S_INFO_LISTDATE,S_SHARE_RATIO from ASHARECOMPRESTRICTED where S_INFO_LISTDATE between {} and {}""".format(str_my(TODAY_DATE),str_my(TODAY_DATE+datetime.timedelta(days=30)))
    df_free_float_ratio=pd.read_sql(sql,conn)
    
    df_free_float_ratio=df_free_float_ratio.groupby("S_INFO_WINDCODE").apply(lambda x:np.sum(x["S_SHARE_RATIO"]))
    
    return df_free_float_ratio

In [21]:
Get_Share_Float_Info()

S_INFO_WINDCODE
000065.SZ    18.2665
000426.SZ    29.2863
000547.SZ     1.1213
000716.SZ     5.4587
000733.SZ     8.8312
000793.SZ     1.0727
000813.SZ    10.1489
000851.SZ     1.2745
000892.SZ    17.9443
000928.SZ     8.0343
000990.SZ    69.0600
002109.SZ    48.2415
002120.SZ    74.1205
002176.SZ    13.8979
002230.SZ     0.3117
002251.SZ     1.9659
002331.SZ     5.8274
002341.SZ    22.5264
002426.SZ    14.9687
002434.SZ    12.2918
002468.SZ    78.3267
002542.SZ     2.9073
002568.SZ     1.1543
002579.SZ     5.2312
002636.SZ    65.4750
002638.SZ    35.0158
002642.SZ     3.9659
002675.SZ     0.2296
002759.SZ    42.2011
002822.SZ    43.5470
              ...   
600959.SH    11.4475
600996.SH    70.7994
601000.SH     5.9414
601138.SH     1.1590
601899.SH     6.2782
603033.SH    32.9652
603036.SH    74.0272
603058.SH    46.1718
603098.SH    84.3700
603131.SH     0.8103
603168.SH     7.8272
603187.SH    49.4768
603298.SH    72.4379
603319.SH    30.0393
603323.SH    26.8650
603389.SH    71.24

##  非标审计意见

In [308]:
def Get_Audit_Opinion_Info():
    ###返回一个DataFrame， 公司近一年是否出现非标审计意见
    
    
    #审计意见（进一年发布的财报是否出现非标审计意见）
    sql="""select S_INFO_WINDCODE,ANN_DT,REPORT_PERIOD,S_STMNOTE_AUDIT_CATEGORY from ASHAREAUDITOPINION where ANN_DT>='20190101'"""
    df_audit_opinion=pd.read_sql(sql,conn)

    df_audit_opinion["nonstd_audit_opinion"]=(df_audit_opinion["S_STMNOTE_AUDIT_CATEGORY"]!=405001000)
    df_audit_opinion=df_audit_opinion.set_index("S_INFO_WINDCODE").sort_index()
    
    return df_audit_opinion["nonstd_audit_opinion"]

In [309]:
Get_Audit_Opinion_Info()

S_INFO_WINDCODE
000001.SZ    False
000002.SZ    False
000003.SZ    False
000004.SZ    False
000005.SZ     True
000006.SZ    False
000007.SZ     True
000008.SZ    False
000009.SZ    False
000010.SZ     True
000011.SZ    False
000012.SZ    False
000014.SZ    False
000016.SZ    False
000017.SZ     True
000018.SZ     True
000019.SZ    False
000020.SZ    False
000021.SZ    False
000023.SZ    False
000025.SZ    False
000026.SZ    False
000027.SZ    False
000028.SZ    False
000029.SZ    False
000030.SZ    False
000031.SZ    False
000031.SZ    False
000032.SZ    False
000034.SZ    False
             ...  
688358.SH    False
688358.SH    False
688358.SH    False
688363.SH    False
688363.SH    False
688363.SH    False
688363.SH    False
688366.SH    False
688366.SH    False
688366.SH    False
688366.SH    False
688368.SH    False
688368.SH    False
688368.SH    False
688368.SH    False
688369.SH    False
688369.SH    False
688369.SH    False
688369.SH    False
688388.SH    False
688388.SH    Fa

## 业绩变脸、净利润大幅下降

In [310]:
def Get_Profit_Notice_Info():  
    ###返回一个 DataFrame， 公司最近财报是否发生业绩变脸以及是否净利润下滑50% 以上
    
    
    #业绩预亏、业绩预告大幅下滑（最近一期）
    sql = """SELECT S_INFO_WINDCODE, S_PROFITNOTICE_DATE, S_PROFITNOTICE_PERIOD,S_PROFITNOTICE_SIGNCHANGE,S_PROFITNOTICE_CHANGEMIN,S_PROFITNOTICE_CHANGEMAX
    from ASHAREPROFITNOTICE where S_PROFITNOTICE_DATE>='20190101'"""

    df_ProfitNotice = pd.read_sql_query(sql, conn)

    df_ProfitNotice.sort_values(["S_INFO_WINDCODE","S_PROFITNOTICE_PERIOD"],inplace=True)
    df_ProfitNotice=df_ProfitNotice.groupby("S_INFO_WINDCODE").apply(lambda x:x.iloc[-1])
    df_ProfitNotice["is_profit_signchange"]=(df_ProfitNotice["S_PROFITNOTICE_SIGNCHANGE"]=='1')
    df_ProfitNotice["is_profit_collapse"]=((df_ProfitNotice["S_PROFITNOTICE_CHANGEMIN"]+df_ProfitNotice["S_PROFITNOTICE_CHANGEMAX"])<=-50)
    
    return df_ProfitNotice[["S_INFO_WINDCODE","is_profit_signchange","is_profit_collapse"]]

In [311]:
Get_Profit_Notice_Info()

Unnamed: 0_level_0,S_INFO_WINDCODE,is_profit_signchange,is_profit_collapse
S_INFO_WINDCODE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
000004.SZ,000004.SZ,False,True
000005.SZ,000005.SZ,False,False
000006.SZ,000006.SZ,False,True
000007.SZ,000007.SZ,False,True
000008.SZ,000008.SZ,False,True
000009.SZ,000009.SZ,False,False
000010.SZ,000010.SZ,False,False
000014.SZ,000014.SZ,False,True
000016.SZ,000016.SZ,False,False
000017.SZ,000017.SZ,False,True


## 政府补助占比

In [312]:
def Get_Gov_Grant_Info():
    ###返回一个DataFrame， 公司上期财报政府补助占净利润比例
    
    
    
    #政府补助（最近一期）
    sql = """SELECT S_INFO_COMPCODE, REPORT_PERIOD, AMOUNT_CURRENT_ISSUE,AMOUNT_PREVIOUS_PERIOD
            from ASHAREGOVERNMENTGRANTS where REPORT_PERIOD >='20190101'"""

    df_gov_grants = pd.read_sql_query(sql, conn)
    df_gov_grants=pd.merge(df_gov_grants,df_codemap,on="S_INFO_COMPCODE",how='left')
    df_gov_grants.sort_values(["S_INFO_WINDCODE","REPORT_PERIOD"],inplace=True)
    df_gov_grants=df_gov_grants.groupby("S_INFO_WINDCODE").apply(lambda x:x.iloc[-1])

    sql = """SELECT S_INFO_WINDCODE, ANN_DT, REPORT_PERIOD,STATEMENT_TYPE,NET_PROFIT_INCL_MIN_INT_INC
        from ASHAREINCOME where REPORT_PERIOD > '20190101' and STATEMENT_TYPE='408001000'"""
    df_FA_EPS = pd.read_sql_query(sql, conn)
    df_FA_EPS.sort_values(["S_INFO_WINDCODE","REPORT_PERIOD"],inplace=True)
    df_FA_EPS=df_FA_EPS.groupby("S_INFO_WINDCODE").apply(lambda x:x.iloc[-1])

    df_gov_grants=pd.merge(df_gov_grants,df_FA_EPS,on='S_INFO_WINDCODE',how="left")
    df_gov_grants["grants_ratio"]=df_gov_grants["AMOUNT_CURRENT_ISSUE"]/df_gov_grants["NET_PROFIT_INCL_MIN_INT_INC"]
    
    return df_gov_grants[["S_INFO_WINDCODE","grants_ratio"]]

In [313]:
Get_Gov_Grant_Info()

Unnamed: 0,S_INFO_WINDCODE,grants_ratio
0,000005.SZ,0.000000
1,000008.SZ,
2,000009.SZ,0.000000
3,000010.SZ,0.000828
4,000012.SZ,0.000000
5,000016.SZ,
6,000018.SZ,-0.000000
7,000019.SZ,0.000009
8,000020.SZ,
9,000023.SZ,0.007488


## 实际业绩亏损

In [323]:
def Get_Nege_Profit_Info():  
    ###返回一个DataFrame， 公司上期财报是否亏损
    
    
    #业绩实亏（最近一期）
    sql = """SELECT S_INFO_WINDCODE, ANN_DT, REPORT_PERIOD,STATEMENT_TYPE,NET_PROFIT_INCL_MIN_INT_INC
            from ASHAREINCOME where REPORT_PERIOD > '20190101' and STATEMENT_TYPE='408001000'"""
    df_FA_EPS = pd.read_sql_query(sql, conn)
    df_FA_EPS.sort_values(["S_INFO_WINDCODE","REPORT_PERIOD"],inplace=True)
    df_FA_EPS=df_FA_EPS.groupby("S_INFO_WINDCODE").apply(lambda x:x.iloc[-1])
    
    df_FA_EPS["is_major_sec"]=df_FA_EPS["S_INFO_WINDCODE"].apply(lambda x:((x[0] in ['0','3','6'])&(len(x)==9)))
    df_FA_EPS=df_FA_EPS[df_FA_EPS["is_major_sec"]]

    df_FA_EPS["is_nega_profit"]=(df_FA_EPS["NET_PROFIT_INCL_MIN_INT_INC"]<0)
    
    return df_FA_EPS[["S_INFO_WINDCODE","is_nega_profit"]]

In [324]:
Get_Nege_Profit_Info()

Unnamed: 0_level_0,S_INFO_WINDCODE,is_nega_profit
S_INFO_WINDCODE,Unnamed: 1_level_1,Unnamed: 2_level_1
000001.SZ,000001.SZ,False
000002.SZ,000002.SZ,False
000003.SZ,000003.SZ,True
000004.SZ,000004.SZ,True
000005.SZ,000005.SZ,False
000006.SZ,000006.SZ,False
000007.SZ,000007.SZ,True
000008.SZ,000008.SZ,False
000009.SZ,000009.SZ,False
000010.SZ,000010.SZ,False
