- 中国人寿及其同业数据，补充八分类和倾向性结果
> - 七月及以前数据：db_docinfo_backup、db_docinfo_text_backup
> - 七月之后数据：db_docinfo_trade、db_docinfo_text

# 基本设置

In [1]:
import numpy as np
import pandas as pd
import os
import datetime

import requests,json
from sklearn.externals import joblib

import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [2]:
from toolkits.setup.date_time import get_day_list
from toolkits.setup import specific_func
specific_func.set_ch_pd()

# 连接数据库

In [3]:
engine = specific_func.get_engine('cbirc')

In [4]:
label_dic={'监管':1,'行业':2,'产品销售':3,'资本市场':4,'公司内部管理':5,'消费服务':6,'其他相关报道':7,'噪音':8}
class_name_dict = {v: k for k, v in label_dic.items()}
class_name_dict

{1: '监管',
 2: '行业',
 3: '产品销售',
 4: '资本市场',
 5: '公司内部管理',
 6: '消费服务',
 7: '其他相关报道',
 8: '噪音'}

# 七月及以前数据

In [5]:
ip_port = '47.93.183.157:10000'

headers={'content-type':'application/json'}
url_cor = "http://%s/judge_correlation_i"%ip_port
url_tend = "http://%s/tendency_analysis_i"%ip_port
url_warn = "http://%s/early_warning_i"%ip_port

file_list_1 = ['raw/人寿 7月.xlsx', 'raw/同业  七月.xlsx']

In [85]:
for filename in ['raw/人寿 7月.xlsx',]:
    print(filename, '  ----------------')
    data = pd.read_excel(filename)
    data_null = data[data['八大分险类型'].isnull() | data['文章倾向性'].isnull()]
    data_full = data[data['八大分险类型'].notnull() & data['文章倾向性'].notnull()]
    print('总量：', data.shape)
    print('缺失值数量：', data_null.shape)
    print('无缺失值数量：', data_full.shape)

    id_list = tuple(data_null['id'].unique().tolist())
    print('id_list: ', len(id_list))
    
    chunksize = 100
    loop = int(len(id_list) / chunksize) + 1
    title_content_com = pd.DataFrame()
    for i in range(loop):
        print('id_list_sel: ', 0 + i * chunksize, chunksize + i * chunksize)
        id_list_sel = id_list[0 + i * chunksize:chunksize + i * chunksize]
        sql_count = "select count(t1.id) \
                            from db_docinfo_backup t1 \
                                where t1.id in {0}".format(id_list_sel)
        count = pd.read_sql_query(sql_count, engine)
        print('count: ', list(count.values)[0])

        sql_title = "select t1.id, t1.title \
                            from db_docinfo_backup t1 \
                                where t1.id in {0}".format(id_list_sel)

        sql_content = "select t1.id, t2.text as content \
                            from db_docinfo_backup t1, db_docinfo_text_backup t2 \
                                where t1.urlhash = t2.urlhash \
                                    and t1.id in {0}".format(id_list_sel)

        title_id = pd.read_sql_query(sql_title, engine)
        content_id = pd.read_sql_query(sql_content, engine)
        title_content = pd.merge(title_id, content_id, on = 'id', how = 'left')
        
        data = {"types":3, "record":title_content.iloc[:,[0, 1, 2]].to_dict(orient = 'records')}

        # 相关性模型
        result = requests.post(url_cor, data = json.dumps(data),
                               headers=headers, allow_redirects=True)
        json_data = json.loads(result.text)
        cor_elapsed_time = json_data['elapsed_time']
        print('cor elapsed_time: ', cor_elapsed_time)
        cor_list = [[j['cor'], j['id']] for j in json_data['docs']]
        cor_list = pd.DataFrame(cor_list, columns = ['八大分险类型', 'id'])
        
        # 倾向性模型
        try :
            result = requests.post(url_tend, data = json.dumps(data),
                                   headers=headers, allow_redirects=True)
            json_data = json.loads(result.text)
            tend_elapsed_time = json_data['elapsed_time'] 
            print('tend elapsed_time: ', tend_elapsed_time)
            tendency_list = [[j['tendency'], j['id']] for j in json_data['docs']]            
        except Exception as e:
            print('error: ', e)
            tendency_list = []
            for index in range(len(data['record'])):
#                 print(index, '.................')
                data_sel = {"types":3, "record":[data['record'][index]]}
#                 print('data_sel: ', data_sel)
                try :
                    result = requests.post(url_tend, data = json.dumps(data_sel),
                                           headers=headers, allow_redirects=True)
                    json_data = json.loads(result.text) 
                    tendency_list.append([json_data['docs'][0]['tendency'], json_data['docs'][0]['id']])
                except Exception as e1:
                    print('error again...    ', e1)
                    print(data['record'][index])
                    tendency_list.append([0, data['record'][index]['id']])           

        tendency_list = pd.DataFrame(tendency_list, columns = ['文章倾向性', 'id'])

        cor_tend = pd.merge(cor_list, tendency_list, on = 'id', how = 'inner')
        title_content_com = pd.concat([title_content_com, cor_tend], axis = 0)        
        title_content_com.index = range(title_content_com.shape[0])
        print('    %s  title_id: '%i, title_id.shape)
        print('    %s  content_id: '%i, content_id.shape)
        print('    %s  title_content: '%i, title_content.shape)
        print('    %s  title_content_com: '%i, title_content_com.shape)
    
    data_null = data_null.drop(['八大分险类型', '文章倾向性'], axis = 1)
    print('data_null: ', data_null.shape) 
    data_null = pd.merge(data_null, title_content_com, on = 'id', how = 'left')
    print('combined_data: ', data_null.shape)    
    data_null_still = data_null[data_null['八大分险类型'].isnull() | data_null['文章倾向性'].isnull()]
    print('data_null_still: ', data_null_still.shape) 

    data_null[''] = data_null['八大分险类型'].apply(lambda x: class_name_dict[x])
    data_null[''] = data_null['文章倾向性'].apply(lambda x: '非负' if x == 0 else '负面')
    
    update_data = pd.concat([data_null, data_full], axis = 0)
    print('update_data: ', update_data.shape) 
    
    writer = pd.ExcelWriter('result/{0}'.format(filename.split('/')[1]),
                            engine='xlsxwriter',
                            options={'strings_to_urls': False})

    update_data.to_excel(writer, sheet_name='Sheet1', index = False)
    writer.save()   

raw/人寿 7月.xlsx   ----------------
总量： (8053, 12)
缺失值数量： (8028, 12)
无缺失值数量： (25, 12)
id_list:  7426
id_list_sel:  0 1000
count:  [1000]
cor elapsed_time:  32.00
tend elapsed_time:  111.00
    0  title_id:  (1000, 2)
    0  content_id:  (1000, 2)
    0  title_content:  (1000, 3)
    0  title_content_com:  (1000, 3)
id_list_sel:  1000 2000
count:  [1000]
cor elapsed_time:  22.00
tend elapsed_time:  129.00
    1  title_id:  (1000, 2)
    1  content_id:  (1000, 2)
    1  title_content:  (1000, 3)
    1  title_content_com:  (2000, 3)
id_list_sel:  2000 3000
count:  [1000]
cor elapsed_time:  25.00
error:  Expecting value: line 1 column 1 (char 0)
error again...     Expecting value: line 1 column 1 (char 0)
{'id': 54974461, 'title': '中国人寿“康宁终身”（至尊版）产品详解+案例+病种明细', 'content': nan}
error again...     ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))
{'id': 55510647, 'title': '国寿新防癌：保100多种癌症+160%健康保障，6重给付！', 'content': '“ 每个中国人都必须有一张国寿防癌险（优享版），人人都买得起的生命防

error again...     ('Connection aborted.', RemoteDisconnected('Remote end closed connection without response',))
{'id': 56688549, 'title': '【发改动态】我委召开金融形势分析座谈会', 'content': '2018年7月19日，南京市发改委蓝军副主任在玉兰路8号国资大厦主持召开金融形势分析座谈会。人民银行南京分行营业管理部胡亮亮副处长，工商银行南京分行陈金华副总经理，紫金农商行总行王清国副行长，华泰联合证券债务融资部刘江峰主管，中国人寿南京分公司综合管理部钱旭强副经理，江苏省信用再担保集团南京分公司杨喜荣总经理，南京紫金投资集团陈玲总会计师，南京新工投资集团姚兆年总会计师，南京江宁科技创业投资集团叶桂副总经理，大公国际资信评估有限公司华东一区孙高升总经理，江苏国衡土地房地产资产评估咨询有限公司刘清军董事长，南京市发改委综合处余其刚处长，财金处胡耀调研员参加座谈。会议由南京紫金投资集团承办。 \xa0 \xa0 与会人员就2018年上半年经济和金融运行情况，特别是融资情况，以及下半年宏观经济和金融走势及对全市经济运行的影响等方面进行了充分交流和讨论，对做好下半年工作提出了相关意见和建议。 今年以来，全市认真落实中央和省市决策部署，坚持稳中求进工作总基调，认真践行新发展理念和高质量发展要求，全面对标找差、创新实干，聚力聚焦创新名城建设，统筹推进稳增长、调结构、促改革、惠民生、防风险等各项工作，经济运行呈现出“运行平稳、稳中向好、稳中提质”的良好态势。 来源：委财金处'}
error again...     Expecting value: line 1 column 1 (char 0)
{'id': 56809476, 'title': '突破！中国人寿进入《财富》“世界500强”前50位', 'content': nan}
error again...     Expecting value: line 1 column 1 (char 0)
{'id': 57054771, 'title': '突破！中国人寿进入《财富》“世界500强”前50位', 'content': nan}
error again...     Ex

KeyboardInterrupt: 

In [None]:
for filename in ['raw/同业  七月.xlsx', ]:
    print(filename, '  ----------------')
    data = pd.read_excel(filename)
    data_null = data[data['八大分险类型'].isnull() | data['文章倾向性'].isnull()]
    data_full = data[data['八大分险类型'].notnull() & data['文章倾向性'].notnull()]
    print('总量：', data.shape)
    print('缺失值数量：', data_null.shape)
    print('无缺失值数量：', data_full.shape)

    id_list = tuple(data_null['id'].unique().tolist())
    print('id_list: ', len(id_list))
    
    chunksize = 100
    loop = int(len(id_list) / chunksize) + 1
    title_content_com = pd.DataFrame()
    for i in range(loop):
        print('id_list_sel: ', 0 + i * chunksize, chunksize + i * chunksize)
        id_list_sel = id_list[0 + i * chunksize:chunksize + i * chunksize]
        sql_count = "select count(t1.id) \
                            from db_docinfo_backup t1 \
                                where t1.id in {0}".format(id_list_sel)
        count = pd.read_sql_query(sql_count, engine)
        print('count: ', list(count.values)[0])

        sql_title = "select t1.id, t1.title \
                            from db_docinfo_backup t1 \
                                where t1.id in {0}".format(id_list_sel)

        sql_content = "select t1.id, t2.text as content \
                            from db_docinfo_backup t1, db_docinfo_text_backup t2 \
                                where t1.urlhash = t2.urlhash \
                                    and t1.id in {0}".format(id_list_sel)

        title_id = pd.read_sql_query(sql_title, engine)
        content_id = pd.read_sql_query(sql_content, engine)
        title_content = pd.merge(title_id, content_id, on = 'id', how = 'left')
        
        data = {"types":3, "record":title_content.iloc[:,[0, 1, 2]].to_dict(orient = 'records')}

        # 相关性模型
        result = requests.post(url_cor, data = json.dumps(data),
                               headers=headers, allow_redirects=True)
        json_data = json.loads(result.text)
        cor_elapsed_time = json_data['elapsed_time']
        print('cor elapsed_time: ', cor_elapsed_time)
        cor_list = [[j['cor'], j['id']] for j in json_data['docs']]
        cor_list = pd.DataFrame(cor_list, columns = ['八大分险类型', 'id'])
        
        # 倾向性模型
        try :
            result = requests.post(url_tend, data = json.dumps(data),
                                   headers=headers, allow_redirects=True)
            json_data = json.loads(result.text)
            tend_elapsed_time = json_data['elapsed_time'] 
            print('tend elapsed_time: ', tend_elapsed_time)
            tendency_list = [[j['tendency'], j['id']] for j in json_data['docs']]            
        except Exception as e:
            print('error: ', e)
            tendency_list = []
            for index in range(len(data['record'])):
#                 print(index, '.................')
                data_sel = {"types":3, "record":[data['record'][index]]}
#                 print('data_sel: ', data_sel)
                try :
                    result = requests.post(url_tend, data = json.dumps(data_sel),
                                           headers=headers, allow_redirects=True)
                    json_data = json.loads(result.text) 
                    tendency_list.append([json_data['docs'][0]['tendency'], json_data['docs'][0]['id']])
                except Exception as e1:
                    print('error again...    ', e1)
                    print(data['record'][index])
                    tendency_list.append([0, data['record'][index]['id']])           

        tendency_list = pd.DataFrame(tendency_list, columns = ['文章倾向性', 'id'])

        cor_tend = pd.merge(cor_list, tendency_list, on = 'id', how = 'inner')
        title_content_com = pd.concat([title_content_com, cor_tend], axis = 0)        
        title_content_com.index = range(title_content_com.shape[0])
        print('    %s  title_id: '%i, title_id.shape)
        print('    %s  content_id: '%i, content_id.shape)
        print('    %s  title_content: '%i, title_content.shape)
        print('    %s  title_content_com: '%i, title_content_com.shape)
    
    data_null = data_null.drop(['八大分险类型', '文章倾向性'], axis = 1)
    print('data_null: ', data_null.shape) 
    data_null = pd.merge(data_null, title_content_com, on = 'id', how = 'left')
    print('combined_data: ', data_null.shape)    
    data_null_still = data_null[data_null['八大分险类型'].isnull() | data_null['文章倾向性'].isnull()]
    print('data_null_still: ', data_null_still.shape) 

    data_null[''] = data_null['八大分险类型'].apply(lambda x: class_name_dict[x])
    data_null[''] = data_null['文章倾向性'].apply(lambda x: '非负' if x == 0 else '负面')
    
    update_data = pd.concat([data_null, data_full], axis = 0)
    print('update_data: ', update_data.shape) 
    
    writer = pd.ExcelWriter('result/{0}'.format(filename.split('/')[1]),
                            engine='xlsxwriter',
                            options={'strings_to_urls': False})

    update_data.to_excel(writer, sheet_name='Sheet1', index = False)
    writer.save()   

# 七月之后数据

In [6]:
file_list_2 = ['raw/人寿  8-10月.xlsx', 'raw/同业  8-10月.xlsx']

In [7]:
for filename in ['raw/人寿  8-10月.xlsx',]:
    print(filename, '  ----------------')
    data = pd.read_excel(filename)
    data_null = data[data['八大分险类型'].isnull() | data['文章倾向性'].isnull()]
    data_full = data[data['八大分险类型'].notnull() & data['文章倾向性'].notnull()]
    print('总量：', data.shape)
    print('缺失值数量：', data_null.shape)
    print('无缺失值数量：', data_full.shape)

    id_list = tuple(data_null['id'].unique().tolist())
    print('id_list: ', len(id_list))
    
    chunksize = 100
    loop = int(len(id_list) / chunksize) + 1
    title_content_com = pd.DataFrame()
    for i in range(loop):
        print('id_list_sel: ', 0 + i * chunksize, chunksize + i * chunksize)
        id_list_sel = id_list[0 + i * chunksize:chunksize + i * chunksize]
        sql_count = "select count(t1.id) \
                            from db_docinfo t1 \
                                where t1.id in {0}".format(id_list_sel)
        count = pd.read_sql_query(sql_count, engine)
        print('count: ', list(count.values)[0])

        sql_title = "select t1.id, t1.title \
                            from db_docinfo t1 \
                                where t1.id in {0}".format(id_list_sel)

        sql_content = "select t1.id, t2.text as content \
                            from db_docinfo t1, db_docinfo_text t2 \
                                where t1.urlhash = t2.urlhash \
                                    and t1.id in {0}".format(id_list_sel)

        title_id = pd.read_sql_query(sql_title, engine)
        content_id = pd.read_sql_query(sql_content, engine)
        title_content = pd.merge(title_id, content_id, on = 'id', how = 'left')
        
        data = {"types":3, "record":title_content.iloc[:,[0, 1, 2]].to_dict(orient = 'records')}

        # 相关性模型
        result = requests.post(url_cor, data = json.dumps(data),
                               headers=headers, allow_redirects=True)
        json_data = json.loads(result.text)
        cor_elapsed_time = json_data['elapsed_time']
        print('cor elapsed_time: ', cor_elapsed_time)
        cor_list = [[j['cor'], j['id']] for j in json_data['docs']]
        cor_list = pd.DataFrame(cor_list, columns = ['八大分险类型', 'id'])

        # 倾向性模型
        try :
            result = requests.post(url_tend, data = json.dumps(data),
                                   headers=headers, allow_redirects=True)
            json_data = json.loads(result.text)
            tend_elapsed_time = json_data['elapsed_time'] 
            print('tend elapsed_time: ', tend_elapsed_time)
            tendency_list = [[j['tendency'], j['id']] for j in json_data['docs']]            
        except Exception as e:
            print('error: ', e)
            tendency_list = []
            for index in range(len(data['record'])):
#                 print(index, '.................')
                data_sel = {"types":3, "record":[data['record'][index]]}
#                 print('data_sel: ', data_sel)
                try :
                    result = requests.post(url_tend, data = json.dumps(data_sel),
                                           headers=headers, allow_redirects=True)
                    json_data = json.loads(result.text) 
                    tendency_list.append([json_data['docs'][0]['tendency'], json_data['docs'][0]['id']])
                except Exception as e1:
                    print('error again...    ', e1)
                    print(data['record'][index])
                    tendency_list.append([0, data['record'][index]['id']])            

        tendency_list = pd.DataFrame(tendency_list, columns = ['文章倾向性', 'id'])

        cor_tend = pd.merge(cor_list, tendency_list, on = 'id', how = 'inner')
        title_content_com = pd.concat([title_content_com, cor_tend], axis = 0)                
        print('    %s  title_id: '%i, title_id.shape)
        print('    %s  content_id: '%i, content_id.shape)
        print('    %s  title_content: '%i, title_content.shape)
        print('    %s  title_content_com: '%i, title_content_com.shape)
    
    title_content_com.index = range(title_content_com.shape[0])
    data_null = data_null.drop(['八大分险类型', '文章倾向性'], axis = 1)
    print('data_null: ', data_null.shape) 
    data_null = pd.merge(data_null, title_content_com, on = 'id', how = 'left')
    print('combined_data: ', data_null.shape)    
    data_null_still = data_null[data_null['八大分险类型'].isnull() | data_null['文章倾向性'].isnull()]
    print('data_null_still: ', data_null_still.shape) 
    data_null['八大分险类型'] = data_null['八大分险类型'].apply(lambda x: class_name_dict[x])
    data_null['文章倾向性'] = data_null['文章倾向性'].apply(lambda x: '非负' if x == 0 else '负面')
    
    update_data = pd.concat([data_null, data_full], axis = 0)
    print('update_data: ', update_data.shape) 
    
    writer = pd.ExcelWriter('result/{0}'.format(filename.split('/')[1]),
                            engine='xlsxwriter',
                            options={'strings_to_urls': False})

    update_data.to_excel(writer, sheet_name='Sheet1', index = False)
    writer.save()    

raw/人寿  8-10月.xlsx   ----------------
总量： (16316, 12)
缺失值数量： (8598, 12)
无缺失值数量： (7718, 12)
id_list:  7472
id_list_sel:  0 100
count:  [100]
cor elapsed_time:  3.00
tend elapsed_time:  15.00
    0  title_id:  (100, 2)
    0  content_id:  (100, 2)
    0  title_content:  (100, 3)
    0  title_content_com:  (100, 3)
id_list_sel:  100 200
count:  [100]
cor elapsed_time:  2.00
error:  Expecting value: line 1 column 1 (char 0)
error again...     Expecting value: line 1 column 1 (char 0)
{'id': 63620278, 'title': '反洗钱不到位 中国人寿被处罚70万元', 'content': nan}
    1  title_id:  (100, 2)
    1  content_id:  (99, 2)
    1  title_content:  (100, 3)
    1  title_content_com:  (200, 3)
id_list_sel:  200 300
count:  [100]
cor elapsed_time:  3.00
tend elapsed_time:  23.00
    2  title_id:  (100, 2)
    2  content_id:  (100, 2)
    2  title_content:  (100, 3)
    2  title_content_com:  (300, 3)
id_list_sel:  300 400
count:  [100]
cor elapsed_time:  2.00
tend elapsed_time:  14.00
    3  title_id:  (100, 2)
    3

cor elapsed_time:  3.00
tend elapsed_time:  19.00
    37  title_id:  (100, 2)
    37  content_id:  (100, 2)
    37  title_content:  (100, 3)
    37  title_content_com:  (3800, 3)
id_list_sel:  3800 3900
count:  [100]
cor elapsed_time:  4.00
tend elapsed_time:  22.00
    38  title_id:  (100, 2)
    38  content_id:  (100, 2)
    38  title_content:  (100, 3)
    38  title_content_com:  (3900, 3)
id_list_sel:  3900 4000
count:  [100]
cor elapsed_time:  2.00
tend elapsed_time:  27.00
    39  title_id:  (100, 2)
    39  content_id:  (100, 2)
    39  title_content:  (100, 3)
    39  title_content_com:  (4000, 3)
id_list_sel:  4000 4100
count:  [100]
cor elapsed_time:  3.00
tend elapsed_time:  22.00
    40  title_id:  (100, 2)
    40  content_id:  (100, 2)
    40  title_content:  (100, 3)
    40  title_content_com:  (4100, 3)
id_list_sel:  4100 4200
count:  [100]
cor elapsed_time:  3.00
tend elapsed_time:  18.00
    41  title_id:  (100, 2)
    41  content_id:  (100, 2)
    41  title_content:  

In [8]:
    title_content_com.index = range(title_content_com.shape[0])
    data_null = data_null.drop(['八大分险类型', '文章倾向性'], axis = 1)
    print('data_null: ', data_null.shape) 
    data_null = pd.merge(data_null, title_content_com, on = 'id', how = 'left')
    print('combined_data: ', data_null.shape)    
    data_null_still = data_null[data_null['八大分险类型'].isnull() | data_null['文章倾向性'].isnull()]
    print('data_null_still: ', data_null_still.shape) 
    data_null['八大分险类型'] = data_null['八大分险类型'].apply(lambda x: class_name_dict[x])
    data_null['文章倾向性'] = data_null['文章倾向性'].apply(lambda x: '非负' if x == 0 else '负面')
    
    update_data = pd.concat([data_null, data_full], axis = 0)
    print('update_data: ', update_data.shape) 
    
    writer = pd.ExcelWriter('result/{0}'.format(filename.split('/')[1]),
                            engine='xlsxwriter',
                            options={'strings_to_urls': False})

    update_data.to_excel(writer, sheet_name='Sheet1', index = False)
    writer.save() 

data_null:  (8598, 11)
combined_data:  (8598, 13)
data_null_still:  (0, 13)
update_data:  (16316, 13)


In [None]:
for filename in ['raw/同业  8-10月.xlsx', ]:
    print(filename, '  ----------------')
    data = pd.read_excel(filename)
    data_null = data[data['八大分险类型'].isnull() | data['文章倾向性'].isnull()]
    data_full = data[data['八大分险类型'].notnull() & data['文章倾向性'].notnull()]
    print('总量：', data.shape)
    print('缺失值数量：', data_null.shape)
    print('无缺失值数量：', data_full.shape)

    id_list = tuple(data_null['id'].unique().tolist())
    print('id_list: ', len(id_list))
    
    chunksize = 100
    loop = int(len(id_list) / chunksize) + 1
    title_content_com = pd.DataFrame()
    for i in range(loop):
        print('id_list_sel: ', 0 + i * chunksize, chunksize + i * chunksize)
        id_list_sel = id_list[0 + i * chunksize:chunksize + i * chunksize]
        sql_count = "select count(t1.id) \
                            from db_docinfo t1 \
                                where t1.id in {0}".format(id_list_sel)
        count = pd.read_sql_query(sql_count, engine)
        print('count: ', list(count.values)[0])

        sql_title = "select t1.id, t1.title \
                            from db_docinfo t1 \
                                where t1.id in {0}".format(id_list_sel)

        sql_content = "select t1.id, t2.text as content \
                            from db_docinfo t1, db_docinfo_text t2 \
                                where t1.urlhash = t2.urlhash \
                                    and t1.id in {0}".format(id_list_sel)

        title_id = pd.read_sql_query(sql_title, engine)
        content_id = pd.read_sql_query(sql_content, engine)
        title_content = pd.merge(title_id, content_id, on = 'id', how = 'left')
        
        data = {"types":3, "record":title_content.iloc[:,[0, 1, 2]].to_dict(orient = 'records')}

        # 相关性模型
        result = requests.post(url_cor, data = json.dumps(data),
                               headers=headers, allow_redirects=True)
        json_data = json.loads(result.text)
        cor_elapsed_time = json_data['elapsed_time']
        print('cor elapsed_time: ', cor_elapsed_time)
        cor_list = [[j['cor'], j['id']] for j in json_data['docs']]
        cor_list = pd.DataFrame(cor_list, columns = ['八大分险类型', 'id'])

        # 倾向性模型
        try :
            result = requests.post(url_tend, data = json.dumps(data),
                                   headers=headers, allow_redirects=True)
            json_data = json.loads(result.text)
            tend_elapsed_time = json_data['elapsed_time'] 
            print('tend elapsed_time: ', tend_elapsed_time)
            tendency_list = [[j['tendency'], j['id']] for j in json_data['docs']]            
        except Exception as e:
            print('error: ', e)
            tendency_list = []
            for index in range(len(data['record'])):
#                 print(index, '.................')
                data_sel = {"types":3, "record":[data['record'][index]]}
#                 print('data_sel: ', data_sel)
                try :
                    result = requests.post(url_tend, data = json.dumps(data_sel),
                                           headers=headers, allow_redirects=True)
                    json_data = json.loads(result.text) 
                    tendency_list.append([json_data['docs'][0]['tendency'], json_data['docs'][0]['id']])
                except Exception as e1:
                    print('error again...    ', e1)
                    print(data['record'][index])
                    tendency_list.append([0, data['record'][index]['id']])            

        tendency_list = pd.DataFrame(tendency_list, columns = ['文章倾向性', 'id'])

        cor_tend = pd.merge(cor_list, tendency_list, on = 'id', how = 'inner')
        title_content_com = pd.concat([title_content_com, cor_tend], axis = 0)                
        print('    %s  title_id: '%i, title_id.shape)
        print('    %s  content_id: '%i, content_id.shape)
        print('    %s  title_content: '%i, title_content.shape)
        print('    %s  title_content_com: '%i, title_content_com.shape)
    
    title_content_com.index = range(title_content_com.shape[0])
    data_null = data_null.drop(['八大分险类型', '文章倾向性'], axis = 1)
    print('data_null: ', data_null.shape) 
    data_null = pd.merge(data_null, title_content_com, on = 'id', how = 'left')
    print('combined_data: ', data_null.shape)    
    data_null_still = data_null[data_null['八大分险类型'].isnull() | data_null['文章倾向性'].isnull()]
    print('data_null_still: ', data_null_still.shape) 
    data_null[''] = data_null['八大分险类型'].apply(lambda x: class_name_dict[x])
    data_null[''] = data_null['文章倾向性'].apply(lambda x: '非负' if x == 0 else '负面')
    
    update_data = pd.concat([data_null, data_full], axis = 0)
    print('update_data: ', update_data.shape) 
    
    writer = pd.ExcelWriter('result/{0}'.format(filename.split('/')[1]),
                            engine='xlsxwriter',
                            options={'strings_to_urls': False})

    update_data.to_excel(writer, sheet_name='Sheet1', index = False)
    writer.save()    