In [1]:
from py2neo import Graph,Node,NodeMatcher,RelationshipMatcher
import json
import re
from math import ceil

In [2]:
class NeoGraph:
    def __init__(self):
        self.g = Graph(
            host="10.15.82.71",
            port=57687,
            user="neo4j",
            password="123")
        self.matcher = NodeMatcher(self.g)
        self.re_matcher = RelationshipMatcher(self.g)

    def getNode(self, key):
        return self.matcher.match(name = key).first()

    def getBansui(self, key):
        node = self.getNode(key)
        res = []
        if node is None:
            return res
        for i in self.re_matcher.match(nodes=(None,node), r_type='伴随'):
            res.append([len(i['id']), i.start_node['name'], str(i.start_node.labels)[1:], i['id']])
            
        for i in self.re_matcher.match(nodes=(node,None), r_type='伴随'):
            res.append([len(i['id']), i.end_node['name'], str(i.end_node.labels)[1:], i['id']])
        return res

    def getZhiliao(self, key):
        node = self.getNode(key)
        res = []
        if node is None:
            return res
        for i in self.re_matcher.match(nodes=(None,node), r_type='治疗'):
            res.append([len(i['id']), i.start_node['name'], str(i.start_node.labels)[1:], i['id']])
            
        for i in self.re_matcher.match(nodes=(node,None), r_type='治疗'):
            res.append([len(i['id']), i.end_node['name'], str(i.end_node.labels)[1:], i['id']])
        return res

    def getZuchen(self, key):
        node = self.getNode(key)
        res = []
        if node is None:
            return res
        for i in self.re_matcher.match(nodes=(None,node), r_type='组成'):
            res.append([len(i['id']), i.start_node['name'], str(i.start_node.labels)[1:], i['id']])
            
        for i in self.re_matcher.match(nodes=(node,None), r_type='组成'):
            res.append([len(i['id']), i.end_node['name'], str(i.end_node.labels)[1:], i['id']])
        return res    
    
    def getQuestion2Plant(self, key):
        node = self.getNode(key)
        res = {}
        if node is None:
            return res        
        for i in self.re_matcher.match(nodes=(None,node), r_type='治疗'):
            if str(i.start_node.labels) == ':Plant':
                for single_id in i['id']:
                    res.setdefault(single_id, []).append(i.start_node['name'])
        return res
    
handler = NeoGraph()

In [3]:
confidence = {1:"可信度5%", 
              2:"可信度10%",
              3:"可信度15%",
              4:"可信度30%",
              5:"可信度40%",
              6:"可信度50%",
              7:"可信度60%"}

def getJson(word):
    res = {}
    bansui = handler.getBansui(word)
    zhiliao = handler.getZhiliao(word)
    zuchen = handler.getZuchen(word)
    if len(bansui)>10:
        bansui = sorted(bansui, key=lambda x:x[0])[-8:]
    if len(zhiliao)>10:
        zhiliao = sorted(zhiliao, key=lambda x:x[0])[-8:]
    if len(zuchen)>10:
        zuchen = sorted(zuchen, key=lambda x:x[0])[-8:]
    for i in bansui:
        i[0] = "伴随：" + confidence.get(i[0],"可信度90%")
    for i in zhiliao:
        i[0] = "治疗：" + confidence.get(i[0],"可信度90%")
    for i in zuchen:
        i[0] = "组成：" + confidence.get(i[0],"可信度90%")
    bansui.extend(zhiliao)
    bansui.extend(zuchen)
    data = {}
    data['entity'] = word
    data['avp'] = bansui
    res['message'] = "success"
    res['data'] = data
    return res

In [33]:
def getPlant2Question(res):
    result = {}
    for key in res:
        result.setdefault(hash(''.join(sorted(res[key]))), []).append(key)
    return result

def getPlantCluster(disease, single_num=5, multiple_num=5):
    single = []
    multiple = []
    # 根据疾病名称找到对应的中药{123:[云南白药，白术],124:[三七]}
    id2plant = handler.getQuestion2Plant(disease)
    # 根据中药集合找到问题{Plants_hash:[123,125]}
    plant2id = getPlant2Question(id2plant)
    # 找到拥有问题id数最多的中药集合前5名返回
    for key in sorted(plant2id, key=lambda x:len(plant2id[x]),reverse=True):
        print(plant2id[key])
        tem = id2plant[plant2id[key][0]]
        if len(tem) == 1:
            single.append(tem)
        else:
            multiple.append(tem)
    return single[:single_num], multiple[:multiple_num]
        
drug2plant = {}
with open('drug_plant_relation.txt') as f:
    for line in f.readlines():
        a,b,c = line.split('\t')
        drug2plant[a+'-'+b] = re.sub('（.*?）', '', c.replace(' ',''))

def getF1score(cluster, drugDescription):
    TP,FP = 0,0
    plant_num_assess = ceil(len(drugDescription)/4)
    for i in cluster:
        if i in drugDescription:
            TP+=1
            plant_num_assess-=1
        else:
            FP+=1
    FN = plant_num_assess if plant_num_assess>=0 else 0
    if TP == 0:
        return 0
    p = TP/(TP+FP)
    r = TP/(TP+FN)
    return 2*p*r/(p+r)

def findDrug(clusters, match_rate=0.3):
    res = []
    for cluster,id in zip(clusters,range(len(clusters))):
        for key in drug2plant:
            score = getF1score(cluster, drug2plant[key])
            if score >= match_rate:
                #res.append([score, key, drug2plant[key], cluster])
                res.append([id, round(score, 2), key])
    return sorted(res, key = lambda x:x[1], reverse = True)

In [30]:
import pymysql, re
coon = pymysql.connect(
    host = '10.15.82.58',user = 'root',passwd = '123',
    port = 3306,db = 'qa',charset = 'utf8'
)

def getList(ids):
    cur = coon.cursor()
    res = {}
    for id in ids.split(','):
        cur.execute("select qa_q,qa_a from qa_new where qa_id=" + id)
        tem = cur.fetchall()
        res[re.sub('<.*?>', '', tem[0][0])] = re.sub('<.*?>', '', tem[0][1])
    return res

In [None]:
from flask import Flask, request
from flask_cors import CORS,cross_origin

app = Flask(__name__)

@app.route('/main', methods=['GET', 'POST'])
@cross_origin()
def callback_main():
    entity = request.args.get("entity") or "EOF"
    try:
        res = getJson(entity)
        return json.dumps(res, ensure_ascii=False, indent=4)
    except:
        return json.dumps({'message':'error'})

@app.route('/disease', methods=['GET', 'POST'])
@cross_origin()
def callback_disease():
    entity = request.args.get("entity") or "EOF"
    try:
        res = {}
        res['entity'] = entity
        res['single'], res['multiple'], res['singleid'], res['multipleid'] =  getPlantCluster(entity, single_num=5, multiple_num=5)
        res['single_drug'] = findDrug(res['single'], match_rate=1)[:5]
        res['multiple_drug'] = findDrug(res['multiple'], match_rate=0.2)[:5]
        return json.dumps(res, ensure_ascii=False, indent=4)
    except:
        return json.dumps({'message':'error'})

@app.route('/QAlist', methods=['GET', 'POST'])
@cross_origin()
def callback_list():
    id = request.args.get("id") or "EOF"
    return json.dumps(getList(id), ensure_ascii=False, indent=4)
    
if __name__ == '__main__':
    app.run(host='0.0.0.0',port=5787)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:5787/ (Press CTRL+C to quit)


[1072046, 1119209, 1127905, 1131856, 1180203, 1180214, 1180258, 1180261, 1180270, 1180282, 1180284, 1180286, 1180288, 1180292, 1180306, 1180310, 1180322, 1180328, 1180375, 1180390, 1180394, 1180412, 1180450, 1180453, 1180463, 1180474, 1180487, 1180493, 1180495, 1180499, 1180501, 1180505, 1180569, 1180571, 1180573, 1180576, 1180586, 1180588, 1180590, 1180592, 1180594, 1180603, 1180637, 1180668, 1180896, 1180947, 1180977, 1180991, 1210110, 1224539]
[1069709, 1152636, 1192850, 1192872, 1192874, 1192881, 1192885, 1192887, 1192891, 1192896, 1192916, 1192926, 1192928, 1192930, 1192963, 1193010, 1193012, 1193017, 1193021, 1193031, 1193061, 1193063, 1193070, 1193072, 1193074, 1193076, 1193082, 1193092, 1193124, 1193146, 1193165, 1193246, 1193248, 1193401, 1193431, 1193458, 1193465, 1353713, 1353718]
[52472, 1119020, 1129213, 1133683, 1133693, 1133700, 1133702, 1133710, 1133724, 1133726, 1133728, 1133748, 1133753, 1133757, 1133778, 1133790, 1133792, 1133800, 1133822, 1133824, 1133826, 1133832, 

222.205.17.152 - - [10/Dec/2019 16:53:10] "[37mGET /disease?entity=%E4%B8%AD%E9%A3%8E HTTP/1.1[0m" 200 -
