In [1]:
from py2neo import Graph,Node,NodeMatcher,RelationshipMatcher
import json
import re
from math import ceil

In [2]:
class NeoGraph:
    """Small query wrapper around a py2neo ``Graph``.

    The relationship helpers (``getBansui``, ``getZhiliao``, ``getZuchen``)
    all return a list of rows shaped as
    ``[len(rel['id']), other_node_name, other_node_label, rel['id']]``
    where ``rel['id']`` is the ``id`` property stored on the relationship
    (presumably the list of question ids supporting it -- TODO confirm
    against the graph schema).
    """

    def __init__(self, host="10.15.82.71", port=57687, user="neo4j", password="123"):
        """Create the graph handle plus node and relationship matchers.

        The defaults are the settings that used to be hard-coded, so
        ``NeoGraph()`` keeps working unchanged.

        NOTE(review): the password is still embedded in source as a default;
        prefer passing it in from configuration or the environment.
        """
        self.g = Graph(
            host=host,
            port=port,
            user=user,
            password=password)
        self.matcher = NodeMatcher(self.g)
        self.re_matcher = RelationshipMatcher(self.g)

    def getNode(self, key):
        """Return the first node whose ``name`` property equals ``key``, or None."""
        return self.matcher.match(name = key).first()

    @staticmethod
    def _row(rel, other):
        """Build one result row for ``rel`` as seen from the queried node."""
        # str(labels) starts with ':' (e.g. ':Plant'); [1:] drops that first character.
        return [len(rel['id']), other['name'], str(other.labels)[1:], rel['id']]

    def _getRelated(self, key, r_type):
        """Collect rows for every ``r_type`` relationship touching node ``key``.

        Incoming relationships are listed first (reporting their start node),
        followed by outgoing ones (reporting their end node). An unknown
        ``key`` yields an empty list.
        """
        node = self.getNode(key)
        res = []
        if node is None:
            return res
        for rel in self.re_matcher.match(nodes=(None, node), r_type=r_type):
            res.append(self._row(rel, rel.start_node))
        for rel in self.re_matcher.match(nodes=(node, None), r_type=r_type):
            res.append(self._row(rel, rel.end_node))
        return res

    def getBansui(self, key):
        """Rows for the '伴随' relationships of node ``key``."""
        return self._getRelated(key, '伴随')

    def getZhiliao(self, key):
        """Rows for the '治疗' relationships of node ``key``."""
        return self._getRelated(key, '治疗')

    def getZuchen(self, key):
        """Rows for the '组成' relationships of node ``key``."""
        return self._getRelated(key, '组成')

    def getQuestion2Plant(self, key):
        """Map each id on an incoming '治疗' relationship to its plant names.

        Only relationships whose start node prints its labels as ``':Plant'``
        are considered. Returns ``{single_id: [plant_name, ...]}``, or an
        empty dict when ``key`` matches no node.
        """
        node = self.getNode(key)
        res = {}
        if node is None:
            return res
        for rel in self.re_matcher.match(nodes=(None, node), r_type='治疗'):
            if str(rel.start_node.labels) == ':Plant':
                for single_id in rel['id']:
                    res.setdefault(single_id, []).append(rel.start_node['name'])
        return res
    
# Shared module-level NeoGraph instance; getJson and getPlantCluster query the graph through it.
handler = NeoGraph()

In [3]:
# Display label for a relationship's support count (row[0] of the handler
# results, i.e. len(rel['id'])). getJson falls back to "可信度90%" for any
# count that is not listed here.
confidence = dict([
    (1, "可信度5%"),
    (2, "可信度10%"),
    (3, "可信度15%"),
    (4, "可信度30%"),
    (5, "可信度40%"),
    (6, "可信度50%"),
    (7, "可信度60%"),
])

def getJson(word):
    """Build the JSON-ready payload describing the graph neighbours of ``word``.

    For each of the three relationship kinds the rows are fetched from
    ``handler``; when a kind has more than 10 rows only the 8 with the
    largest support count are kept (ascending order). Each row's first
    element (the count) is then replaced in place by a text label made of
    the kind prefix and the ``confidence`` lookup.

    Returns ``{'message': 'success', 'data': {'entity': word, 'avp': rows}}``
    with rows ordered 伴随, 治疗, 组成.
    """
    sources = (
        (handler.getBansui, "伴随："),
        (handler.getZhiliao, "治疗："),
        (handler.getZuchen, "组成："),
    )
    avp = []
    for fetch, prefix in sources:
        rows = fetch(word)
        if len(rows) > 10:
            # Stable ascending sort on the support count, then keep the top 8.
            rows = sorted(rows, key=lambda row: row[0])[-8:]
        for row in rows:
            row[0] = prefix + confidence.get(row[0], "可信度90%")
        avp.extend(rows)
    return {'message': "success", 'data': {'entity': word, 'avp': avp}}

In [4]:
def getPlant2Question(res):
    """Group ids that share exactly the same set of plant names.

    Args:
        res: mapping ``{id: [plant_name, ...]}``.

    Returns:
        ``{signature: [id, ...]}`` where ``signature`` is an int identifying
        one distinct plant combination (order-insensitive). Signatures come
        from ``hash()``, so they are only meaningful within the current
        process and must not be persisted.
    """
    result = {}
    for key, plants in res.items():
        # Hash the sorted tuple rather than the concatenated names: joining
        # with '' made ['ab', 'c'] and ['a', 'bc'] indistinguishable.
        signature = hash(tuple(sorted(plants)))
        result.setdefault(signature, []).append(key)
    return result

def getPlantCluster(disease, single_num=5, multiple_num=5):
    """Rank the plant combinations linked to ``disease`` by supporting ids.

    Returns a 4-tuple ``(single, multiple, singleid, multipleid)``:
    combinations consisting of exactly one plant, combinations of any other
    size, and the id lists backing each of them. Every list is ordered by
    descending number of ids; the single lists are cut to ``single_num``
    entries and the multiple lists to ``multiple_num``.
    """
    # Plants found for the disease, keyed by id, e.g. {123: [plantA, plantB], 124: [plantC]}.
    id2plant = handler.getQuestion2Plant(disease)
    # Ids grouped by identical plant combination, e.g. {plants_hash: [123, 125]}.
    plant2id = getPlant2Question(id2plant)

    single, singleid = [], []
    multiple, multipleid = [], []
    # Combinations backed by the most ids come first (stable for ties).
    for ids in sorted(plant2id.values(), key=len, reverse=True):
        plants = id2plant[ids[0]]
        if len(plants) == 1:
            names, groups = single, singleid
        else:
            names, groups = multiple, multipleid
        names.append(plants)
        groups.append(ids)
    return (
        single[:single_num],
        multiple[:multiple_num],
        singleid[:single_num],
        multipleid[:multiple_num],
    )
        
# Lookup "<col1>-<col2>" -> cleaned third column, loaded from a tab-separated
# file with three columns per line. Spaces and full-width parenthesised notes
# ("（...）") are removed from the third column.
# NOTE(review): the file is opened with the platform default encoding; confirm
# the file's encoding before pinning one explicitly.
drug2plant = {}
with open('drug_plant_relation.txt') as f:
    for line in f:
        # Drop the line terminator so it does not end up in the stored
        # description (it inflated len(description) used by getF1score).
        line = line.rstrip('\r\n')
        if not line:
            # A blank line (e.g. at end of file) carries no record.
            continue
        a, b, c = line.split('\t')
        drug2plant[a + '-' + b] = re.sub('（.*?）', '', c.replace(' ', ''))

def getF1score(cluster, drugDescription):
    """Score how well the plants in ``cluster`` match ``drugDescription``.

    A plant is a true positive when its name occurs in ``drugDescription``
    (``in`` test) and a false positive otherwise. The number of plants the
    description is expected to contain is taken as
    ``ceil(len(drugDescription) / 4)`` (presumably a rough per-plant length
    heuristic -- TODO confirm); expected plants not matched count as false
    negatives, never going below zero.

    Returns the F1 value as a float, or the int ``0`` when nothing matched.
    """
    expected = ceil(len(drugDescription) / 4)
    matched = [plant in drugDescription for plant in cluster]
    TP = sum(matched)
    FP = len(matched) - TP
    FN = max(expected - TP, 0)
    if TP == 0:
        return 0
    precision = TP / (TP + FP)
    recall = TP / (TP + FN)
    return 2 * precision * recall / (precision + recall)

def findDrug(clusters, match_rate=0.3):
    """Find drugs whose description matches any of the plant clusters.

    Every cluster is scored against every entry of ``drug2plant`` with
    ``getF1score``; pairs scoring at least ``match_rate`` are kept.

    Returns a list of ``[cluster_index, score_rounded_to_2_decimals, drug_key]``
    sorted by the rounded score, highest first (ties keep discovery order).
    """
    hits = []
    for index, cluster in enumerate(clusters):
        for drug_key, description in drug2plant.items():
            score = getF1score(cluster, description)
            if score >= match_rate:
                hits.append([index, round(score, 2), drug_key])
    hits.sort(key=lambda hit: hit[1], reverse=True)
    return hits

In [5]:
import pymysql, re

def getList(ids):
    """Fetch the question text (``qa_q``) for a comma-separated list of ids.

    Args:
        ids: string such as ``"123,456"``. Surrounding whitespace on each id
            is ignored and empty entries are skipped.

    Returns:
        ``{id: question_text}`` in input order. An id is left out when no row
        exists for it, or when its question text was already returned for an
        earlier id.
    """
    # NOTE(review): connection settings (including the password) are embedded
    # in source; prefer loading them from configuration.
    coon = pymysql.connect(
        host='10.15.82.58', user='root', passwd='123',
        port=3306, db='qa', charset='utf8'
    )
    res = {}
    seen_questions = set()
    try:
        with coon.cursor() as cur:
            for token in ids.split(','):
                qa_id = token.strip()
                if not qa_id:
                    continue
                # Bind the id as a parameter: it comes from the HTTP query
                # string and must never be spliced into the SQL text.
                cur.execute("select qa_q from qa_new where qa_id=%s", (qa_id,))
                row = cur.fetchone()
                if row is None:
                    # Unknown id: skip it instead of failing the whole request.
                    continue
                question = row[0]
                if question in seen_questions:
                    continue
                seen_questions.add(question)
                res[qa_id] = question
    finally:
        # Always release the connection, even when a query fails.
        coon.close()
    return res

In [None]:
from flask import Flask, request
from flask_cors import CORS,cross_origin

# Flask application object; the @app.route decorators below register their handlers on it.
app = Flask(__name__)

@app.route('/main', methods=['GET', 'POST'])
@cross_origin()
def callback_main():
    """Handle ``/main``: return the graph neighbours of the ``entity`` query parameter.

    Responds with the pretty-printed JSON produced by ``getJson``. When the
    lookup fails the response body is ``{"message": "error"}``. A missing or
    empty ``entity`` is replaced by the placeholder ``"EOF"``.
    """
    entity = request.args.get("entity") or "EOF"
    try:
        res = getJson(entity)
        return json.dumps(res, ensure_ascii=False, indent=4)
    except Exception:
        # Catch Exception rather than everything, so SystemExit and
        # KeyboardInterrupt still propagate, and record the traceback
        # instead of discarding it.
        app.logger.exception("/main failed for entity %r", entity)
        return json.dumps({'message':'error'})

@app.route('/disease', methods=['GET', 'POST'])
@cross_origin()
def callback_disease():
    """Handle ``/disease``: plant clusters and matching drugs for a disease.

    The response JSON carries ``entity``, the top 5 single-plant and
    multi-plant clusters with their id lists (``single``, ``multiple``,
    ``singleid``, ``multipleid``) and up to 5 drug matches for each group
    (``single_drug`` requires a score of 1, ``multiple_drug`` at least 0.2).
    When anything fails the body is ``{"message": "error"}``. A missing or
    empty ``entity`` is replaced by the placeholder ``"EOF"``.
    """
    entity = request.args.get("entity") or "EOF"
    try:
        single, multiple, singleid, multipleid = getPlantCluster(entity, single_num=5, multiple_num=5)
        res = {
            'entity': entity,
            'single': single,
            'multiple': multiple,
            'singleid': singleid,
            'multipleid': multipleid,
            'single_drug': findDrug(single, match_rate=1)[:5],
            'multiple_drug': findDrug(multiple, match_rate=0.2)[:5],
        }
        return json.dumps(res, ensure_ascii=False, indent=4)
    except Exception:
        # Catch Exception rather than everything, so SystemExit and
        # KeyboardInterrupt still propagate, and record the traceback
        # instead of discarding it.
        app.logger.exception("/disease failed for entity %r", entity)
        return json.dumps({'message':'error'})

@app.route('/QAlist', methods=['GET', 'POST'])
@cross_origin()
def callback_list():
    """Handle ``/QAlist``: question texts for the comma-separated ``id`` parameter.

    Responds with the pretty-printed JSON mapping returned by ``getList``.
    Like the other routes, a failure yields ``{"message": "error"}`` rather
    than an unhandled server error. A missing or empty ``id`` is replaced by
    the placeholder ``"EOF"``.
    """
    # Named ``ids`` so the builtin ``id`` is not shadowed.
    ids = request.args.get("id") or "EOF"
    try:
        return json.dumps(getList(ids), ensure_ascii=False, indent=4)
    except Exception:
        app.logger.exception("/QAlist failed for id %r", ids)
        return json.dumps({'message':'error'})
    
# Start Flask's built-in server when this code runs as the main module.
# host='0.0.0.0' listens on all network interfaces; the service port is 5787.
if __name__ == '__main__':
    app.run(host='0.0.0.0',port=5787)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:5787/ (Press CTRL+C to quit)
10.190.78.101 - - [09/Mar/2020 10:45:04] "[37mGET /main?entity=%E4%B8%81%E9%A6%99 HTTP/1.1[0m" 200 -
10.190.78.101 - - [09/Mar/2020 10:45:29] "[37mGET /disease?entity=%E5%92%B3%E5%97%BD HTTP/1.1[0m" 200 -
10.190.78.101 - - [09/Mar/2020 10:46:09] "[37mGET /QAlist?id=1394703,1418136,1420788,1420896,1421356,1499477,1499743,1499949,1500138,1500230,1500642,1500985,1501087,1501707,1560596,1662167,1669361,1672386,1828673,1867359,1888219,1888223,1888231,1888243,1888264,1888265,1888427,1888829,1888842,1889123,1946651,1947866,1948147,1958353,2000139,2002542,2006937,2015539,2017545,2022035,2028555,2033834,2040610,2041027,2041066,2041133,2041142,2041165,2041307,2041361,2041402,2041492,2041923,2042637,2043096,2043119,2043243,2043305,2043466,2043470,2043664,2043942,2044123,2044209,2044516,2044544,2044677,2124013,2139890,2155136,2155137,2220359,2250976,2253605,2254709,2262580,2267844,2388241,2389678,2391999,2400501,2413841,2637779,2639442