In [1]:
from py2neo import Graph,Node,NodeMatcher,RelationshipMatcher
import json
import re
from math import ceil

In [2]:
class NeoGraph:
    def __init__(self):
        self.g = Graph(
            host="10.15.82.71",
            port=57687,
            user="neo4j",
            password="123")
        self.matcher = NodeMatcher(self.g)
        self.re_matcher = RelationshipMatcher(self.g)

    def getNode(self, key):
        return self.matcher.match(name = key).first()

    def getQuestion2Plant(self, key):
        node = self.getNode(key)
        res = {}
        if node is None:
            return res        
        for i in self.re_matcher.match(nodes=(None,node), r_type='治疗'):
            if str(i.start_node.labels) == ':Plant':
                for single_id in i['id']:
                    res.setdefault(single_id, []).append(i.start_node['name'])
        return res

handler = NeoGraph()

In [3]:
def getPlant2Question(res):
    result = {}
    for key in res:
        result.setdefault(hash(''.join(sorted(res[key]))), []).append(key)
    return result

def getPlantCluster(disease, single_num=5, multiple_num=5):
    single = []
    multiple = []
    # 根据疾病名称找到对应的中药{123:[云南白药，白术],124:[三七]}
    id2plant = handler.getQuestion2Plant(disease)
    # 根据中药集合找到问题{Plants_hash:[123,125]}
    plant2id = getPlant2Question(id2plant)
    # 找到拥有问题id数最多的中药集合前5名返回
    for key in sorted(plant2id, key=lambda x:len(plant2id[x]),reverse=True):
        tem = id2plant[plant2id[key][0]]
        if len(tem) == 1:
            single.append(tem)
        else:
            multiple.append(tem)
    return single[:single_num], multiple[:multiple_num]
        
drug2plant = {}
with open('drug_plant_relation.txt') as f:
    for line in f.readlines():
        a,b,c = line.split('\t')
        drug2plant[a+'-'+b] = re.sub('（.*?）', '', c.replace(' ',''))

计算图谱获得的方剂组成和知识库中方剂组成的F1-score

在图谱，在方剂 | 在图谱，不在方剂

不在图谱，在方剂 | 不在图谱，不在方剂

2   1

3   4

precision: 2/(2+1)

recall:    2/(2+3)

F1: 2*p*r/(p+r)

In [4]:
def getF1score(cluster, drugDescription):
    TP,FP = 0,0
    plant_num_assess = ceil(len(drugDescription)/4)
    for i in cluster:
        if i in drugDescription:
            TP+=1
            plant_num_assess-=1
        else:
            FP+=1
    FN = plant_num_assess if plant_num_assess>=0 else 0
    if TP == 0:
        return 0
    p = TP/(TP+FP)
    r = TP/(TP+FN)
    return 2*p*r/(p+r)

In [11]:
def findDrug(clusters, match_rate=0.3):
    res = []
    for cluster,id in zip(clusters,range(len(clusters))):
        for key in drug2plant:
            score = getF1score(cluster, drug2plant[key])
            if score >= match_rate:
                #res.append([score, key, drug2plant[key], cluster])
                res.append([id, round(score, 2), key])
    return sorted(res, key = lambda x:x[1], reverse = True)

In [13]:
disease = "慢性骨髓炎"
single, multiple = getPlantCluster(disease, single_num=5, multiple_num=5)
single_drug = findDrug(single, match_rate=1)[:5]
multiple_drug = findDrug(multiple, match_rate=0.2)[:5]
print(single, multiple, single_drug, multiple_drug, sep="\n-----------------------------------------------------------------------------------------------\n")

[['橄榄'], ['葡萄'], ['绣线菊'], ['铜皮'], ['木防己']]
-----------------------------------------------------------------------------------------------
[['山香', '隔山香'], ['茉莉', '臭茉莉'], ['当归', '葡萄']]
-----------------------------------------------------------------------------------------------
[[0, 1.0, '89115-黑香散'], [0, 1.0, '96174-橄榄饮'], [0, 1.0, '96177-橄榄散']]
-----------------------------------------------------------------------------------------------
[[2, 0.67, '33737-当归散'], [2, 0.67, '63942-独圣散'], [0, 0.5, '42812-护心胶囊'], [1, 0.5, '80479-接骨散'], [2, 0.5, '337-一味生新饮']]


In [14]:
from flask import Flask, request
from flask_cors import CORS,cross_origin

def create_app():
    app = Flask(__name__)
    @app.route('/', methods=['GET', 'POST'])
    @cross_origin()
    def callback():
        entity = request.args.get("entity") or "EOF"
        try:
            res = {}
            res['entity'] = entity
            res['single'], res['multiple'] =  getPlantCluster(entity, single_num=5, multiple_num=5)
            res['single_drug'] = findDrug(res['single'], match_rate=1)[:5]
            res['multiple_drug'] = findDrug(res['multiple'], match_rate=0.2)[:5]
            return json.dumps(res, ensure_ascii=False, indent=4)
        except:
            return json.dumps({'message':'error'})
    return app

app = create_app()

if __name__ == '__main__':
    app.run(host='0.0.0.0',port=5788)

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://0.0.0.0:5788/ (Press CTRL+C to quit)
222.205.5.158 - - [27/Nov/2019 15:05:41] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:09:05] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:12:39] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:13:16] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:14:30] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:16:02] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:17:47] "[37mGET /?entity=%E6%85%A2%E6%80%A7%E9%AA%A8%E9%AB%93%E7%82%8E HTTP/1.1[0m" 200 -
222.205.5.158 - - [27/Nov/2019 15:17:54] "[37mGET /?entity=