In [17]:
import os
import ahocorasick
from tqdm import tqdm
# 亦可以使用下面语句完成单一单元格中多个变量的输出，但是仅在当前notebook中起作用
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [36]:
import os
import ahocorasick

class QuestionClassifier:
    def __init__(self):
        # cur_dir = '/'.join(os.path.abspath(__file__).split('/')[:-1])
        cur_dir = os.getcwd()
        #　特征词路径
        self.piece_path = os.path.join(cur_dir, 'dict/piece.txt')
        self.composer_path = os.path.join(cur_dir, 'dict/composer.txt')
#         self.person_path = os.path.join(cur_dir, 'dict/person.txt')
#         self.musicstyle_path = os.path.join(cur_dir, 'dict/musicstyle_cntoen.txt')        
        self.deny_path = os.path.join(cur_dir, 'dict/deny.txt')
        
        # 加载特征词


        self.piece_wds = [i.strip() for i in open(self.piece_path,encoding="utf-8") if i.strip()]
        self.composer_wds = [i.strip() for i in open(self.composer_path,encoding="utf-8") if i.strip()]
#         self.person_wds = [i.strip() for i in open(self.person_path,encoding="utf-8") if i.strip()]
#         self.musicstyle_wds = [i.strip() for i in open(self.musicstyle_path,encoding="utf-8") if i.strip()]
#         self.region_words = set(self.piece_wds + self.composer_wds + self.person_wds + self.musicstyle_wds)
        self.region_words = set(self.piece_wds + self.composer_wds)    
        self.deny_words = [i.strip() for i in open(self.deny_path,encoding="utf-8") if i.strip()]
        
        # 构造领域actree
        self.region_tree = self.build_actree(list(self.region_words))
        # 构建词典
        self.wdtype_dict = self.build_wdtype_dict()

        
        self.composer_qwds = [ '创作的', '写的', '演奏的','作者', '哪位', '作曲家']
        self.piece_qwds = [ '什么曲子', '哪首曲子', '哪首乐曲', '曲子', '乐曲']
        
        self.musicstyle_qwds = ['属于什么', '属于', '什么类型','音乐风格', '音乐类型', '风格', '主义']
        self.recommend_qwds = ['有什么好听的', '推荐', '类似',  '一起',  '相似', '相同', '好听', '类似']
        self.relation_qwds = ['关系', '认识']
        self.life_qwds = ['出生', '生日', '出生地', '老家', '国籍', '死亡', '去世', '逝去', '一生']
        
        print('model init finished ......')
        return

    '''分类主函数'''
    def classify(self, question):
        data = {}
        music_dict = self.check_music(question)
        if not music_dict:
            return {}
        data['args'] = music_dict
        #收集问句当中所涉及到的实体类型
        types = []
        for type_ in music_dict.values():
            types += type_
        question_type = 'others'

        question_types = []

# 查询乐曲
        if self.check_words(self.composer_qwds, question) and ('piece' in types):
            question_type = 'piece_composer'
            question_types.append(question_type)

# 查询乐曲的信息
        if self.check_words(self.piece_qwds, question) and ('piece' in types):
            question_type = 'piece_info'
            question_types.append(question_type)

# 编曲关系
        if self.check_words(self.piece_qwds, question) and ('composer' in types):
            question_type = 'composer_piece'
            question_types.append(question_type)
            
# 作曲风格
        if self.check_words(self.musicstyle_qwds, question) and ('composer' in types):
            question_type = 'composer_musicstyle'
            question_types.append(question_type)
            
# 乐曲风格
        if self.check_words(self.musicstyle_qwds, question) and ('piece' in types):
            question_type = 'piece_musicstyl'
            question_types.append(question_type)

# 风格介绍
        if self.check_words(self.musicstyle_qwds, question) and ('musicstyle' in types):
            question_type = 'musicstyle_info'
            question_types.append(question_type)                               
                               
                               
# 根据曲子推荐
        if self.check_words(self.recommend_qwds, question) and ('piece' in types):
            question_type = 'piece_recommend'
            question_types.append(question_type)
# 根据作曲家推荐        
        if self.check_words(self.recommend_qwds, question) and ('composer' in types):
            question_type = 'composer_recommend'
            question_types.append(question_type)
# 根据类型推荐              
        if self.check_words(self.recommend_qwds, question) and ('musicstyle' in types):
            question_type = 'musicstyle_recommend'
            question_types.append(question_type)
# 与作曲家的关系
        if self.check_words(self.relation_qwds, question) and ('composer' in types):
            question_type = 'composer_relation'
            question_types.append(question_type)
# 作曲家与某人的关系                            
        if self.check_words(self.relation_qwds, question) and ('person' in types):
            question_type = 'composer_person'
            question_types.append(question_type)       
# 作曲家信息 
        if self.check_words(self.life_qwds, question) and ('composer' in types):
            question_type = 'composer_life'
            question_types.append(question_type)
                               
        # 将多个分类结果进行合并处理，组装成一个字典
        data['question_types'] = question_types

        return data

    '''构造词对应的类型'''
    def build_wdtype_dict(self):
        wd_dict = dict()
        for wd in self.region_words:
            wd_dict[wd] = []
            if wd in self.composer_wds:
                wd_dict[wd].append('composer')
            if wd in self.piece_wds:
                wd_dict[wd].append('piece')
#             if wd in self.person_wds:
#                 wd_dict[wd].append('person')
#             if wd in self.type_wds:
#                 wd_dict[wd].append('musicstyle')

        return wd_dict

    '''构造actree，加速过滤'''
    def build_actree(self, wordlist):
        actree = ahocorasick.Automaton()
        for index, word in enumerate(wordlist):
            actree.add_word(word, (index, word))
        actree.make_automaton()
        return actree

    '''问句过滤'''
    def check_music(self, question):
        region_wds = []
        for i in self.region_tree.iter(question):
            wd = i[1][1]
            region_wds.append(wd)
        stop_wds = []
        for wd1 in region_wds:
            for wd2 in region_wds:
                if wd1 in wd2 and wd1 != wd2:
                    stop_wds.append(wd1)
        final_wds = [i for i in region_wds if i not in stop_wds]
        final_dict = {i:self.wdtype_dict.get(i) for i in final_wds}

        return final_dict

    '''基于特征词进行分类'''
    def check_words(self, wds, sent):
        for wd in wds:
            if wd in sent:
                return True
        return False

In [37]:
class QuestionPaser:

    '''构建实体节点'''
    def build_entitydict(self, args):
        entity_dict = {}
        for arg, types in args.items():
            for type in types:
                if type not in entity_dict:
                    entity_dict[type] = [arg]
                else:
                    entity_dict[type].append(arg)

        return entity_dict

    '''解析主函数'''
    def parser_main(self, res_classify):
        args = res_classify['args']
        entity_dict = self.build_entitydict(args)
        question_types = res_classify['question_types']
        sqls = []
        for question_type in question_types:
            sql_ = {}
            sql_['question_type'] = question_type
            sql = []
            if question_type == 'piece_info':
                sql = self.sql_transfer(question_type, entity_dict.get('piece'))
                
            elif question_type == 'piece_composer':
                sql = self.sql_transfer(question_type, entity_dict.get('piece'))

            elif question_type == 'composer_piece':
                sql = self.sql_transfer(question_type, entity_dict.get('composer'))

            elif question_type == 'composer_musicstyle':
                sql = self.sql_transfer(question_type, entity_dict.get('composer'))
                
            elif question_type == 'piece_musicstyl':
                sql = self.sql_transfer(question_type, entity_dict.get('piece'))

            elif question_type == 'musicstyle_info':
                sql = self.sql_transfer(question_type, entity_dict.get('musicstyle'))
                
            elif question_type == 'piece_recommend':
                sql = self.sql_transfer(question_type, entity_dict.get('piece'))

            elif question_type == 'composer_recommend':
                sql = self.sql_transfer(question_type, entity_dict.get('composer'))
                
            elif question_type == 'musicstyle_recommend':
                sql = self.sql_transfer(question_type, entity_dict.get('musicstyle'))

            elif question_type == 'composer_relation':
                sql = self.sql_transfer(question_type, entity_dict.get('composer'))                
                
            elif question_type == 'composer_person':
                sql = self.sql_transfer(question_type, entity_dict.get('person'))

            elif question_type == 'composer_life':
                sql = self.sql_transfer(question_type, entity_dict.get('composer')) 
                

            if sql:
                sql_['sql'] = sql

                sqls.append(sql_)

        return sqls

    '''针对不同的问题，分开进行处理'''
    def sql_transfer(self, question_type, entities):
        if not entities:
            return []

        # 查询语句
        sql = []
        # 乐曲的信息
        if question_type == 'piece_info':
            sql = ["MATCH (m:piece) where m.name = '{0}' return m.name, m.opus, m.composer, m.musicstyle".format(i) for i in entities]

        # 作曲家和曲子的关系
        elif question_type == 'piece_composer':
            sql = ["MATCH (m:piece) where m.name = '{0}' return m.name, m.composer".format(i) for i in entities]
            
        # 作曲家和曲子的关系
        elif question_type == 'composer_piece':
            sql = ["MATCH (m:piece) where m.composer = '{0}' return m.name, m.opus, m.musicstyle".format(i) for i in entities]

        # 作曲家的音乐风格
        elif question_type == 'composer_musicstyle':
            sql = ["MATCH (m:composer) where m.name = '{0}' return m.name, m.musicstyle".format(i) for i in entities]

        # 乐曲音乐风格
        elif question_type == 'piece_musicstyl':
            sql = ["MATCH (m:piece) where m.name = '{0}' return m.name, m.composer, m.musicstyle".format(i) for i in entities]

        # 音乐风格信息
        elif question_type == 'musicstyle_info':
            sql = ["MATCH (m:musicstyle) where m.name = '{0}' return m.name, m.info".format(i) for i in entities]

        # 根据乐曲推荐
        elif question_type == 'piece_recommend':
            sql = ["MATCH (m:piece) where m.musicstyle = (MATCH (n:piece) where n.name = '{0}' return m.musicstyle) return m.name, m.opus, m.composer".format(i) for i in entities]

        # 根据作曲家推荐
        elif question_type == 'composer_recommend':
            sql = ["MATCH (m:piece) where m.composer = '{0}' return m.name, m.opus, m.composer".format(i) for i in entities]

        # 根据风格推荐
        elif question_type == 'musicstyle_recommend':
            sql = ["MATCH (m:piece) where m.musicstyle = '{0}' return m.name, m.composer, m.opus".format(i) for i in entities]

        # 和作曲家关系
        elif question_type == 'composer_relation':
            sql = ["MATCH (m:Disease)-[r:has_symptom]->(n:Symptom) where n.name = '{0}' return m.name, r.name, n.name".format(i) for i in entities]

        # 非作曲家和作曲家关系
        elif question_type == 'composer_person':
            sql = ["MATCH (m:composer)-[r:friend]->(n:composer) where m.name = '{0}' and n.name = '{1}'"]
        # 作曲家信息/一生
        elif question_type == 'composer_life':
            sql = ["MATCH (m:composer) where m.name = '{0}' return m.name, m.birth_time, m.birthplace, m.daeth_time".format(i) for i in entities]


        return sql

In [38]:
from py2neo import Graph

class AnswerSearcher:
    def __init__(self):
        # self.g = Graph("http://localhost:7474", username="neo4j", password="tangyudiadid0")
        self.g = Graph("http://localhost:7474", auth=("neo4j", "12345678"), name = "neo4j")
        self.num_limit = 4

    '''执行cypher查询，并返回相应结果'''
    def search_main(self, sqls):
        final_answers = []
        for sql_ in sqls:
            question_type = sql_['question_type']
            queries = sql_['sql']
            answers = []
            for query in queries:
                ress = self.g.run(query).data()
                answers += ress
            final_answer = self.answer_prettify(question_type, answers)
            if final_answer:
                final_answers.append(final_answer)
        return final_answers

    '''根据对应的qustion_type，调用相应的回复模板'''
    def answer_prettify(self, question_type, answers):
        final_answer = []
        if not answers:
            return ''
        if question_type == 'piece_info':

#             name_desc = [i['m.name'] for i in answers]
            opus_desc = [i['m.opus'] for i in answers]
            composer_desc = [i['m.composer'] for i in answers]
            musicstyle_desc = [i['m.musicstyle'] for i in answers]
            subject = answers[0]['m.name']
            final_answer = '{0}编号为{1}是由：{2}创作的，{3}类型的音乐'.format(subject, ';'.join(list(set(opus_desc))[:self.num_limit]),';'.join(list(set(composer_desc))[:self.num_limit]),';'.join(list(set(musicstyle_desc))[:self.num_limit]))


        elif question_type == 'piece_composer':
#             m.name, m.composer
            composer_desc = [i['m.composer'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}的创作风格为{1}类型'.format(subject, ';'.join(list(set(composer_desc))[:self.num_limit]))          
            
        elif question_type == 'composer_piece':
#             m.name, m.opus, m.musicstyle
            name_desc = [i['m.name'] for i in answers]
            opus_desc = [i['m.opus'] for i in answers]
            musicstyle_desc = [i['m.musicstyle'] for i in answers]
            subject = answers[0]['m.composer']
            
            final_answer = '{0}创作了{1},编号位{2},属于{3}类型'.format(subject, ';'.join(list(set(name_desc))[:self.num_limit]),';'.join(list(set(opus_desc))[:self.num_limit]),';'.join(list(set(musicstyle_desc))[:self.num_limit]))
            
        elif question_type == 'composer_musicstyle':
#             m.name, m.musicstyle
            musicstyle_desc = [i['m.musicstyle'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}的创作风格为{1}类型'.format(subject, ';'.join(list(set(musicstyle))[:self.num_limit]))
            
            
        elif question_type == 'piece_musicstyl':
#             m.name, m.composer, m.musicstyle
            composer_desc = [i['m.composer'] for i in answers]
            musicstyle_desc = [i['m.musicstyle'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}的作者是{1},属于{2}类型'.format(subject, ';'.join(list(set(composer_desc))[:self.num_limit]),';'.join(list(set(musicstyle_desc))[:self.num_limit]))
            
        elif question_type == 'musicstyle_info':
#             m.name, m.info
            info_desc = [i['m.info'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}类型：{1}'.format(subject, ';'.join(list(set(info_desc))[:self.num_limit]))
                        
        elif question_type == 'piece_recommend':
#             m.name, m.opus, m.composer
            composer_desc = [i['m.composer'] for i in answers]
            opus_desc = [i['m.opus'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}的作者是{1},编号为{2}'.format(subject, ';'.join(list(set(composer_desc))[:self.num_limit]),';'.join(list(set(opus_desc))[:self.num_limit]))

        elif question_type == 'composer_recommend':
#             m.name, m.opus
            composer_desc = [i['m.composer'] for i in answers]
            opus_desc = [i['m.opus'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}的作者是{1},编号为{2}'.format(subject, ';'.join(list(set(composer_desc))[:self.num_limit]),';'.join(list(set(opus_desc))[:self.num_limit]))
           
        elif question_type == 'musicstyle_recommend':
#             m.name, m.composer, m.opus
            composer_desc = [i['m.composer'] for i in answers]
            opus_desc = [i['m.opus'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}的作者是{1},编号为{2}'.format(subject, ';'.join(list(set(composer_desc))[:self.num_limit]),';'.join(list(set(opus_desc))[:self.num_limit]))
                        
        elif question_type == 'composer_life':
#             m.name, m.birth_time, m.birthplace, m.daeth_time
            birth_time_desc = [i['m.birth_time'] for i in answers]
            daeth_time_desc = [i['m.daeth_time'] for i in answers]
            birthplace_desc = [i['m.birthplace'] for i in answers]
            subject = answers[0]['m.name']
            
            final_answer = '{0}于{1}出生于{2}在{3}去世，'.format(subject, ';'.join(list(set(birth_time_desc))[:self.num_limit]),';'.join(list(set(daeth_time_desc))[:self.num_limit]),';'.join(list(set(birthplace_desc))[:self.num_limit]))
   
        return final_answer

searcher = AnswerSearcher()


In [39]:
classifier = QuestionClassifier()

parser = QuestionPaser()

searcher = AnswerSearcher()

model init finished ......


In [40]:
question_list = [
    "路德维希·范·贝多芬写了哪些曲子",
    "安魂曲的作者是谁",
    "路德维希·范·贝多芬的风格是什么",
    "安魂曲是什么类型的音乐",
    "你能给我推荐类似安魂曲的曲子吗",
    "你能给我推荐路德维希·范·贝多芬的曲子吗",
    "能给我介绍一下路德维希·范·贝多芬的一生吗"
] 

In [41]:
for question in question_list:
    res_classify = classifier.classify(question)
    print(res_classify)
    res_cypher = parser.parser_main(res_classify)
    print(res_cypher)

{'args': {'路德维希·范·贝多芬': ['composer']}, 'question_types': ['composer_piece']}
[{'question_type': 'composer_piece', 'sql': ["MATCH (m:piece) where m.composer = '路德维希·范·贝多芬' return m.name, m.opus, m.musicstyle"]}]
{'args': {'安魂曲': ['piece']}, 'question_types': ['piece_composer']}
[{'question_type': 'piece_composer', 'sql': ["MATCH (m:piece) where m.name = '安魂曲' return m.name, m.composer"]}]
{'args': {'路德维希·范·贝多芬': ['composer']}, 'question_types': ['composer_musicstyle']}
[{'question_type': 'composer_musicstyle', 'sql': ["MATCH (m:composer) where m.name = '路德维希·范·贝多芬' return m.name, m.musicstyle"]}]
{'args': {'安魂曲': ['piece']}, 'question_types': ['piece_musicstyl']}
[{'question_type': 'piece_musicstyl', 'sql': ["MATCH (m:piece) where m.name = '安魂曲' return m.name, m.composer, m.musicstyle"]}]
{'args': {'安魂曲': ['piece']}, 'question_types': ['piece_info', 'piece_recommend']}
[{'question_type': 'piece_info', 'sql': ["MATCH (m:piece) where m.name = '安魂曲' return m.name, m.opus, m.composer, m.mus