In [2]:
import networkx as nx
# Load the pickled author graph; the cells below read a "Publications"
# attribute from its nodes.
# NOTE(review): nx.read_gpickle was removed in NetworkX 3.0 -- confirm the
# pinned NetworkX version. This call unpickles the file, so only load graph
# files that come from a trusted source.
author_graph = nx.read_gpickle("author_graph.gpickle")

### Helper functions

### Most Occurring Keywords in Author's Publications:
Param: name of author to be inspected, author graph, number of keywords to be returned (default 20)

Return: list of (keyword, count) tuples for the most occurring keywords, sorted by count in descending order

In [4]:
def relevant_keywords(author_name, author_graph, return_count=20):
    """Rank the capitalized words that occur most often in an author's publications.

    Every entry of the author's "Publications" node attribute is converted to
    a string and split on whitespace. "." and "?" are removed from each word,
    and a word is counted as a keyword only if it is longer than four
    characters and starts with an uppercase letter.

    Args:
        author_name: Key of the author's node in ``author_graph``.
        author_graph: Graph whose nodes carry a "Publications" attribute
            (an iterable of publication entries).
        return_count: Maximum number of keywords to return.

    Returns:
        A list of ``(keyword, count)`` tuples sorted by descending count.
        Keywords with equal counts keep the order in which they were first
        seen. The list is empty when the author has no qualifying words.
    """
    keywords = {}
    publications = author_graph.nodes[author_name]["Publications"]
    for publication in publications:
        for word in str(publication).split():
            word = word.replace(".", "").replace("?", "")
            # Capitalization is a filter, not part of the "already seen" test:
            # previously every non-capitalized word fell through to the reset
            # branch and was stored with a count pinned at 1.
            if len(word) <= 4 or not word[0].isupper():
                continue
            keywords[word] = keywords.get(word, 0) + 1
    ranked = sorted(keywords.items(), key=lambda item: item[1], reverse=True)
    return ranked[0:return_count]

In [6]:
# Sanity check on one author: number of publications stored on the "Wei Li"
# node, the type of the loaded graph, and that author's top 20 keywords.
print(len(author_graph.nodes["Wei Li"]["Publications"]))
print(type(author_graph))
print(relevant_keywords("Wei Li", author_graph, 20))

226
<class 'networkx.classes.graph.Graph'>
[('Based', 44), ('Network', 16), ('System', 14), ('Neural', 13), ('Method', 12), ('Learning', 12), ('Algorithm', 11), ('Networks', 11), ('Study', 11), ('Using', 11), ('Model', 11), ('Research', 10), ('Design', 10), ('Analysis', 9), ('Classification', 7), ('Detection', 7), ('Internet', 6), ('Segmentation', 6), ('Power', 6), ('Optimization', 5)]


### Most Occurring Keywords in Coauthors' Publications:

Finds the most occurring keywords in the publication titles of every coauthor of the given author

Param: name of author to be inspected, author graph, number of keywords to be returned (default 20)

Return: list of (keyword, count) tuples for the most occurring keywords, sorted by count in descending order

In [3]:
def coauthor_relevant_keywords(author_name, author_graph, return_count=20):
    """Aggregate the most frequent keywords across an author's coauthors.

    A coauthor is any node directly connected to ``author_name`` in
    ``author_graph``. Each coauthor contributes the ``(keyword, count)``
    pairs returned by ``relevant_keywords`` (called with its default
    ``return_count``), and the counts of identical keywords are summed.

    Args:
        author_name: Key of the author's node in ``author_graph``.
        author_graph: Graph connecting authors to their coauthors; it is
            passed through to ``relevant_keywords`` for every coauthor.
        return_count: Maximum number of keywords to return.

    Returns:
        A list of ``(keyword, total_count)`` tuples sorted by descending
        total. The list is empty when the author has no coauthors.
    """
    keywords = {}
    # Direct neighbours are exactly the nodes a depth-1 traversal from the
    # author would reach, without the traversal overhead.
    for coauthor in author_graph.neighbors(author_name):
        if coauthor == author_name:
            # A self-loop would otherwise count the author's own keywords.
            continue
        # relevant_keywords needs the graph as well as the name; the call
        # previously omitted it and raised TypeError.
        for word, count in relevant_keywords(coauthor, author_graph):
            # Start from the coauthor's real count instead of 1 so the first
            # occurrence of a keyword is not undercounted.
            keywords[word] = keywords.get(word, 0) + count
    ranked = sorted(keywords.items(), key=lambda item: item[1], reverse=True)
    return ranked[0:return_count]

In [None]:
# Number of publications stored on the "Wei Li" node, followed by the result
# of coauthor_relevant_keywords for that author limited to 20 entries.
print(len(author_graph.nodes["Wei Li"]["Publications"]))
print(coauthor_relevant_keywords("Wei Li", author_graph, 20))

### Most Relevant Phrases in Author Publication Titles

In [10]:
import yake
def relevant_phrases(author_name, author_graph, language="en", max_ngram_size=3,
                     deduplication_threshold=0.9, num_keywords=3):
    """Collect YAKE key phrases from each of an author's publications.

    Every entry of the author's "Publications" node attribute is passed to a
    YAKE keyword extractor, and the first element of each extracted item
    (presumably the phrase text -- confirm against the installed YAKE
    version) is lowercased and appended to the result.

    Args:
        author_name: Key of the author's node in ``author_graph``.
        author_graph: Graph whose nodes carry a "Publications" attribute
            (an iterable of publication entries).
        language: Passed to YAKE as ``lan``.
        max_ngram_size: Passed to YAKE as ``n``.
        deduplication_threshold: Passed to YAKE as ``dedupLim``.
        num_keywords: Passed to YAKE as ``top``; applied per publication.

    Returns:
        A list of lowercased phrases in publication order. Phrases that occur
        in several publications appear once per occurrence.
    """
    author_publications = author_graph.nodes[author_name]["Publications"]
    # The extractor settings do not depend on the publication, so build the
    # extractor once instead of once per loop iteration.
    extractor = yake.KeywordExtractor(
        lan=language,
        n=max_ngram_size,
        dedupLim=deduplication_threshold,
        top=num_keywords,
        features=None,
    )
    phrase_list = []  # relevant phrases to be returned
    for pub in author_publications:
        for kw in extractor.extract_keywords(pub):
            phrase_list.append(kw[0].lower())
    return phrase_list

In [None]:
# Bare expression so the notebook displays the list returned for the
# "Radu Timofte" node.
relevant_phrases("Radu Timofte", author_graph)