# DCSNS Bibliographic Networks

In [1]:
from IPython.display import clear_output
!pip install crossrefapi
clear_output()

import csv, re, random, operator, os, math, re, string, copy, itertools, pickle, datetime, pandas as pd, numpy as np, matplotlib.pyplot as plt, networkx as nx
from urllib.request import urlopen
from zipfile import ZipFile
from io import BytesIO
from collections import Counter, OrderedDict
from itertools import combinations, permutations
import operator
from wordcloud import WordCloud
from networkx.algorithms import community
import pygraphviz
from networkx.drawing.nx_agraph import graphviz_layout
from networkx.algorithms import community
import community as louvain
import spacy 
nlp = spacy.load('en_core_web_lg')

# crossrefapi
from crossref.restful import Journals
journals = Journals()
from crossref.restful import Works
works = Works()

# Plotting-related
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
# from matplotlib import pyplot as plt
# from matplotlib.gridspec import GridSpec
from matplotlib.ticker import FuncFormatter
import matplotlib.colors as mcolors
import matplotlib._color_data as mcd
%matplotlib inline
import seaborn as sns
import plotly
from plotly import tools
# import plotly.plotly as py
import chart_studio.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
#connects JS to notebook so plots work inline
init_notebook_mode(connected=True)

import bokeh
from bokeh.io import push_notebook, show, output_notebook, save
import bokeh.plotting as bp
from bokeh.plotting import figure, save, output_file, show #, from_networkx
from bokeh.models import (ColumnDataSource, LabelSet, Label, BoxSelectTool, Circle, EdgesAndLinkedNodes, HoverTool,MultiLine, NodesAndLinkedEdges, Plot, Range1d, TapTool,)
from holoviews.element.graphs import layout_nodes
# bokeh.sampledata.download()
from bokeh.sampledata.airport_routes import routes, airports

output_notebook()
import holoviews as hv
from holoviews import dim, opts
hv.extension('bokeh', 'matplotlib')
from holoviews.operation import  gridmatrix
from holoviews.operation.datashader import datashade, bundle_graph
from holoviews import Graph, Nodes
from holoviews.plotting.bokeh import GraphPlot, LabelsPlot
import hvplot.networkx as hvnx
import hvplot.pandas

import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning) 
warnings.simplefilter('ignore')


pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.



In [2]:
def g_diagnostics(G,st):
    """Print a structural summary of the graph ``G``.

    Parameters
    ----------
    G : networkx graph
        The graph to describe; directed and undirected graphs take different
        connectivity branches.
    st : str
        Name of the graph, interpolated into every printed sentence.

    Returns
    -------
    None
        All results are reported through ``print``.

    Transitivity, average clustering and diameter are computed inside ``try``
    blocks: when the computation raises, the error text is printed and the
    report carries on.
    """
    directed = G.is_directed()

    def say(flag, when_true, when_false):
        # Print whichever sentence matches the flag; both take only the graph name.
        print((when_true if flag else when_false) % st)

    def components(is_whole, count, parts, whole_msg, split_msg, largest_msg):
        # Report one notion of connectivity. When the graph is split, also print
        # the size of its largest component. Returns the connectivity flag.
        if is_whole(G):
            print(whole_msg % st)
            return True
        print(split_msg % (st, count(G)))
        largest = G.subgraph(max(parts(G), key=len))
        print(largest_msg % (len(largest.nodes()), len(largest.edges())))
        return False

    # --- size and basic type -------------------------------------------------
    print("The %s has %i nodes and %i edges" %(st,len(G.nodes()), len(G.edges())),'\n')
    say(directed, "The %s is a directed graph", "The %s is an undirected graph")
    say(G.is_multigraph(), "The %s is a multigraph", "The %s is a simple graph")
    say(nx.is_weighted(G), "The %s is a weighted graph", "The %s is an unweighted graph")
    say(nx.is_bipartite(G), "The %s is a bipartite graph", "The %s is not a bipartite graph")
    say(nx.is_tree(G), "The %s is a tree \n", "The %s is not a tree \n")

    # --- connectivity --------------------------------------------------------
    connected = False
    if not directed:
        connected = components(
            nx.is_connected, nx.number_connected_components, nx.connected_components,
            "The %s is a connected graph",
            "The %s is a disconnected graph and it has %i connected components",
            "The largest connected component of this graph has %i nodes and %i edges")
    else:
        components(
            nx.is_strongly_connected, nx.number_strongly_connected_components,
            nx.strongly_connected_components,
            "The %s is a strongly connected graph",
            "The %s is not strongly connected and it has %i strongly connected components",
            "The largest strongly connected component of this graph has %i nodes and %i edges")
        components(
            nx.is_weakly_connected, nx.number_weakly_connected_components,
            nx.weakly_connected_components,
            "The %s is a weakly connected graph",
            "The %s is not weakly connected and it has %i weakly connected components",
            "The largest weakly connected component of this graph has %i nodes and %i edges")

    # --- isolates and density ------------------------------------------------
    isolated = list(nx.isolates(G))
    if isolated:
        print("The %s has %i isolates \n" %(st,len(isolated)))
    else:
        print("The %s has no isolates \n" %st)
    print("The density of the %s is %.3f" %(st,nx.density(G)))

    # --- clustering-type measures --------------------------------------------
    try:
        print("The transitivity of the %s is %.3f" %(st,nx.transitivity(G)) )
    except Exception as e:
        print("Transitivity not computable: "+str(e))
    if directed:
        print("The reciprocity of the %s is %.3f" %(st,nx.reciprocity(G)))
    else:
        try:
            print("The average clustering of the %s is %.3f" %(st,nx.algorithms.cluster.average_clustering(G)))
        except Exception as e:
            print("Average clustering not computable: "+str(e))
        # Path length is only reported when the undirected graph is connected.
        if connected:
            print("The average shortest path length of the %s is %.3f" %(st,nx.algorithms.shortest_paths.generic.average_shortest_path_length(G)))

    # --- diameter ------------------------------------------------------------
    try:
        diameter=nx.diameter(G)
        print("The diameter of the %s is %i" %(st,diameter))
    except Exception as e:
        print("Diameter not computable: "+str(e))

In [3]:
def _neighbor_labels(G, neighbors_of):
    """Map every node of ``G`` to a sorted, comma-separated string of the nodes yielded by ``neighbors_of(node)``."""
    # str() keeps the join working for non-string node ids; string ids are unaffected.
    return {n: ", ".join(sorted(map(str, neighbors_of(n)))) for n in G.nodes()}


def _girvan_newman_partition(G):
    """Return ``{node: community index}`` for the second split produced by Girvan-Newman, and print the community count."""
    communities_generator = community.girvan_newman(G)
    next(communities_generator)  # the first split is skipped; the second one is plotted
    next_level_communities = next(communities_generator)
    # Largest community first, so index 0 is always the biggest group.
    lc = sorted(sorted(map(sorted, next_level_communities)), key=len, reverse=True)
    partition = {n: i for i, c in enumerate(lc) for n in c}
    print("No. of Girvan-Newman communities", len(set(partition.values())))
    return partition


def hvnx_plot(G,ctype,pos,width,height,node_size,node_cmap,edge_color,edge_line_width,
              title,bundled,nodelabels,xoffset,yoffset,
              arrowhead_length,selection_mode,selection_policy,
              edge_hover_line_color,node_hover_fill_color,
              fontsize,text_font_size, text_color,bgcolor):
    """Draw ``G`` with hvplot/holoviews, colouring nodes by detected community.

    Side effect: node attributes are written onto ``G`` so that they are
    available to the plot:

    * directed graph, ``ctype == 1``: ``'Girvan_Newman_community'``,
      ``'in-degree'``, ``'in_neighbors'``, ``'out-degree'``, ``'out_neighbors'``;
    * undirected graph, ``ctype == 1``: ``'Girvan-Newman_community'``,
      ``'degree'``, ``'connections'``;
    * undirected graph, ``ctype == 0``: ``'Louvain_community'``, ``'degree'``,
      ``'connections'`` (nodes carrying a self-loop get degree 0 and no
      connections).

    Parameters
    ----------
    G : networkx graph
    ctype : int
        1 selects Girvan-Newman communities, 0 selects Louvain communities.
    pos : layout passed straight to ``hvnx.draw``.
    width, height, title, fontsize, arrowhead_length, selection_policy,
    edge_hover_line_color, node_hover_fill_color :
        Forwarded to ``graph.opts`` for the final graph element.
    node_size, node_cmap, edge_color, edge_line_width :
        Forwarded to ``graph.opts`` right after drawing.
    bundled : int
        0 keeps straight edges; any other value runs ``bundle_graph`` first.
    nodelabels : int
        1 overlays ``hv.Labels`` built from the graph's nodes.
    xoffset, yoffset, text_font_size, text_color, bgcolor :
        Options of the label overlay.
    selection_mode :
        Accepted for call compatibility; it is not used by this function.

    Returns
    -------
    The graph element (or graph * labels overlay), or ``None`` when ``ctype``
    is 0 for a directed graph or is neither 0 nor 1.
    """
    directed = nx.is_directed(G)

    if directed:
        if ctype == 0:
            print("Louvain communities are not computable for directed graphs!")
            return None
        if ctype != 1:
            return None
        in_neighbors = _neighbor_labels(G, G.predecessors)
        out_neighbors = _neighbor_labels(G, G.successors)
        community_key = 'Girvan_Newman_community'
        partition = _girvan_newman_partition(G)
        for n in G.nodes():
            G.nodes[n][community_key] = partition[n]
            G.nodes[n]['in-degree'] = G.in_degree(n)
            G.nodes[n]['in_neighbors'] = in_neighbors[n]
            G.nodes[n]['out-degree'] = G.out_degree(n)
            G.nodes[n]['out_neighbors'] = out_neighbors[n]
    elif ctype == 1:
        connections = _neighbor_labels(G, G.neighbors)
        community_key = 'Girvan-Newman_community'
        partition = _girvan_newman_partition(G)
        for n in G.nodes():
            G.nodes[n][community_key] = partition[n]
            G.nodes[n]['degree'] = G.degree(n)
            G.nodes[n]['connections'] = connections[n]
    elif ctype == 0:
        connections = _neighbor_labels(G, G.neighbors)
        community_key = 'Louvain_community'
        partition_l = louvain.best_partition(G)
        print("No. of Louvain communities", len(set(partition_l.values())))
        for n in G.nodes():
            G.nodes[n][community_key] = partition_l[n]
            # Nodes with a self-loop are shown as having no degree/connections.
            if (n, n) in G.edges():
                G.nodes[n]['degree'] = 0
                G.nodes[n]['connections'] = ""
            else:
                G.nodes[n]['degree'] = G.degree(n)
                G.nodes[n]['connections'] = connections[n]
    else:
        return None

    graph = hvnx.draw(G, pos)
    graph.opts(edge_color=edge_color, edge_line_width=edge_line_width, node_size=node_size,
               node_color=community_key, node_cmap=node_cmap)
    if not directed:
        # Only the undirected plots are padded.
        graph.opts(padding=0.15)

    if bundled != 0:
        graph = bundle_graph(graph)
    graph.opts(selection_policy=selection_policy, title=title,
               edge_hover_line_color=edge_hover_line_color,
               node_hover_fill_color=node_hover_fill_color,
               fontsize=fontsize, width=width, height=height,
               arrowhead_length=arrowhead_length)

    if nodelabels == 1:
        labels = hv.Labels(graph.nodes, ['x', 'y'], 'index')
        graph = graph * labels.opts(xoffset=xoffset, yoffset=yoffset,
                                    text_font_size=text_font_size,
                                    text_color=text_color, bgcolor=bgcolor)
    return graph

In [4]:
node_list = [] # a list of titles and references
edge_list = [] # includes rows of format [a, b] where 'a' references 'b'
type_dict = {} # key: node, value: type ('title' or 'reference'), holds all possible node values


def comp_add(node_list, node, n_type):
    """Register ``node`` in ``node_list`` and return its canonical representation.

    Requires: ``n_type`` is either 'title' or 'reference'.

    Modifies: ``node_list`` (the list passed in) and the module-level
              ``type_dict``.

    Effects:  ``node`` is compared with every entry of ``node_list``; two
              values match when either one contains the other.

              * On the first match, type 'title' is favoured: a 'title'
                replaces the stored entry and is recorded in ``type_dict``;
                a 'reference' is dropped in favour of the stored entry,
                which is returned instead. The replaced entry keeps its own
                key in ``type_dict``.
              * Without a match, ``node`` is appended to ``node_list`` and
                recorded in ``type_dict`` under ``n_type``.

    Returns:  the representation callers should use for this node.

    NOTE(review): containment matching means a short identifier also matches
    any longer one that contains it (e.g. 'abc' matches 'abcdef'), and every
    call scans the whole list - confirm this is still intended now that DOIs
    are used as identifiers.
    """
    for i, existing in enumerate(node_list):
        if node in existing or existing in node:
            if n_type == 'title':
                # Promote the stored representation to the title form.
                node_list[i] = node
                type_dict[node] = n_type
                return node
            # A reference resolves to whatever is already stored.
            return existing

    # 'node' is new: both node types are stored the same way.
    node_list.append(node)
    type_dict[node] = n_type
    return node

## 1. The Citation Network

In [5]:
# CSV file that the parsing loop below opens (relative path)
datafile = 'SCOPUS Disease Control through Social Network Surveillance.csv' 
# Topic name and the dataset label built from it; `st` is interpolated into printed
# messages and plot titles
name = "Disease Control through Social Network Surveillance"
st = "Scopus bibliometric dataset on %s" %name

In [6]:
# ''' 
# Main loop to parse data into nodes and edges. It takes a lot of time to run!
# '''
# cnt = 0
# rows = 0
# with open(datafile) as csv_file:
#     reader = csv.DictReader(csv_file)
#     for row in reader:
#         rows += 1
#         # Add node with unique identifier
#         source_node = row['Title']
#         source_node = comp_add(node_list, source_node, 'title')
#         # Add an edge for each source and its references
#         # Some rows may not any references. to check, verify 'refs' is a string
#         if type(row['References']) != '':  
#             cnt += 1
#             refs = row['References'].split(';')
#             for ref in refs:
#                 # Disregard web references, and clean data for any formatting inconsistencies
#                 if 'https://' not in ref and 'http://' not in ref and ref != " " and ref != "":  
#                     ref = comp_add(node_list, ref, 'reference')
#                     edge = [source_node, ref] # 'source_node' references 'ref'
#                     edge_list.append(edge)
                    
'''
Main loop to parse data into nodes and edges. It takes a lot of time to run!

For every CSV row that has a DOI and a non-empty 'References' field, the DOI is
looked up through `works.doi`; each entry of the returned 'reference' list
becomes an edge [citing DOI, cited identifier] in `edge_list`.
'''
import sys   # needed by the error report in the except branch below
import time
cnt = 0   # number of rows whose lookup result contained a 'reference' list
rows = 0  # number of CSV rows read
# newline='' is what the csv module asks for when it is handed a file object
with open(datafile, newline='') as csv_file:
    reader = csv.DictReader(csv_file)
    for row in reader:
        rows += 1
        # The DOI is both the node identifier and the lookup key
        source_node = row['DOI']
        # Nothing to look up without a DOI; rows with no references in the
        # export are skipped as well
        if not source_node or row['References'] == '':
            continue
        time.sleep(.05)  # short pause between successive lookups
        crw = works.doi(source_node)
        if not crw:
            continue
        try:
            if 'reference' in crw:
                source_node = comp_add(node_list, source_node, 'title')
                cnt += 1
                if cnt % 100 == 0:
                    print('the count is', cnt)
                for ref in crw['reference']:
                    # Each entry is indexed by string keys below, i.e. it is a
                    # mapping; the former URL/blank-string test only inspected
                    # its keys and never filtered anything, so it was dropped.
                    # Identify the cited work by DOI, else by a long enough 'key'.
                    if 'DOI' in ref:
                        ref = ref['DOI']
                    elif 'key' in ref and len(ref['key']) > 9:
                        ref = ref['key']
                    else:
                        continue
                    ref = comp_add(node_list, ref, 'reference')
                    edge = [source_node, ref] # 'source_node' references 'ref'
                    edge_list.append(edge)
        except Exception:
            # Best effort: report the failing record and carry on with the next
            # row. KeyboardInterrupt is deliberately not caught, so the run can
            # still be stopped.
            print('----------', sys.exc_info()[-1].tb_lineno,'---------',
                  sys.exc_info(),'\n\n', crw, '\n\n\n')

the count is 100
the count is 200
the count is 300
the count is 400
the count is 500


In [7]:
# Construction of the citation graph: every collected identifier becomes a node and
# every [citing, cited] pair becomes a directed edge citing -> cited
G = nx.DiGraph()
G.add_nodes_from(node_list)
G.add_edges_from(edge_list)

In [8]:
# G0: a copy of the full graph with only the isolated (degree-0) nodes dropped
G0=nx.DiGraph(G)
nodes_to_remove = [n for n in G0.nodes() if G0.degree(n) == 0]
G0.remove_nodes_from(nodes_to_remove)

print("The unpruned citation graph of the %s has %i nodes and %i edges" %(st,len(G0.nodes()),len(G0.edges())))

# G is pruned in place, in two passes: first the nodes of degree 1 ...
nodes_to_remove = [n for n in G.nodes() if G.degree(n) == 1]
G.remove_nodes_from(nodes_to_remove)

# ... then every node that is isolated once those are gone
nodes_to_remove = [n for n in G.nodes() if G.degree(n) == 0]
G.remove_nodes_from(nodes_to_remove)

print("The pruned citation graph of the %s has %i nodes and %i edges" %(st,len(G.nodes()),len(G.edges())))

The unpruned citation graph of the Scopus bibliometric dataset on Disease Control through Social Network Surveillance has 12078 nodes and 14365 edges
The pruned citation graph of the Scopus bibliometric dataset on Disease Control through Social Network Surveillance has 1518 nodes and 3863 edges


In [9]:
# Structural report for G0, the citation graph with only isolated nodes removed
g_diagnostics(G0,st)

The Scopus bibliometric dataset on Disease Control through Social Network Surveillance has 12078 nodes and 14365 edges 

The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is a directed graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is a simple graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is an unweighted graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not a bipartite graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not a tree 

The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not strongly connected and it has 12077 strongly connected components
The largest strongly connected component of this graph has 2 nodes and 2 edges
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not weakly co

In [10]:
# Structural report for G, the pruned citation graph
g_diagnostics(G,st)

The Scopus bibliometric dataset on Disease Control through Social Network Surveillance has 1518 nodes and 3863 edges 

The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is a directed graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is a simple graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is an unweighted graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not a bipartite graph
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not a tree 

The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not strongly connected and it has 1517 strongly connected components
The largest strongly connected component of this graph has 2 nodes and 2 edges
The Scopus bibliometric dataset on Disease Control through Social Network Surveillance is not weakly conne

In [21]:
# --- community detection -------------------------------------------------------
ctype=1  # 1 = Girvan-Newman, 0 = Louvain
node_color="Girvan-Newman_community"  # kept for reference only: it is not passed to hvnx_plot below

# --- layout and canvas ---------------------------------------------------------
pos=graphviz_layout(G)
width=1000
height=1000
title="The citation graph of the %s" %st
fontsize={'title': '9pt'}
bundled=0      # 0 = straight edges
nodelabels=0   # 1 would overlay node labels

# --- nodes ---------------------------------------------------------------------
node_size=5*np.log(5+hv.dim('in-degree'))  # grows with the node's 'in-degree' attribute
node_cmap="tab10"
node_hover_fill_color='red'

# --- edges ---------------------------------------------------------------------
edge_line_width=1
edge_color='lightgreen'
edge_hover_line_color='green'
arrowhead_length=0.01

# --- interaction ---------------------------------------------------------------
selection_mode='nodes'
selection_policy="nodes"

# --- label overlay (only used when nodelabels == 1) ----------------------------
text_font_size='4pt'
text_color='black'
bgcolor='white'
xoffset=0
yoffset=-0.03

citation=hvnx_plot(
    G, ctype, pos,
    width=width, height=height,
    node_size=node_size, node_cmap=node_cmap,
    edge_color=edge_color, edge_line_width=edge_line_width,
    title=title, bundled=bundled, nodelabels=nodelabels,
    xoffset=xoffset, yoffset=yoffset,
    arrowhead_length=arrowhead_length,
    selection_mode=selection_mode, selection_policy=selection_policy,
    edge_hover_line_color=edge_hover_line_color,
    node_hover_fill_color=node_hover_fill_color,
    fontsize=fontsize, text_font_size=text_font_size,
    text_color=text_color, bgcolor=bgcolor,
)

hv.save(citation, 'DCSNS_citationGraph.html', backend='bokeh')

citation

No. of Girvan-Newman communities 63


## 2. The Co-Citation Network

In [12]:
G1=G.copy()
# Two works are co-cited when the same paper cites both of them: collect every
# pair of successors of each node that cites more than one work
n_out=[n for n in G1.nodes() if G1.out_degree(n)>1]
medgesout=[]
for citing in n_out:
    medgesout.extend(combinations(list(G1.successors(citing)), 2))

# The pairs are first loaded into a multigraph; counting its edges gives, per
# pair, the number of parallel edges, which becomes the 'weight' of a simple graph
Gcocitation=nx.MultiGraph()
Gcocitation.add_edges_from(medgesout)
pair_counts=Counter(Gcocitation.edges())
weight=[(x, y, {'weight': v}) for (x, y), v in pair_counts.items()]
Gcocitation = nx.Graph(weight)
edge_width=[Gcocitation[u][v]['weight'] for u,v in Gcocitation.edges()]

st2="co-citation graph of the %s" %st
print("The %s has %i nodes and %i edges" %(st2,len(Gcocitation.nodes()),len(Gcocitation.edges())))

The co-citation graph of the Scopus bibliometric dataset on Disease Control through Social Network Surveillance has 1286 nodes and 19811 edges


In [22]:
# --- community detection -------------------------------------------------------
ctype=0  # 1 = Girvan-Newman, 0 = Louvain
node_color='Louvain_community'  # kept for reference only: it is not passed to hvnx_plot below

# --- layout and canvas ---------------------------------------------------------
pos=graphviz_layout(Gcocitation)
width=1000
height=1000
title="The %s" %st2
fontsize={'title': '9pt'}
bundled=0      # 0 = straight edges
nodelabels=0   # 1 would overlay node labels

# --- nodes ---------------------------------------------------------------------
node_size=5
node_cmap="tab10"
node_hover_fill_color='red'

# --- edges ---------------------------------------------------------------------
edge_line_width=1
edge_color='lightgreen'
edge_hover_line_color='green'
arrowhead_length=0.04

# --- interaction ---------------------------------------------------------------
selection_mode='nodes'
selection_policy="nodes"

# --- label overlay (only used when nodelabels == 1) ----------------------------
text_font_size='4pt'
text_color='black'
bgcolor='white'
xoffset=0
yoffset=-0.03

cocitation=hvnx_plot(
    Gcocitation, ctype, pos,
    width=width, height=height,
    node_size=node_size, node_cmap=node_cmap,
    edge_color=edge_color, edge_line_width=edge_line_width,
    title=title, bundled=bundled, nodelabels=nodelabels,
    xoffset=xoffset, yoffset=yoffset,
    arrowhead_length=arrowhead_length,
    selection_mode=selection_mode, selection_policy=selection_policy,
    edge_hover_line_color=edge_hover_line_color,
    node_hover_fill_color=node_hover_fill_color,
    fontsize=fontsize, text_font_size=text_font_size,
    text_color=text_color, bgcolor=bgcolor,
)

hv.save(cocitation, 'DCSNS_cocitationGraph.html', backend='bokeh')

cocitation

No. of Louvain communities 23


## 3. The Network of Bibliographic Coupling

In [14]:
G2=G.copy()
# Two papers are bibliographically coupled when they cite the same work: collect
# every pair of predecessors of each node that is cited more than once
n_in=[n for n in G2.nodes() if G2.in_degree(n)>1]
medgesin=[]
for cited in n_in:
    medgesin.extend(combinations(list(G2.predecessors(cited)), 2))

# The pairs are first loaded into a multigraph; counting its edges gives, per
# pair, the number of parallel edges, which becomes the 'weight' of a simple graph
Gbc=nx.MultiGraph()
Gbc.add_edges_from(medgesin)
pair_counts=Counter(Gbc.edges())
weight=[(x, y, {'weight': v}) for (x, y), v in pair_counts.items()]
Gbc = nx.Graph(weight)
edge_width=[Gbc[u][v]['weight'] for u,v in Gbc.edges()]

st4="graph of bibliographic coupling of the %s" %st
print("The %s has %i nodes and %i edges" %(st4,len(Gbc.nodes()),len(Gbc.edges())))

The graph of bibliographic coupling of the Scopus bibliometric dataset on Disease Control through Social Network Surveillance has 443 nodes and 3753 edges


In [15]:
# --- community detection -------------------------------------------------------
ctype=1  # 1 = Girvan-Newman, 0 = Louvain
node_color="Girvan-Newman_community"  # kept for reference only: it is not passed to hvnx_plot below

# --- layout and canvas ---------------------------------------------------------
pos=graphviz_layout(Gbc)
width=1000
height=1000
title="The %s" %st4
fontsize={'title': '9pt'}
bundled=0      # 0 = straight edges
nodelabels=0   # 1 would overlay node labels

# --- nodes ---------------------------------------------------------------------
node_size=5
node_cmap="tab10"
node_hover_fill_color='red'

# --- edges ---------------------------------------------------------------------
edge_line_width=1
edge_color='lightgreen'
edge_hover_line_color='green'
arrowhead_length=0.04

# --- interaction ---------------------------------------------------------------
selection_mode='nodes'
selection_policy="nodes"

# --- label overlay (only used when nodelabels == 1) ----------------------------
text_font_size='4pt'
text_color='black'
bgcolor='white'
xoffset=0
yoffset=-0.03

bibiocoupling=hvnx_plot(
    Gbc, ctype, pos,
    width=width, height=height,
    node_size=node_size, node_cmap=node_cmap,
    edge_color=edge_color, edge_line_width=edge_line_width,
    title=title, bundled=bundled, nodelabels=nodelabels,
    xoffset=xoffset, yoffset=yoffset,
    arrowhead_length=arrowhead_length,
    selection_mode=selection_mode, selection_policy=selection_policy,
    edge_hover_line_color=edge_hover_line_color,
    node_hover_fill_color=node_hover_fill_color,
    fontsize=fontsize, text_font_size=text_font_size,
    text_color=text_color, bgcolor=bgcolor,
)

hv.save(bibiocoupling, 'DCSNS_bibiocouplingGraph.html', backend='bokeh')

bibiocoupling

No. of Girvan-Newman communities 16
