## This notebook takes a network of technology clusters in table format and builds year-specific graphs from it, with node size reflecting cluster size, node color reflecting node degree, and edge width reflecting connection weight.

In [None]:
import pandas as pd
import os 
import re
import numpy as np
#import spacy
import string
pd.set_option('mode.chained_assignment', None)

import networkx as nx
import matplotlib.pyplot as plt

%matplotlib notebook

In [None]:
#Loading the patent table; only the patent identifier and its application
#year are kept, and repeated (patent, appyear) rows are collapsed into one
patents = pd.read_csv('patents.csv')[['patent', 'appyear']].drop_duplicates()

In [None]:
#Loading the patent-article matches; the inner merge keeps only matches whose
#patent also appears in `patents` and attaches that patent's application year
matches = (
    pd.read_csv('complete_matches_full.csv')
    .merge(patents, how='inner', on='patent')
    .drop_duplicates()
)
#Number of matched patents per article and application year
pat_per_art = (
    matches[['article_title', 'patent', 'appyear']]
    .groupby(['article_title', 'appyear'])
    .agg(['count'])
)
#Number of matched patents per application year, over all articles
temp = (
    matches[['patent', 'appyear']]
    .groupby(['appyear'])
    .agg(['count'])
)

In [None]:
#Turning the grouped counts back into flat tables with plain column names
temp = temp.reset_index()
temp.columns = ['appyear', 'all_pat']
pat_per_art = pat_per_art.reset_index()
pat_per_art.columns = ['article_title', 'appyear', 'pat_per_year']
#Attaching the yearly total to every article-year row
pat_per_art = pat_per_art.merge(temp, how='inner', on='appyear')
#Share of the year's patents that belongs to the article; this will be the size of the nodes
pat_per_art['share'] = pat_per_art['pat_per_year'].div(pat_per_art['all_pat'])

In [None]:
#Creating a frame of all possible year-article combinations so that clusters
#without any patent in a given year still get a row for that year
titles = pat_per_art[['article_title']].drop_duplicates().assign(key=0)
years = pd.DataFrame({'appyear': list(range(1978, 2015, 1)), 'key': 0})
#Cross join of years and titles through the constant helper column `key`
frame = pd.merge(years, titles, how='outer', on='key').drop(['key'], axis=1)
pat_per_art = pd.merge(pat_per_art, frame, how='outer', on=['appyear', 'article_title'])
pat_per_art = pat_per_art.sort_values(by=['article_title', 'appyear'])
#Rows that exist only because of the frame have no patent count: their share
#of the year's patents is zero. The share is filled first because it relies on
#the still-missing counts to recognise those rows. The appyear==0 rule of the
#earlier version is kept as is.
pat_per_art['share'] = np.where(
    pat_per_art['pat_per_year'].isnull() | (pat_per_art['appyear'] == 0),
    0,
    pat_per_art['share'],
)
#Missing counts mean zero patents, for every year of the frame and not only 1978.
#NOTE(review): `all_pat` is left missing on the added rows - confirm nothing downstream needs it
pat_per_art['pat_per_year'] = pat_per_art['pat_per_year'].fillna(0)

In [None]:
#Loading the network as an edge table (one row per link)
link_work = pd.read_csv('link_all.csv')
#Edge weight is the reciprocal of the link's `level`
link_work = link_work.assign(weight=lambda links: 1/links['level'])

In [None]:
#Looping through different years and creating year-specific network

#Node positions are computed once, on the whole network, so that every node
#keeps the same place in all yearly pictures. Spring layout is what networkx
#falls back to when no positions are given; the fixed seed makes runs repeatable.
full_network = nx.from_pandas_edgelist(link_work, source='link_from', target='link_to', create_using=nx.Graph())
pos = nx.spring_layout(full_network, seed=0)

#Figures are only written to files, so interactive drawing is switched off
plt.ioff()

for year in range(1976, 2013):

    working_pat = pat_per_art[pat_per_art['appyear'] == year]
    #Selecting only relevant parts of the network for this year: a link is kept
    #when both of its ends have a row in this year's table
    titles_this_year = set(working_pat['article_title'])
    both_ends_present = link_work['link_from'].isin(titles_this_year) & link_work['link_to'].isin(titles_this_year)
    link_for_year = link_work.loc[both_ends_present, ['link_from', 'link_to', 'weight']].drop_duplicates()

    #Feeding network into the graph interpreter
    G_year = nx.from_pandas_edgelist(link_for_year, source='link_from', target='link_to', edge_attr='weight', create_using=nx.Graph())
    if G_year.number_of_nodes() == 0:
        #No link has both ends present this year, so there is nothing to draw
        continue

    #Adding nodes attributes - size. Shares are looked up by title, so a node
    #can never receive the share of another node.
    share_by_title = dict(zip(working_pat['article_title'], working_pat['share']))
    node_share = {node: share_by_title[node] for node in G_year}
    nx.set_node_attributes(G_year, node_share, 'share')

    #Adding labels for the top-20 nodes by size (all nodes when there are fewer than 20)
    ranked_titles = pd.Series(node_share).sort_values(ascending=False).index[:20]
    labels = {title: str(title) for title in ranked_titles}

    fig = plt.figure(figsize=(9, 5), dpi=300)
    plt.subplot()

    #Making color of the node vary with its degree
    node_color = [G_year.degree(node) for node in G_year]
    #Making size of the node vary with the size of a cluster
    node_size = [12000*node_share[node] for node in G_year]
    #Making width of the edges vary with the weight of the connection
    edge_width = [0.5/weight for _, _, weight in G_year.edges(data='weight')]

    #Drawing pretty graphs and saving them to files
    nx.draw_networkx(G_year, pos, font_size=7, labels=labels, font_color='Black', width=edge_width, node_color=node_color, node_size=node_size, alpha=0.7, edgecolors='peru', with_labels=True, edge_color='.8', cmap=plt.cm.Oranges)
    plt.title(str(year), loc='left')
    plt.axis('off')
    plt.tight_layout()
    plt.savefig(str(year)+'.png')
    #Releasing the figure; otherwise every year's figure stays in memory
    plt.close(fig)