In [36]:
import pandas as pd
import numpy as np
import networkx as nx
from netgraph import Graph
import itertools
import statistics

# Read one query-result CSV per year (2008 through 2024 inclusive) and stack
# them into a single frame.
yearly_paths = [f'queryResult-{year}.csv' for year in range(2008, 2025)]
yearly_frames = [pd.read_csv(path) for path in yearly_paths]
df = pd.concat(yearly_frames)

# Discard every column whose label contains "unnamed" (case-insensitive).
is_unnamed = df.columns.str.contains('unnamed', case=False)
df.drop(columns=df.columns[is_unnamed], inplace=True)

# G1: undirected graph whose nodes are institution names (filled in below).
# G2: undirected graph, created empty here.
G1 = nx.Graph()
G2 = nx.Graph()

# Build the institution graph: every row contributes its ';'-separated,
# lower-cased affiliation names as nodes and links each pair of distinct
# names found in that row.
for row_label, record in df.iterrows():
    names = str(record['affilname']).lower().split(';')
    countries = str(record['affiliation_country']).lower().split(';')
    # zip() stops at the shorter list, so names without a matching country
    # entry (and vice versa) are ignored for this row.
    tagged = list(zip(names, countries))

    # Register unseen institutions; the 'country' attribute is whatever
    # country was paired with the name the first time it appeared.
    for name, country in tagged:
        if name not in G1:
            G1.add_node(name, country=country)

    # Walk every ordered pair of names, in the same order as two nested loops.
    # NOTE(review): the graph is undirected and both (a, b) and (b, a) are
    # visited, so each distinct pair is incremented at least twice per row --
    # confirm this doubling is intended.
    paired_names = [name for name, _country in tagged]
    for src, dst in itertools.product(paired_names, repeat=2):
        if src == dst:
            continue
        if not G1.has_edge(src, dst):
            G1.add_edge(src, dst, weight=0.1)
        else:
            G1[src][dst]['weight'] += 0.1

# Prune sparsely connected institutions from G1.
degree_by_node = dict(G1.degree())

# Ascending list of all node degrees below 100; nothing in the visible code
# reads it afterwards.
degrees = sorted(deg for deg in degree_by_node.values() if deg < 100)

# Fixed degree cutoff (a constant, not derived from `degrees`): any node with
# fewer than this many neighbours is removed together with its edges.
quantile = 500
low_degree_nodes = [
    name for name, deg in degree_by_node.items() if deg < quantile
]
G1.remove_nodes_from(low_degree_nodes)

# Build the country graph: within each row, link every pair of differing
# entries from the ';'-separated, lower-cased 'affiliation_country' field.
for row_label, record in df.iterrows():
    countries = str(record['affiliation_country']).lower().split(';')
    # Ordered pairs, visited in the same order as two nested loops over the
    # list. Nodes are only created implicitly by add_edge().
    # NOTE(review): G2 is undirected and both (a, b) and (b, a) are visited,
    # and repeated country entries are paired once per occurrence -- confirm
    # the resulting weight scale is intended.
    for src, dst in itertools.product(countries, repeat=2):
        if src == dst:
            continue
        if not G2.has_edge(src, dst):
            G2.add_edge(src, dst, weight=0.1)
        else:
            G2[src][dst]['weight'] += 0.1

# Export both graphs as GEXF files: institutions first, then countries.
gexf_targets = (
    (G1, 'graph_institution.gexf'),
    (G2, 'graph_country.gexf'),
)
for graph, out_path in gexf_targets:
    nx.write_gexf(graph, out_path)