# Building and analysing a XXX network

In [12]:
import graph_tool_extras as gte
from pathlib import Path

## Introduction

In this notebook, a network of co-authorship of papers about condensed matter authors was built. The database in the file below is used. It's from the Stanford Network Analysis Project's dataset at https://networks.skewed.de/net/arxiv_authors#CondMat_draw.

In [13]:
FOLDER_PATH = Path.cwd() / 'CondMat.csv'
EDGE_FILE_PATH = FOLDER_PATH / 'edges.csv'
NODES_FILE_PATH = FOLDER_PATH / 'nodes.csv'

## Understanding the data

Concept of vertices:
Authors

Concept of edges:
An edge between i and j indicates co-authorship between i and j.

Operationalization of vertices:

Each vertex represents an author of a paper in the cond-mat category (an area that studies condensed matter) on the arXiv platform. The data was collected from January 1993 to April 2003.

Operationalization of edges:
An edge between authors i and j indicates a co-authorship relationship between them. In cases where other k authors also contributed to the production of the paper, these additional k authors are connected to both authors i and j, as well as to each other, through edges.

## Creating functions to build the network

In [14]:
def get_or_add_vertex(g, id):
    u = g.vertex_by_id(id)
    if u is None:
        u = g.add_vertex_by_id(id)
        # u['id'] = id
    return u

In [15]:
def get_or_add_edge(g, gene_a, gene_b, master_net_id):
    e = g.edge_by_ids(gene_a, gene_b)
    if e is None:
        e = g.add_edge_by_ids(gene_a, gene_b)
        # e['master_net_id'] = master_net_id
    return e

## Reading the data and building the network

In [16]:
g = gte.Graph(directed=False)

In [9]:
g.add_ep('')
g.add_vp('')
g.add_('')

AttributeError: 'Graph' object has no attribute 'add_'

In [17]:
with open(NODES_FILE_PATH) as file:

    next(file)

    for line in file:
        parts = line.split(',')

        parts = [part[1:-1] for part in parts]
        print(parts)
        break

FileNotFoundError: [Errno 2] No such file or directory: 'CondMat.csv/nodes.csv'

In [None]:
with open(EDGES_FILE_PATH) as file:

    next(file)
    for line in file:
        parts = line.split(',')

        parts = [part[1:-1] for part in parts]
        ...

In [None]:
g = gte.clean(g)

In [None]:
gte.save(g, '.net.gz')

## Configuring the layout and rendering the network

In [None]:
from graph_tool import draw
import netpixi

In [None]:
layout = draw.sfdp_layout(g)

In [None]:
gte.move(g, layout)

In [None]:
gte.save(g, '_layout.net.gz')

In [None]:
r = netpixi.render('_layout.net.gz', infinite=True)

## Improving network vizualization

In [None]:
r.vertex_default(
    size=4,
    color=0xffff00,
    bwidth=1,
    bcolor=0x007700,
)

In [None]:
r.edge_default(
    width=0.2,
    color=0x7777ff,
    curve1=0,
    curve2=0,
)

## Analyzing the network