# Tutorial 1 - Loading hypergraphs and basic functionality

In [1]:
import hypergraph as hg
from hypergraph import Hypergraph
import random
import pandas as pd
import numpy as np

## Loading hypergraphs from different formats

Hypergraphs can be loaded from many different formats; the `Hypergraph` constructor accepts five main data formats:
* A Hypergraph object
* A hyperedge list
* A hyperedge dictionary
* A 2-column pandas dataframe specifying (node, edge) bipartite edges
* An incidence matrix (a NumPy or SciPy matrix)

In [2]:
# Seed both random number generators used in this notebook so that
# "Restart Kernel -> Run All" regenerates the same random hypergraphs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

n = 1000  # node labels are drawn from range(n)
m = 1000  # number of hyperedges to generate

min_edge_size = 2
max_edge_size = 25

# Admissible hyperedge sizes (both endpoints inclusive).
edge_sizes = range(min_edge_size, max_edge_size + 1)

# hyperedge list: each hyperedge is a sample, without replacement, of node
# labels from range(n); its size is picked uniformly from edge_sizes
hyperedge_list = [random.sample(range(n), random.choice(edge_sizes)) for _ in range(m)]

# hyperedge dict: same construction, but keyed by an explicit integer edge ID
hyperedge_dict = {i: random.sample(range(n), random.choice(edge_sizes)) for i in range(m)}

# pandas dataframe
# The file is read as space-separated text with no header row, so the
# resulting columns carry default integer labels.
fname = "../data/disGene.txt"
df = pd.read_csv(fname, sep=" ", header=None)

# incidence matrix
# Dense (n, m) integer array whose entries are drawn at random from {0, 1}.
incidence_matrix = np.random.randint(2, size=(n, m), dtype=int)

### Loading a hyperedge list

When a user gives a hyperedge list, the system automatically generates the edge IDs.

In [3]:
# Build the hypergraph from the list of hyperedges and report its size.
H = hg.Hypergraph(hyperedge_list)
num_nodes, num_edges = H.number_of_nodes(), H.number_of_edges()
print(f"The hypergraph has {num_nodes} nodes and {num_edges} edges")

The hypergraph has 1000 nodes and 1000 edges


### Loading a hyperedge dictionary

When a user gives a hyperedge dictionary, the system uses the edge IDs specified in the dictionary.

In [4]:
# Build the hypergraph from the {edge ID: members} dictionary and report its size.
H = hg.Hypergraph(hyperedge_dict)
num_nodes, num_edges = H.number_of_nodes(), H.number_of_edges()
print(f"The hypergraph has {num_nodes} nodes and {num_edges} edges")

The hypergraph has 1000 nodes and 1000 edges


### Loading an incidence matrix

When a user gives an incidence matrix, the system transforms the non-zero entries into lists of rows and columns specifying a bipartite edge list.

In [5]:
# Build the hypergraph from the random 0/1 incidence matrix and report its size.
H = hg.Hypergraph(incidence_matrix)
num_nodes, num_edges = H.number_of_nodes(), H.number_of_edges()
print(f"The hypergraph has {num_nodes} nodes and {num_edges} edges")

The hypergraph has 1000 nodes and 1000 edges


### Loading a Pandas dataframe
When a user gives a Pandas dataframe, the system automatically imports the first two columns as lists of node and edge indices specifying a bipartite edge list.

In [6]:
# Build the hypergraph from the dataframe read from disk and report its size.
H = hg.Hypergraph(df)
num_nodes, num_edges = H.number_of_nodes(), H.number_of_edges()
print(f"The hypergraph has {num_nodes} nodes and {num_edges} edges")

The hypergraph has 12368 nodes and 2261 edges


## Simple functions

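The library provides simple functionality such as
* output an incidence matrix
* output the adjacency matrix for s-connectedness
* output the clique motif matrix
* output the dual of the hypergraph
* find if the hypergraph is connected and list its connected components
* extract sub-hypergraphs from a subset of nodes and/or edges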

In [7]:
# Incidence matrix of H, requested in sparse form.
# NOTE(review): presumably a SciPy sparse matrix — confirm against the library docs.
I = hg.incidence_matrix(H, sparse=True)

In [8]:
# Adjacency matrix of H. No connectedness parameter is passed here.
# NOTE(review): presumably the library default for s is used — confirm.
A = hg.adjacency_matrix(H)

In [10]:
# Clique motif matrix of H.
# NOTE(review): presumably entry (i, j) counts the hyperedges that contain
# both node i and node j — confirm against the library docs.
W = hg.clique_motif_matrix(H)

In [11]:
# Dual of the hypergraph H.
D = H.dual()

In [14]:
# Parameters for a smaller, sparser random hypergraph; these overwrite the
# values used for the loading examples earlier in the notebook.
n = 1000
m = 100

min_edge_size = 2
max_edge_size = 10

# hyperedge list
# For each hyperedge, first pick its size, then sample that many distinct
# node labels from range(n).
edge_sizes = range(min_edge_size, max_edge_size + 1)
hyperedge_list = [
    random.sample(range(n), random.choice(edge_sizes))
    for _ in range(m)
]
# Rebuild H from the smaller hyperedge list; the cells below operate on this H.
H = hg.Hypergraph(hyperedge_list)

In [15]:
# Report whether H as a whole is connected.
is_connected = hg.is_connected(H)
print("H is connected" if is_connected else "H is not connected")

# Size of every connected component of H.
print("The sizes of the connected components are:")
component_sizes = list(map(len, hg.connected_components(H)))
print(component_sizes)

# Size of the component containing one particular node.
# NOTE(review): H is built from random samples, so node 0 is not guaranteed
# to appear in any hyperedge — confirm how the library handles that case.
node = 0
node_component = hg.node_connected_component(H, node)
print(f"The size of the component containing node {node} is {len(node_component)}")

H is not connected
The sizes of the connected components are:
[407, 2, 2, 2, 6, 4, 6, 3, 3, 8, 2, 2, 2]
The size of the component containing node 0 is 407


In [16]:
# Restrict H to the first hundred integer labels, by nodes, by edges, and by both.
# A fresh list is built for every call so that no list object is shared.
# NOTE(review): H comes from random samples of range(1000), so not every label
# in range(100) need be a node of H — confirm that missing labels are tolerated.
first_hundred = range(100)
node_subhypergraph = H.subhypergraph(list(first_hundred))
edge_subhypergraph = H.edge_subhypergraph(list(first_hundred))
arbitrary_subhypergraph = H.arbitrary_subhypergraph(list(first_hundred), list(first_hundred))

## Converting to other formats

A hypergraph can be exported as a hyperedge list or as a hyperedge dictionary.

In [17]:
# Export H as a hyperedge list and as a hyperedge dictionary.
h_list = hg.to_hyperedge_list(H)
h_dict = hg.to_hyperedge_dict(H)

## Importing and exporting hypergraph data

In [None]:
# Write H to "test.json" (a relative path, so it resolves against the current working directory).
hg.write_hypergraph_json(H,"test.json")

In [None]:
# Read the hypergraph back from "test.json"; this rebinds H to the loaded object.
H = hg.read_hypergraph_json("test.json")

In [None]:
# Attach a random "weight" attribute in [0, 1) to every edge of H.
# NOTE(review): this writes through the private `_edge_attr` mapping; if the
# library exposes a public setter for edge attributes, prefer it — confirm.
for edge_id in H.edges:
    H._edge_attr[edge_id]["weight"] = random.random()

In [None]:
# Write H to "test.csv" as an edge list, using a comma as the delimiter.
hg.write_edgelist(H, "test.csv", delimiter=",")

In [None]:
# Node degree computed with the "weight" edge attribute; as the last
# expression in the cell, its value is displayed as the cell output.
H.node_degree(weight="weight")