In [36]:
import os
import tarfile
from urllib.parse import urljoin

import networkx as nx
import requests
from bs4 import BeautifulSoup
from tqdm.auto import tqdm

# Import KONECT datasets
Download all the datasets used in [https://www.nature.com/articles/s41598-023-41476-9](https://www.nature.com/articles/s41598-023-41476-9) from [KONECT](http://konect.cc/) and convert them to GML:
1. [Karate](http://konect.cc/networks/ucidata-zachary/)
2. [Bombing (Train bombing)](http://konect.cc/networks/moreno_train/)
3. [Iceland](http://konect.cc/networks/iceland/)
4. [LesMis (Les Misérables)](http://konect.cc/networks/moreno_lesmis/)
5. [Neurons (Caenorhabditis elegans (neural))](http://konect.cc/networks/dimacs10-celegansneural/)
6. E.coli (no KONECT page found, so this notebook does not download it)
7. [Netscience (Network Science)](http://konect.cc/networks/dimacs10-netscience/)
8. [Infectious](http://konect.cc/networks/sociopatterns-infectious/)
9. [Metabolic (Caenorhabditis elegans (metabolic))](http://konect.cc/networks/dimacs10-celegans_metabolic/)
10. [US Air (US airports)](http://konect.cc/networks/opsahl-usairport/)
11. [ASoIaF (A Song of Ice and Fire)](http://konect.cc/networks/asoiaf/)
12. [Email (U. Rovira i Virgili)](http://konect.cc/networks/arenas-email/)
13. [Bible](http://konect.cc/networks/moreno_names/)
14. [Yeast](http://konect.cc/networks/moreno_propro/)
15. [Vidal](http://konect.cc/networks/maayan-vidal/)
16. [Household (Hamsterster households)](http://konect.cc/networks/petster-hamster-household/)
17. [Friendship (Hamsterster friendships of friends?)](http://konect.cc/networks/petster-friendships-hamster/)
18. [Hamster (Hamsterster full?)](http://konect.cc/networks/petster-hamster/)

## Step 1: Download all the datasets

In [31]:
# Short dataset name -> KONECT page URL for that network.
# Every page lives under http://konect.cc/networks/, so only the page slug is
# spelled out per dataset. The Karate slug deliberately keeps its trailing
# slash so the resulting URL is exactly the one used so far.
# E.coli (item 6 in the list above) has no KONECT page and is not included.
links = {
    short_name: f"http://konect.cc/networks/{slug}"
    for short_name, slug in (
        ("Karate", "ucidata-zachary/"),
        ("Bombing", "moreno_train"),
        ("Iceland", "iceland"),
        ("LesMis", "moreno_lesmis"),
        ("Neurons", "dimacs10-celegansneural"),
        ("Netscience", "dimacs10-netscience"),
        ("Infectious", "sociopatterns-infectious"),
        ("Metabolic", "dimacs10-celegans_metabolic"),
        ("US_Air", "opsahl-usairport"),
        ("ASoIaF", "asoiaf"),
        ("Email", "arenas-email"),
        ("Bible", "moreno_names"),
        ("Yeast", "moreno_propro"),
        ("Vidal", "maayan-vidal"),
        ("Household", "petster-hamster-household"),
        ("Friendship", "petster-friendships-hamster"),
        ("Hamster", "petster-hamster"),
    )
}

In [32]:
# Download the "Data as TSV" archive linked from each KONECT dataset page,
# store it as ./konect_graphs/<name>.tsv.tar.bz2, and unpack it in place.
DOWNLOAD_DIR = "./konect_graphs"
REQUEST_TIMEOUT = 60  # seconds; without a timeout a stalled connection hangs the cell

# open(..., 'wb') does not create missing parent directories.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

for name, url in tqdm(links.items()):
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of scraping an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    anchor = soup.find("a", string="Data as TSV", href=True)
    if anchor is None:
        # soup.find returns None when nothing matches; subscripting it would
        # only produce an opaque TypeError.
        raise RuntimeError(f'No "Data as TSV" link found on {url} (dataset: {name})')

    # The dataset pages reference the archive with a relative href starting
    # with "../.."; resolve it against the URL the page was actually served
    # from rather than patching the string by hand.
    dataset_link = urljoin(response.url, anchor["href"])

    r = requests.get(dataset_link, timeout=REQUEST_TIMEOUT)
    # Do not save an HTML error body under a .tar.bz2 name.
    r.raise_for_status()

    archive_path = os.path.join(DOWNLOAD_DIR, f"{name}.tsv.tar.bz2")
    with open(archive_path, 'wb') as f:
        f.write(r.content)

    # Step 2 reads ./konect_graphs/<dataset>/out.<dataset>, so the archive has
    # to be unpacked next to where it was saved.
    # NOTE(review): presumably each archive contains a single top-level folder
    # named after the KONECT dataset id - confirm on one download.
    with tarfile.open(archive_path, "r:bz2") as archive:
        if hasattr(tarfile, "data_filter"):
            # The "data" filter rejects members that would escape DOWNLOAD_DIR
            # (absolute paths, "..", unsafe links); only available on Python
            # versions that ship extraction filters.
            archive.extractall(DOWNLOAD_DIR, filter="data")
        else:
            archive.extractall(DOWNLOAD_DIR)

  0%|          | 0/17 [00:00<?, ?it/s]

## Step 2: Convert to GML

In [61]:
def _edge_data_spec(edgelist_path):
    """Build the ``data`` argument for ``nx.read_edgelist`` from a file's column count.

    The first line that still has content after removing a ``%`` comment is
    split on arbitrary whitespace, and its number of columns decides how many
    integer edge attributes follow the two node columns.

    Parameters
    ----------
    edgelist_path : str
        Path of the edge-list file to inspect.

    Returns
    -------
    tuple
        ``()`` for 2 columns, ``(("attr1", int),)`` for 3 columns and
        ``(("attr1", int), ("attr2", int))`` for 4 columns.

    Raises
    ------
    ValueError
        If the file contains no edge line at all.
    NotImplementedError
        If the first edge line has a column count other than 2, 3 or 4.
    """
    specs = {
        2: (),
        3: (("attr1", int),),
        4: (("attr1", int), ("attr2", int)),
    }
    with open(edgelist_path) as f:
        for raw_line in f:
            # Drop everything from '%' on, the comment marker passed to
            # read_edgelist below, then ignore lines left empty.
            content = raw_line.split("%", 1)[0].strip()
            if not content:
                continue
            # split() without arguments handles tabs, spaces and any mix of both.
            columns = len(content.split())
            if columns not in specs:
                raise NotImplementedError(columns)
            return specs[columns]
    raise ValueError(f"No edge lines found in {edgelist_path}")


KONECT_DIR = "./konect_graphs"
GML_DIR = "./gml_graphs/konect"
GML_CONNECTED_DIR = "./gml_connected_subgraphs/konect"

# nx.write_gml opens its target path for writing, so both folders must exist.
os.makedirs(GML_DIR, exist_ok=True)
os.makedirs(GML_CONNECTED_DIR, exist_ok=True)

# Only sub-directories are datasets; hidden entries and stray files (for
# example downloaded .tar.bz2 archives) are skipped. Sorted so that repeated
# runs process the datasets in the same order.
graph_names = sorted(
    entry
    for entry in os.listdir(KONECT_DIR)
    if not entry.startswith(".") and os.path.isdir(os.path.join(KONECT_DIR, entry))
)

for graph_name in graph_names:
    print(graph_name)
    filepath = f"{KONECT_DIR}/{graph_name}/out.{graph_name}"

    G = nx.read_edgelist(filepath, comments="%", data=_edge_data_spec(filepath))
    nx.write_gml(G, f"{GML_DIR}/{graph_name}.gml")

    # Largest connected component; max() picks it without sorting them all.
    largest_component = max(nx.connected_components(G), key=len)
    G0 = G.subgraph(largest_component)
    # Write the component subgraph G0 here, not the full graph G.
    nx.write_gml(G0, f"{GML_CONNECTED_DIR}/{graph_name}.gml")

dimacs10-celegans_metabolic
dimacs10-celegansneural
asoiaf
sociopatterns-infectious
petster-hamster-household
moreno_names
maayan-vidal
petster-hamster
ucidata-zachary
arenas-email
moreno_propro
dimacs10-netscience
opsahl-usairport
moreno_lesmis
petster-friendships-hamster
iceland
moreno_train
