## This workflow automates the publication of WikiPathways networks to NDEx.

#### Import Required Libraries

#### py4cytoscape: https://github.com/cytoscape/py4cytoscape
#### ndex2 client: https://github.com/ndexbio/ndex2-client/tree/master
#### pywikipathways: https://github.com/wikipathways/pywikipathways

In [None]:
import io
import json
import os
import re
import time
import urllib
import urllib.parse

import ndex2
from ndex2.client import DecimalEncoder
from ndex2.cx2 import RawCX2NetworkFactory
import py4cytoscape as p4c
import pywikipathways as pyw
from requests import HTTPError

## Step 1 - Use Cytoscape to convert GPML to CX2

#### Download the WikiPathways networks in GPML format from https://github.com/wikipathways/wikipathways-data-release.
#### Open Cytoscape. Run the code below to convert all GPML files to CX2 files, and save all CX2 files in a new folder.
#### In the future, we can use https://github.com/wikipathways/gpml-to-cx2 to run this workflow without Cytoscape.

In [None]:
# Convert every GPML file in `input_dir` to CX2 through a running Cytoscape
# instance, writing the results into `output_dir`.
input_dir = "wikipathways-20250310-gpml-Homo_sapiens"
output_dir = "wikipathways-20250310-cx2-Homo_sapiens"

os.makedirs(output_dir, exist_ok=True)

for filename in os.listdir(input_dir):
    if not filename.endswith(".gpml"):
        continue

    input_filepath = os.path.join(input_dir, filename)
    try:
        p4c.import_network_from_file(input_filepath)
    except Exception as e:
        # Only the known "this.largeNetworks" failure is tolerated; the
        # export below is still attempted in that case. Anything else is a
        # real error and is propagated.
        if "this.largeNetworks" not in str(e):
            raise
        print(f"Known error encountered in {filename}, exporting anyway.")

    # Export after a clean import as well as after the tolerated error;
    # previously the export only ran on the error path, so successfully
    # imported networks were never written out.
    export_filepath = os.path.join(output_dir, filename)
    p4c.export_network(filename=export_filepath, type='cx2')

    # Clear the session so the next export does not pick up this network.
    p4c.delete_all_networks()

#### Rename the files to match the network names we use in NDEx.

In [None]:
# Rename exported files from "<species>_<description>_<WPID>_<last token>"
# to "<WPID> - <description> - <species name>.cx2".
directory = output_dir

species_map = {
    "Hs": "Homo sapiens",
}

for filename in os.listdir(directory):
    if not filename.endswith(".cx2"):
        continue

    # Strip the double extension when present, otherwise just ".cx2".
    suffix = ".gpml.cx2" if filename.endswith(".gpml.cx2") else ".cx2"
    base_name = filename[:-len(suffix)]

    parts = base_name.split("_")
    if len(parts) < 4:
        print(f"Filename '{filename}' does not conform to the expected pattern. Skipping.")
        continue

    # First token: species abbreviation; second-to-last: WPID; everything in
    # between: the description. The final token is not used.
    species_abbr = parts[0]
    wp_id = parts[-2]
    description_tokens = parts[1:-2]

    species_full = species_map.get(species_abbr, species_abbr)
    description = " ".join(description_tokens)
    new_filename = f"{wp_id} - {description} - {species_full}.cx2"

    old_path = os.path.join(directory, filename)
    new_path = os.path.join(directory, new_filename)
    os.rename(old_path, new_path)

    print(f"Renamed '{filename}' to '{new_filename}'")

## Step 2 - Update existing networks and upload new networks

#### Use the ndex2 library to connect to the NDEx server. This only needs to be run once.

In [None]:
# Create the NDEx client. Credentials are taken from the NDEX_USERNAME and
# NDEX_PASSWORD environment variables when set, so real credentials do not
# have to be written into the notebook; the original placeholders remain the
# fallback values.
client = ndex2.client.Ndex2(
    username=os.environ.get('NDEX_USERNAME', 'username'),
    password=os.environ.get('NDEX_PASSWORD', 'password'),
)

#### This UUID is the ID of the WikiPathways network set; it can be found in NDEx.

In [None]:
# Fetch the network set and keep only its list of member network IDs.
wikipathways_networkset = client.get_networkset("453c1c63-5c10-11e9-9f06-0ac135e8bacf")
networks_id_list = wikipathways_networkset['networks']

#### Get existing wikipathway networks in NDEx, since these networks need to be updated with client.update_cx2_network().

In [None]:
# Find the networks in the NDEx set whose WPID (the text before the first
# " - " in the name) also appears as a local CX2 file.
local_directory = output_dir

# WPIDs of the local CX2 files.
local_wpids = set()
for fname in os.listdir(local_directory):
    if not fname.endswith(".cx2"):
        continue
    base = fname[:-len(".cx2")]
    wpid = base.partition(" - ")[0]
    local_wpids.add(wpid)

# One summary request per network in the set; keep those with a local file.
common_networks = []
for net_id in networks_id_list:
    summary = client.get_network_summary(net_id)
    network_name = summary['name']
    wpid = network_name.partition(" - ")[0]
    if wpid not in local_wpids:
        continue
    common_networks.append(dict(id=net_id, name=network_name))

print("Networks present in both the result list and local folder:")
for net in common_networks:
    print(f"ID: {net['id']}, Name: {net['name']}")

#### Get the new WikiPathways networks that are not yet in NDEx; these networks need to be uploaded with client.save_new_cx2_network().

In [None]:
# Collect the local CX2 files whose WPID (the text before the first " - ")
# does not match any network name in the NDEx set.

# WPIDs of the networks in the set, one summary request per network.
network_wpids = set()
for net_id in networks_id_list:
    summary = client.get_network_summary(net_id)
    network_name = summary['name']
    wpid = network_name.partition(" - ")[0]
    network_wpids.add(wpid)

# Local files with no counterpart in the set.
new_networks = []
for fname in os.listdir(local_directory):
    if not fname.endswith(".cx2"):
        continue
    base = fname[:-len(".cx2")]
    local_wpid = base.partition(" - ")[0]
    if local_wpid in network_wpids:
        continue
    new_networks.append(fname)

print("Local networks that do not exist in the network_id_list:")
for network in new_networks:
    print(network)

#### The pywikipathways library is used here to get additional network information, which is stored as network properties in NDEx.

#### Updating existing networks...

In [None]:
# Update every network listed in `common_networks` from its local CX2 file:
# prefix the network name with the WPID, attach ontology hyperlinks and fixed
# metadata as network attributes, and replace the network content on NDEx.
cx2_folder = output_dir

if not os.path.isdir(cx2_folder):
    print(f"Error: Directory '{cx2_folder}' not found. Check the folder name or path.")
    exit(1)

all_files = os.listdir(cx2_folder)

# (network attribute to set, ontology label to match on each term, URL prefix
# the quoted term id is appended to)
ontology_targets = (
    ("Labels", "Pathway Ontology", "https://www.ebi.ac.uk/ols4/ontologies/pw/classes?obo_id="),
    ("Cell", "Cell Type", "https://www.ebi.ac.uk/ols4/ontologies/cl/classes?obo_id="),
    ("Disease", "Disease", "https://www.ebi.ac.uk/ols4/ontologies/doid/classes?obo_id="),
)

for network in common_networks:
    network_id = network.get('id')
    network_name = network.get('name')

    if not network_name:
        print(f"Skipping network with id {network_id} because it has no name.")
        continue

    # Take the WPID from this network's own name. The result was previously
    # bound to a misspelled variable, so the lines below used whatever value
    # `wpid` still held from an earlier cell.
    match = re.search(r"(WP\d+)", network_name)
    wpid = match.group(1) if match else ""

    if not wpid:
        print(f"Skipping network {network_name} because no WPID could be extracted.")
        continue

    # The digit look-arounds stop e.g. WP12 from matching inside WP123.
    pattern = re.compile(r'(?<!\d)' + re.escape(wpid) + r'(?!\d)', re.IGNORECASE)

    candidates = [f for f in all_files if pattern.search(f) and f.endswith(".cx2")]
    if not candidates:
        print(f"No file found for WPID {wpid} in {cx2_folder}. Skipping network {network_name}.")
        continue

    file_path = os.path.join(cx2_folder, candidates[0])
    print(f"Processing file: {file_path} for network {network_name} (WPID: {wpid})")

    # Explicit encoding so the read does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Prefix the network name stored in the CX2 data with the WPID.
    for item in data:
        if isinstance(item, dict) and 'networkAttributes' in item:
            for attr in item['networkAttributes']:
                attr['name'] = f"{wpid} - {attr.get('name', '')}"

    factory = RawCX2NetworkFactory()
    net = factory.get_cx2network(data)
    net_attrs = net.get_network_attributes()

    onto_info = pyw.wikipathways_get('getOntologyTermsByPathway', {'pwId': wpid, 'format': 'json'})
    if onto_info is None:
        print(f"Warning: No ontology info returned for WPID {wpid}. Proceeding with an empty terms list.")
        onto_info = {"terms": []}

    # Build one comma-separated list of hyperlinks per ontology; the
    # attribute is only set when at least one term matched.
    terms = onto_info.get("terms", []) or []
    for attr_name, ontology, base_url in ontology_targets:
        links = []
        for term in terms:
            if term.get("ontology") != ontology:
                continue
            term_id = term.get("id")
            term_name = term.get("name")
            if term_id and term_name:
                term_id_encoded = urllib.parse.quote(term_id, safe='')
                links.append(f'<a href="{base_url}{term_id_encoded}">{term_name}</a>')
        if links:
            net_attrs[attr_name] = ", ".join(links)

    net_attrs['author'] = 'WikiPathways team'
    net_attrs['version'] = '20250407'
    net_attrs['WikipathwaysID'] = wpid
    net_attrs['WikipathwaysIRI'] = f'<a href="http://identifiers.org/wikipathways/{wpid}">http://identifiers.org/wikipathways/{wpid}</a>'
    net_attrs['NetworkType'] = 'pathway'

    net.set_network_attributes(net_attrs)

    cx_stream = io.BytesIO(json.dumps(net.to_cx2(), cls=DecimalEncoder).encode('utf-8'))

    client.set_read_only(network_id, False)
    try:
        res = client.update_cx2_network(cx_stream, network_id)
    finally:
        # Restore the read-only flag even when the update fails, so a failed
        # run does not leave the network writable. Up to three attempts with
        # a two-second pause between them.
        max_retries = 3
        for attempt in range(max_retries):
            try:
                client.set_read_only(network_id, True)
                break
            except Exception as e:
                print(f"Attempt {attempt+1} to set network as read-only failed: {e}")
                if attempt < max_retries - 1:
                    time.sleep(2)
                else:
                    print("Max retry attempts reached. Could not set network as read-only.")
                    raise

    print(f"Successfully processed network: {network_name} (WPID: {wpid}) with id {network_id}")

#### Uploading new networks...

In [None]:
# Upload every file listed in `new_networks` as a new public network:
# prefix the network name with the WPID, attach ontology hyperlinks and fixed
# metadata as network attributes, save it to NDEx and mark it read-only.
# NOTE(review): nothing in this cell adds the new network to the WikiPathways
# network set — confirm that this happens elsewhere.
cx2_folder = output_dir

if not os.path.isdir(cx2_folder):
    print(f"Error: Directory '{cx2_folder}' not found. Check the folder name or path.")
    exit(1)

# (network attribute to set, ontology label to match on each term, URL prefix
# the quoted term id is appended to)
ontology_targets = (
    ("Labels", "Pathway Ontology", "https://www.ebi.ac.uk/ols4/ontologies/pw/classes?obo_id="),
    ("Cell", "Cell Type", "https://www.ebi.ac.uk/ols4/ontologies/cl/classes?obo_id="),
    ("Disease", "Disease", "https://www.ebi.ac.uk/ols4/ontologies/doid/classes?obo_id="),
)

for fn in new_networks:
    file_path = os.path.join(cx2_folder, fn)

    # The WPID must be the leading token of the file name, followed by "-".
    file_name = os.path.basename(file_path)
    match = re.match(r"(WP\d+)\s*-", file_name)
    prefix = match.group(1) if match else ""

    if not prefix:
        print(f"Skipping file {file_name} because no WPID could be extracted.")
        continue

    print(f"Processing file: {file_path} (WPID: {prefix})")

    # Explicit encoding so the read does not depend on the platform default.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Prefix the network name stored in the CX2 data with the WPID.
    for item in data:
        if isinstance(item, dict) and 'networkAttributes' in item:
            for attr in item['networkAttributes']:
                attr['name'] = f"{prefix} - {attr.get('name', '')}"

    factory = RawCX2NetworkFactory()
    net = factory.get_cx2network(data)
    net_attrs = net.get_network_attributes()

    onto_info = pyw.wikipathways_get('getOntologyTermsByPathway', {'pwId': prefix, 'format': 'json'})
    if onto_info is None:
        # Same fallback as the cell that updates existing networks; without
        # it the `.get` below fails on None.
        print(f"Warning: No ontology info returned for WPID {prefix}. Proceeding with an empty terms list.")
        onto_info = {"terms": []}

    # Build one comma-separated list of hyperlinks per ontology; the
    # attribute is only set when at least one term matched.
    terms = onto_info.get("terms", []) or []
    for attr_name, ontology, base_url in ontology_targets:
        labels_html = []
        for term in terms:
            if term.get("ontology") != ontology:
                continue
            term_id = term.get("id")
            term_name = term.get("name")
            if term_id and term_name:
                term_id_encoded = urllib.parse.quote(term_id, safe='')
                labels_html.append(f'<a href="{base_url}{term_id_encoded}">{term_name}</a>')
        if labels_html:
            net_attrs[attr_name] = ", ".join(labels_html)

    net_attrs['author'] = 'WikiPathways team'
    net_attrs['version'] = '20250407'
    net_attrs['WikipathwaysID'] = prefix
    net_attrs['WikipathwaysIRI'] = f'<a href="http://identifiers.org/wikipathways/{prefix}">http://identifiers.org/wikipathways/{prefix}</a>'
    net_attrs['NetworkType'] = 'pathway'

    net.set_network_attributes(net_attrs)

    # The UUID of the new network is pulled out of the value returned by the
    # save call.
    res = client.save_new_cx2_network(net.to_cx2(), "PUBLIC")
    uuid_match = re.search(
        r'([0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12})',
        res
    )
    if uuid_match:
        uuid = uuid_match.group(1)
    else:
        print(f"Could not extract UUID from response for network {prefix}")
        continue

    # Retry the read-only switch the same way the update cell does: up to
    # three attempts with a two-second pause between them.
    max_retries = 3
    for attempt in range(max_retries):
        try:
            client.set_read_only(uuid, True)
            break
        except Exception as e:
            print(f"Attempt {attempt+1} to set network as read-only failed: {e}")
            if attempt < max_retries - 1:
                time.sleep(2)
            else:
                print("Max retry attempts reached. Could not set network as read-only.")
                raise

    print(f"Successfully processed network with WPID: {prefix} and UUID: {uuid}")

## Step 3 - Add hyperlink to ChEBI and Ensembl in node table for all networks

#### Get all networks UUID

In [None]:
# Fetch the network set and keep only its list of member network UUIDs.
networkset_info = client.get_networkset('453c1c63-5c10-11e9-9f06-0ac135e8bacf')
uuid_list = networkset_info['networks']

#### Add the URLs to @context; the NDEx server will then make the matching values clickable.

In [None]:
# Prefix -> base URL mapping; it is serialized to JSON and stored as the
# '@context' network attribute of every network.
cx_context = {
    # relation lookup
    "signor": "http://signor.uniroma2.it/relation_result.php?id=",
    # identifiers.org namespaces
    "BTO": "http://identifiers.org/bto/BTO:",
    "uniprot": "http://identifiers.org/uniprot/",
    "pubmed": "http://identifiers.org/pubmed/",
    "CID": "http://identifiers.org/pubchem.compound/",
    "SID": "http://identifiers.org/pubchem.substance/",
    "chebi": "http://identifiers.org/chebi/CHEBI:",
    "hgnc.symbol": "http://identifiers.org/hgnc.symbol/",
    "Ensembl": "http://identifiers.org/ensembl:",
}


def retry_on_500(func, *args, retries=3, backoff=1, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying when it fails with HTTP 500.

    An ``HTTPError`` whose response has status code 500 triggers a retry, up
    to ``retries`` times. The wait before the first retry is ``backoff``
    seconds and doubles after each one. Any other ``HTTPError``, or a 500
    once the retries are used up, is re-raised; other exception types are
    not caught.

    Returns whatever ``func`` returns.
    """
    delay = backoff
    failures = 0
    while True:
        try:
            result = func(*args, **kwargs)
        except HTTPError as err:
            code = getattr(err.response, "status_code", None)
            if code != 500 or failures >= retries:
                raise
            failures += 1
            time.sleep(delay)
            delay *= 2
        else:
            return result
            
def prefix_cx_context(obj):
    """Return a copy of ``obj`` with string "Ensembl" values prefixed.

    Dicts and lists are walked recursively and rebuilt; anything else is
    returned unchanged. In every dict, a string stored under the key
    "Ensembl" becomes ``"Ensembl:<value>"``. A value that already starts
    with ``"Ensembl:"`` is kept as is, so running this again on data that
    was already processed does not stack prefixes.
    """
    if isinstance(obj, dict):
        new = {}
        for k, v in obj.items():
            if k == "Ensembl" and isinstance(v, str):
                # Skip values that already carry the prefix.
                new[k] = v if v.startswith("Ensembl:") else f"Ensembl:{v}"
            else:
                new[k] = prefix_cx_context(v)
        return new
    if isinstance(obj, list):
        return [prefix_cx_context(item) for item in obj]
    return obj



# For every network in the set: download it as CX2, prefix the Ensembl
# values, store `cx_context` as the '@context' network attribute, and upload
# the result back to NDEx.

# Created here so this cell does not depend on a `factory` left behind by the
# loops of earlier cells (which is undefined when those loops never ran).
factory = RawCX2NetworkFactory()

for uid in uuid_list:
    retry_on_500(client.set_read_only, uid, False)
    try:
        resp = retry_on_500(client.get_network_as_cx2_stream, uid)
        net_cx = factory.get_cx2network(prefix_cx_context(json.loads(resp.content)))

        net_attrs = net_cx.get_network_attributes()
        net_attrs['@context'] = json.dumps(cx_context)
        net_cx.set_network_attributes(net_attrs)

        payload = json.dumps(net_cx.to_cx2(), cls=DecimalEncoder).encode('utf-8')
        # Build a fresh stream for every attempt: a stream that was already
        # read by a failed attempt would be sent empty on the retry.
        retry_on_500(lambda: client.update_cx2_network(io.BytesIO(payload), uid))
    finally:
        # Restore the read-only flag even when a step above fails.
        retry_on_500(client.set_read_only, uid, True)

### Some outdated networks need to be deleted

In [None]:
# Networks to remove from the network set; fill in by hand before running the
# next cell. Empty by default.
delete_networks = []

In [None]:
# Remove the listed networks from the WikiPathways network set. Nothing is
# sent to the server when the list is empty.
if delete_networks:
    client.delete_networks_from_networkset('453c1c63-5c10-11e9-9f06-0ac135e8bacf', delete_networks)

#### After finishing the workflow, go to NDEx, and double check that all wikipathway networks are READ ONLY.

## Future Plan
#### Use GitHub Actions to run this workflow every month.