In [None]:
# Import libraries
import io
import json
from datetime import datetime
from typing import Dict, List
from urllib.parse import quote

import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import pandas as pd
import requests

In [None]:
# Step 1 - Call the API for available language models
# `requests` is imported once in the first cell of the notebook.
# NOTE(review): the wallet address is committed in the notebook; if it is meant to be
# private, load it from an environment variable instead.
CMDB_WALLET_ADDR = "0076e0b8328203a1beb056caf17a05a78929bd74"

request = requests.get(
    "http://cmdb.vectorspacebio.science/api/get_language_models",
    params={"vxv_wallet_addr": CMDB_WALLET_ADDR},
    timeout=60,  # never hang the kernel forever on an unresponsive server
)
# Stop here on an HTTP error instead of trying to parse an error page as JSON.
request.raise_for_status()
language_models = request.json()
print(language_models)

In [None]:
# Step 2 - Pick the language model to query (e.g., Biosciences_0001)
# The index selects an entry from the list returned by Step 1; 0 is the first entry.
model_index = 0
language_model = language_models[model_index]
print(language_model)

In [None]:
# Step 3 - Custom features used as the column labels of the request.
# Up to 10 labels can be defined here (up to 100,000 labels upon request).
column_labels = [
    "mena",
    "opa1",
    "cardiolipin",
    "plant",
]

In [None]:
# Step 4 - Optional: Submit a context-dependency - e.g., "mitochondria"
# Leave this as an empty string to send no context-dependency with the Step 5 request;
# Step 5 only appends the parameter to its query when the value is non-empty.
context_dependency = ""
# Example: uncomment the next line to use "mitochondria" as the context.
# context_dependency = "mitochondria"

In [None]:
# Step 5 - Call the API with the selected parameters and load the correlation matrix dataset
# This call may take several minutes to complete depending on the request.
# `requests`, `pd` and `quote` are imported once in the first cell of the notebook.
CMDB_WALLET_ADDR = "0076e0b8328203a1beb056caf17a05a78929bd74"

# (connect, read) timeouts in seconds; the read timeout is generous because the
# server may need several minutes to build the matrix.
CMDB_REQUEST_TIMEOUT = (10, 900)

# Specify the types of rows
row_types = "protein_names"

# Percent-encode every user-supplied value so that spaces, "&", "#" and similar
# characters cannot break or silently alter the query string. The labels are still
# sent as a single ", "-separated list (which encodes to "%2C%20" between labels).
encoded_column_labels = quote(", ".join(column_labels), safe="")

# Add the context dependency component of the query (omitted entirely when empty)
context_dependency_param = ""
if context_dependency != "":
    context_dependency_param = f"&context_dependency={quote(context_dependency, safe='')}"

# Retrieve the correlation matrix dataset
cmd_request = requests.get(
    "http://cmdb.vectorspacebio.science/api/get_correlation_matrix"
    f"?vxv_wallet_addr={CMDB_WALLET_ADDR}"
    f"&language_model={quote(str(language_model['name']), safe='')}"
    f"&row_types={quote(row_types, safe='')}"
    f"&column_labels={encoded_column_labels}"
    f"{context_dependency_param}",
    timeout=CMDB_REQUEST_TIMEOUT,
)
# Stop here on an HTTP error instead of building a DataFrame from an error payload.
cmd_request.raise_for_status()

# Convert the JSON response to a dataframe
cm_dataset = pd.DataFrame.from_dict(cmd_request.json())

# Preview the correlation matrix dataset
cm_dataset.head(10)

In [None]:
# This block preprocesses the data, outputs it to the terminal, injects it into a
# ready-to-go html template and saves the output in json format

MAX_ROWS = 100  # For quick tests limit the number of rows

heatmap_subset = cm_dataset.iloc[:MAX_ROWS]
cols = heatmap_subset.columns.tolist()
rows = heatmap_subset.index.tolist()


def _to_builtin(value):
    """Return NumPy scalars as plain Python numbers; pass any other value through.

    On NumPy 2+ a scalar inside a list prints as ``np.float64(0.5)``, which is not
    valid JavaScript once injected into the template, and NumPy integers cannot be
    serialised by ``json``. Plain Python numbers avoid both problems.
    """
    return value.item() if hasattr(value, "item") else value


# One [column position, row position, score] triple per cell, columns outermost.
# Positional access (.iat) matches the positional indices stored in the triple and
# still returns a single scalar when row or column labels are duplicated.
data = [
    [col_n, row_n, _to_builtin(heatmap_subset.iat[row_n, col_n])]
    for col_n in range(len(cols))
    for row_n in range(len(rows))
]

# Visualize the data
print("Column labels:")
print(cols)
print("\n")
print("Rows:")
print(rows)
print("\n")
print("Correlation scores:")
print(data)

# Load the heatmap template
with open("Heatmap/heatmap-template.html", "r") as file_in:
    text = file_in.read()

# Replace the placeholder text with the data above
text = text.replace("vsb_placeholder_column_labels", str(cols))
text = text.replace("vsb_placeholder_rows", str(rows))
# json.dumps formats ordinary numbers exactly like str() does, but writes missing or
# infinite scores as NaN / Infinity, which JavaScript accepts (Python's nan / inf are not).
text = text.replace("vsb_placeholder_correlation_scores", json.dumps(data))

# Save out the updated heatmap file
with open("Heatmap/heatmap.html", "w") as file_out:
    file_out.write(text)

# Save out the JSON
json_output = {"cols": cols, "rows": rows, "correlation_scores": data}
with open("Heatmap/output.json", "w") as file_out:
    json.dump(json_output, file_out)

In [None]:
# Show a reference picture of a heatmap: rows (of proteins) against the column labels
from IPython.display import Image

heatmap_example = Image(filename='heatmap-example.jpg')
heatmap_example

In [None]:
# Step 6 - Create a graph from the correlation matrix dataset
# To expand the network of relationships, submit each leaf node to the PPIN (Protein-Protein Interaction Network) API
# The number of leaf nodes can be increased by raising `top_n` from 5
# `requests` and `quote` are imported once in the first cell of the notebook.
PPIN_WALLET_ADDR = "d1030ededff0f348992fd4d41ae169dae99bf9c0"

max_depth = 1
top_n = 5

network_results = []

# One PPIN request per column of the correlation matrix dataset
for column_label in cm_dataset.columns:
    ppin_request = requests.get(
        "http://ppin.vectorspacebio.science/api/protein_network"
        f"?vxv_wallet_addr={PPIN_WALLET_ADDR}"
        # Percent-encode the label so spaces or "&" in it cannot corrupt the query string
        f"&query={quote(str(column_label), safe='')}"
        f"&branches={top_n}"
        f"&max_depth={max_depth}",
        timeout=(10, 300),  # (connect, read) seconds; avoids hanging the kernel forever
    )
    # Stop here on an HTTP error instead of storing an error payload as a result.
    ppin_request.raise_for_status()
    result = ppin_request.json()
    network_results.append(result)

In [None]:
# Step 7 - Prepare the data
# Every graph edge is stored as a [source, target] pair. The root node is the empty
# string, and it is linked to each of the column labels.
root_node = ""
nodes = [[root_node, label] for label in column_labels]

# Add the leaf nodes: walk every PPIN response, record each edge and remember its
# hidden relationship under the lower-cased "<source>_<target>" key.
hidden_relationships = {}
for network in network_results:
    for edge in network["graph_network"]:
        edge_source = edge["source"]
        edge_target = edge["target"]
        nodes.append([edge_source, edge_target])
        hidden_relationships[f"{edge_source}_{edge_target}".lower()] = edge["hidden_relationship"]

# Visualize the nodes and hidden relationship data
print("Nodes:\n")
print(nodes)
print("\n")

print("Hidden relationships:\n")
print(hidden_relationships)

# Fill the network graph template: both placeholders are swapped for the data above
with open("NetworkGraph/network_graph-template.html", "r") as template_file:
    text = (
        template_file.read()
        .replace("vsb_placeholder_nodes", str(nodes))
        .replace("vsb_placeholder_hidden_relationships", str(hidden_relationships))
    )

# Write the filled-in page out as network_graph.html
with open("NetworkGraph/network_graph.html", "w") as graph_file:
    graph_file.write(text)

# Save the same data as JSON
json_output = {"nodes": nodes, "hidden_relationships": hidden_relationships}
with open("NetworkGraph/output.json", "w") as json_file:
    json.dump(json_output, json_file)

In [None]:
# Show a reference picture of a network graph (loaded from network-graph-example.jpg)
from IPython.display import Image

network_graph_example = Image(filename='network-graph-example.jpg')
network_graph_example