### Experiment

In [None]:
import json
import os

from IPython.display import display
import algorithmx
import ipywidgets as widgets
import matplotlib
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# from utils.utils_go import *

In [None]:
# %run a_format_go.py
# %run b_prepro_go.py
# %run c-d_node-edge_go.py
# %run e_change_go.py
# %run f_biocyc_go.py

In [None]:
# %run experiments.py

### Summary

In [None]:
# print(exp)
# Load the parameter file stored with the experiment and echo the settings
# that the cells of this section read.
exp = "387f0495-9927-4296-833c-e64fa7c4ac62"
# The context manager closes the handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

controls = params["controls"]
print("Control:\t", controls)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

#### Preprocessing

In [6]:
# Graph statistics saved by the preprocessing step of experiment `exp`
# (preprocessing/graphs_data/summary.csv); displayed as the cell output.
df_preprocessing = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/summary.csv".format(exp))
df_preprocessing

Unnamed: 0,Group,Subgroup,Num. nodes,Num. edges,Density,Diameter
0,Yellow,dyn,3551,2956123,0.469001,3
1,Red,dyn,3549,2959604,0.470082,3
2,Orange,dyn,3550,2964634,0.470616,3


In [None]:
# Print the preprocessing summary as a LaTeX table (no index column, floats with two decimals).
# NOTE(review): the formatter is keyed on a "name" column, which is not among the
# columns listed in the rendered output of the previous cell; confirm it is still needed.
print(df_preprocessing.to_latex(index=False, formatters={"name": str.upper}, float_format="{:.2f}".format)) 

Correlation plots

In [None]:
# Show every correlation matrix stored for this experiment as a heatmap.
path = "experiments/output/{}/correlations".format(exp)
dir_list = sorted(os.listdir(path))

# The loop variable is deliberately not called `dir`: at notebook level that
# name would shadow the builtin `dir()` for the rest of the kernel session.
for file_name in dir_list:
	print(file_name)
	# Reuse `path` so the listed directory and the read directory cannot diverge.
	df_matrix = pd.read_csv("{}/{}".format(path, file_name), index_col=0)

	plt.imshow(df_matrix, cmap="coolwarm", interpolation="none")
	plt.colorbar()
	# Title: file name up to its first ".", without its first 13 characters.
	plt.title(file_name.split(".")[0][13:])
	plt.show()

#### Node-Edge embeddings (common subgraph)

In [None]:
# `f` selects which common-edges result set is read; it is part of the file
# names built in this cell and in the details cell further down.
f = "f1" # change: f1, f2

df_process = pd.read_csv("experiments/output/{}/common_edges/summary_{}.csv".format(exp, f))
df_process

In [None]:
"""vdf_comparation_temp = df_process.set_index(["Method", "Group", "Data var."])
df_comparation_temp.sort_values(by=["Method", "Group", "Num. edges"], ascending=False, inplace=True)
ax = df_comparation_temp.plot.bar(rot=90)
ax.grid() """

In [None]:
# Raw input table of the experiment: only the first four CSV columns are read,
# and column 0 becomes the index (used below to look up graph nodes with .loc).
df_join_raw = pd.read_csv("experiments/input/{}_raw.csv".format(exp), index_col=0, usecols=[0, 1, 2, 3])        
# df_join_raw.columns = ["mz", "name"]
df_join_raw

In [None]:
# details
# One row per common-edges graph: node count, edge count, density and the
# number of nodes whose "Metabolite name" in the raw table is not "Unknown".
list_details = []

# Baseline ("greedy") graphs: one file per group, data variation "none".
for group in groups_id:
	greedy_path = "experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, "greedy", group, "none")
	df_common_edges = pd.read_csv(greedy_path)
	G = nx.from_pandas_edgelist(df_common_edges, edge_attr=["weight"])

	df_nodes_raw = df_join_raw.loc[list(G.nodes())]
	has_name = df_nodes_raw["Metabolite name"] != "Unknown"

	list_details.append(["greedy", group, "none", G.number_of_nodes(), G.number_of_edges(), nx.density(G), len(df_nodes_raw[has_name])])

# Embedding-based graphs, in method -> group -> data variation order.
combinations = [
	(method, group, data_variation)
	for method in methods
	for group in groups_id
	for data_variation in data_variations
]
for method, group, data_variation in combinations:
	edges_path = "experiments/output/{}/common_edges/common_edges_{}_{}_{}_{}.csv".format(exp, f, method, group, data_variation)
	df_edges_filter_weight_filter = pd.read_csv(edges_path)

	G = nx.from_pandas_edgelist(df_edges_filter_weight_filter, "source", "target", edge_attr="weight")

	df_nodes_raw = df_join_raw.loc[list(G.nodes())]
	has_name = df_nodes_raw["Metabolite name"] != "Unknown"

	list_details.append([method, group, data_variation, G.number_of_nodes(), G.number_of_edges(), nx.density(G), len(df_nodes_raw[has_name])])

detail_columns = ["Method", "Group", "Data var.", "Num. nodes", "Num. edges", "Density", "With name"]
df_details = pd.DataFrame(list_details, columns=detail_columns)
df_details

In [None]:
# percentage
# Rewrite the node and edge counts of df_details as "count (percent%)".
# The first len(groups_id) rows are the baseline rows and serve as the 100%
# reference; every later row is compared with the baseline of its group.

max_nodes = df_details.iloc[:3, :]["Num. nodes"].to_list()
max_edges = df_details.iloc[:3, :]["Num. edges"].to_list()

total = []
nodes = []
edges = []

# Baseline rows: remember their counts and label them (each is its own reference).
# Plain iteration: the previous `tqdm(...)` wrapper raised NameError because
# tqdm is never imported in this notebook.
for i in df_details.index[:len(groups_id)]:
	num_nodes = df_details["Num. nodes"][i]
	num_edges = df_details["Num. edges"][i]
	total.append([num_nodes, num_edges])
	nodes.append("{} ({}%)".format(num_nodes, round(num_nodes*100/num_nodes, 2)))
	edges.append("{} ({}%)".format(num_edges, round(num_edges*100/num_edges, 2)))

# Remaining rows: `index` points at the baseline of the current group. It moves
# on after every len(data_variations) rows and wraps around after the last group.
index = 0
c = 0
for i in df_details.index[len(groups_id):]:
	num_nodes = df_details["Num. nodes"][i]
	num_edges = df_details["Num. edges"][i]
	nodes.append("{} ({}%)".format(num_nodes, round(num_nodes*100/total[index][0], 2)))
	edges.append("{} ({}%)".format(num_edges, round(num_edges*100/total[index][1], 2)))

	c += 1
	if c % len(data_variations) == 0:
		index = (index + 1) % len(groups_id)

df_results_ = df_details.copy()
df_results_["Num. nodes"] = nodes
df_results_["Num. edges"] = edges
df_results_

In [None]:
# Export the percentage table (without the index) to the working directory.
df_results_.to_csv("z_percentage_mutan_random.csv", index=False)

#### Similarity analysis (change detection)

In [None]:
# Summary file written to the experiment's changes/ output directory.
df_changes = pd.read_csv("experiments/output/{}/changes/summary.csv".format(exp))
df_changes

In [None]:
# Re-read the raw input table with only its first three CSV columns (column 0
# as index); this rebinds the `df_join_raw` loaded earlier in the notebook.
df_join_raw = pd.read_csv("experiments/input/{}_raw.csv".format(exp), index_col=0, usecols=[0, 1, 2])        
# df_join_raw.columns = ["mz", "name"]
df_join_raw

In [None]:
# Count metabolites by name in the raw data.
# size1: all rows; size2: rows whose "Metabolite name" is not "Unknown".
size1 = len(df_join_raw)
size2 = len(df_join_raw[df_join_raw["Metabolite name"] != "Unknown"])
print(size1, size2)

In [None]:
# Put the "greedy" baseline in front of the embedding methods.
# The membership check keeps the cell idempotent: running it a second time
# must not add a second "greedy" entry (which would duplicate rows below).
if "greedy" not in methods:
	methods.insert(0, "greedy")
methods

In [None]:
# details
# One row per (method, data variation, group pair): size of the graph built
# from the filtered change edges and number of its nodes with a known name.

is_significant = False # change, True: significant changes (*), False: no significant changes (non-* and same labels)
list_details = []

for method in methods:
	# Kept in a local name on purpose: rebinding `data_variations` here would
	# silently overwrite the list loaded from parameters.json for all other cells.
	if method == "greedy":
		method_variations = ["none"]
	else:
		method_variations = ["none", "str", "dyn"]
	for data_variation in method_variations:
		for group in groups:
			df_change_filter = pd.read_csv("experiments/output/{}/changes/changes_edges_log2_{}_{}_{}_{}.csv".format(exp, method, group[0], group[1], data_variation))

			if is_significant:
				df_change_filter = df_change_filter[df_change_filter["significant"] == "*"]
			else:
				# Not flagged "*" and carrying one of the four same-letter labels.
				df_change_filter = df_change_filter[df_change_filter["significant"] != "*"]
				df_change_filter = df_change_filter[df_change_filter["label"].isin(["nn", "NN", "pp", "PP"])]

			# df_change_filter = df_change_filter[df_change_filter["label"].str.contains("?", regex=False) == False] # change uncomment this line to view (intersection)

			G = nx.from_pandas_edgelist(df_change_filter, "source", "target", edge_attr="weight1")
			nodes = list(G.nodes())
			df_nodes_raw = df_join_raw.loc[nodes]

			num_nodes_name = len(df_nodes_raw[df_nodes_raw["Metabolite name"] != "Unknown"])

			list_details.append([method, "-".join(group), data_variation, G.number_of_nodes(), G.number_of_edges(), nx.density(G), num_nodes_name])

df_details = pd.DataFrame(list_details, columns=["Method", "Groups", "Data var.", "Num. nodes", "Num. edges", "Density", "With name"])
df_details

In [None]:
# Export the details table (without the index) to z.csv in the working directory.
df_details.to_csv("z.csv", index=False)

In [None]:
# details count labels
# One row per (method, data variation, group pair) with the number of filtered
# change edges carrying each label in `labels`.

is_significant = False # change, True: significant changes (*), False: no significant changes (non-* and same labels)

list_details = []
labels = ['PP', 'Pp', 'PN', 'Pn', 'P?', 'pP', 'pp', 'pN', 'pn', 'p?', 'NP', 'Np', 'NN', 'Nn', 'N?', 'nP', 'np', 'nN', 'nn', 'n?', '?P', '?p', '?N', '?n']

for method in methods:
	# Kept in a local name on purpose: rebinding `data_variations` here would
	# silently overwrite the list loaded from parameters.json for all other cells.
	if method == "greedy":
		method_variations = ["none"]
	else:
		method_variations = ["none", "str", "dyn"]
	for data_variation in method_variations:
		for group in groups:
			df_change_filter = pd.read_csv("experiments/output/{}/changes/changes_edges_log2_{}_{}_{}_{}.csv".format(exp, method, group[0], group[1], data_variation))

			if is_significant:
				df_change_filter = df_change_filter[df_change_filter["significant"] == "*"]
			else:
				# Not flagged "*" and carrying one of the four same-letter labels.
				df_change_filter = df_change_filter[df_change_filter["significant"] != "*"]
				df_change_filter = df_change_filter[df_change_filter["label"].isin(["nn", "NN", "pp", "PP"])]

			counts = [len(df_change_filter[df_change_filter.label == label]) for label in labels]

			list_details.append([method, "-".join(group), data_variation] + counts)

df_details = pd.DataFrame(list_details, columns=["Method", "Groups", "Data var."] + labels)

In [None]:
# Export the label-count table (without the index).
# NOTE(review): the graph-details export a few cells earlier writes to the same
# "z.csv" path, so that file is replaced here; confirm this is intended.
df_details.to_csv("z.csv", index=False)

### Filter graph

In [None]:
# print(exp)
# Load the parameter file of the experiment inspected in this section.
exp = "exp201"
# The context manager closes the handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
# For each experiment in `range_` and each data variation, count the edges
# that the common-edges graph of every (method, group) has among the node ids
# 0..5. `edges_count` gets one list per (experiment, data variation), labelled
# by `label_y`; `label_x` names the (method, group) entries inside each list.
edges_count = []
label_x = []
label_y = []
range_ = range(180, 184) # change

for e in range_:
	exp = "exp" + str(e)
	# Context manager: one handle per experiment is opened, so close each one.
	with open("experiments/output/{}/parameters.json".format(exp)) as file:
		params = json.load(file)

	methods = params["methods"]
	# print("Methods:\t", methods)

	data_variations = params["data_variations"]
	# print("Data variations:", data_variations)

	dimension = params["dimension"]
	# print("Dimension:", dimension)

	groups_id = params["groups_id"]
	# print("Groups id:\t", groups_id)

	for data_variation in data_variations:
		# Rebuilt on every pass; the list left after the loops labels the heatmap rows.
		label_x = []
		temp = []
		for method in methods:
			for group_id in groups_id:
				# print(method, group_id, data_variation)
				df_edges_filter_weight_filter = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, method, group_id, data_variation))

				G = nx.from_pandas_edgelist(df_edges_filter_weight_filter, source="source", target="target", edge_attr=["weight"], create_using=nx.Graph())
				# graph_partial_detail(G, edges=True)
				SG = G.subgraph([0, 1, 2, 3, 4, 5])
				temp.append(SG.number_of_edges())
				label_x.append("{}-{}".format(method, group_id))

		label_y.append("{}-{}-{}".format(e, data_variation, dimension))
		edges_count.append(temp)

In [None]:
# Heatmap of the collected edge counts: one column per entry of label_y,
# one row per entry of label_x, with the count written inside each cell.
counts_by_row = np.array(edges_count).T

heatmap = go.Heatmap(
	z=counts_by_row,
	x=label_y,
	y=label_x,
	xgap=1,
	ygap=1,
	hoverongaps=True,
	colorscale="Plasma", # RdBu, Plasma
)
fig = go.Figure(data=heatmap)
fig = fig.update_traces(text=counts_by_row, texttemplate="%{text}", hovertemplate=None)

# 30 px per cell in both directions.
cell_px = 30
fig.update_layout(autosize=False, width=cell_px * len(label_y), height=cell_px * len(label_x))
fig.show()

In [None]:
exp_num = "exp155" # change

# Graphs to summarise, in output order: the "greedy" baseline of every group
# first, then every method -> group -> data variation combination.
graph_keys = [("greedy", group, "none") for group in groups_id]
graph_keys += [
	(method, group, data_variation)
	for method in methods
	for group in groups_id
	for data_variation in data_variations
]

results = []
for method, group, data_variation in graph_keys:
	edges_file = "experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp_num, method, group, data_variation)
	df_common_edges = pd.read_csv(edges_file)
	G = nx.from_pandas_edgelist(df_common_edges, edge_attr=["weight"])
	results.append([method, group, data_variation, G.number_of_nodes(), G.number_of_edges()])

df_results = pd.DataFrame(results, columns=["Method", "Group", "Data var.", "Num. nodes", "Num. edges"])
# df_results.to_csv("experiments/output/{}/common_edges/details.csv".format(exp), index=False)
# df_results.replace("greedy", "baseline", inplace=True)
df_results

### Plot node-embeddings

In [None]:
# Load the parameter file of the experiment whose node embeddings are plotted.
exp = "exp5"
# The context manager closes the handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]

print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

controls = params["controls"]
print("Control:\t", controls)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
def plot_embedding_3d(df_embeddings, labels, reduction="pca", embedding="node", title="", title_legend="", save=False):
	"""Draw embeddings as a 3D scatter plot with one colour per label.

	Parameters
	----------
	df_embeddings : pandas.DataFrame
		One row per point. With more than three columns the data is reduced
		to three components first; with fewer than three columns the missing
		coordinates are filled with zeros.
	labels : pandas.Series
		One integer label per row of ``df_embeddings``.
	reduction : str
		"pca", "tsne" or "umap"; only used when a reduction is needed.
	embedding : str
		"node" or "edge": colours from the Dark2 palette, series named
		"Br<label + 1>". "outlier": colours from the coolwarm map, series
		named "inliers"/"outliers" (the label is used as index into both).
		Any other value draws no points.
	title : str
		File name (without extension) of the PDF written when ``save`` is True.
	title_legend : str
		Currently unused; kept for interface compatibility.
	save : bool
		If True the figure is saved to ``experiments/plots/<title>.pdf``.

	Returns
	-------
	The 3D axes the points were drawn on.

	Raises
	------
	ValueError
		If a reduction is needed and ``reduction`` is not a supported name.
	"""
	# NOTE(review): PCA, TSNE and umap are not imported in the visible part of
	# the notebook; presumably they came from the commented-out
	# `utils.utils_go` star import - confirm they are in scope.
	n_components = 3
	n_columns = df_embeddings.shape[1]
	if n_columns > n_components:
		if reduction == "pca":
			df_embeddings_red = PCA(n_components=n_components).fit_transform(df_embeddings)
		elif reduction == "tsne":
			df_embeddings_red = TSNE(n_components=n_components).fit_transform(df_embeddings)
		elif reduction == "umap":
			# UMAP reduces to two components unless told otherwise; with only two
			# coordinate columns the label column would be plotted as z.
			df_embeddings_red = umap.UMAP(n_components=n_components).fit_transform(df_embeddings)
		else:
			raise ValueError("Unknown reduction {!r}; expected 'pca', 'tsne' or 'umap'".format(reduction))
	else:
		df_embeddings_red = df_embeddings.values
		if n_columns < n_components:
			# Pad with zero coordinates so that positions 0..2 are always
			# coordinates and never the label column appended below.
			padding = np.zeros((df_embeddings_red.shape[0], n_components - n_columns))
			df_embeddings_red = np.hstack([df_embeddings_red, padding])

	fig = plt.figure(figsize=(6, 6))
	ax = fig.add_subplot(projection="3d")

	df_points = pd.DataFrame(df_embeddings_red)
	df_points["labels"] = labels.values

	unique_labels = np.unique(labels)
	# Read the palette through `plt`, the only matplotlib name this notebook
	# imports; the index wraps around so any number of labels gets a colour.
	palette = plt.cm.Dark2.colors
	# cmap = matplotlib.colormaps.get_cmap("coolwarm", len(unique_labels))
	outlier_colors = plt.get_cmap("coolwarm")(np.linspace(0, 1, len(unique_labels)))

	for i, label in enumerate(unique_labels):
		df_label = df_points[df_points["labels"] == label]

		x = df_label.iloc[:, 0]
		y = df_label.iloc[:, 1]
		z = df_label.iloc[:, 2]

		if embedding in ("node", "edge"):
			color = palette[i % len(palette)]
			ax.scatter(x, y, z, s=100, alpha=0.5, color=color, marker=".", label="Br{}".format(label + 1))
		elif embedding == "outlier":
			ax.scatter(x, y, z, s=100, alpha=0.5, c=[outlier_colors[label]], marker=".", label=["inliers", "outliers"][label]) #, edgecolors="black", linewidth=0.5)

	# Series labels are set above, but no legend or title is drawn: the
	# corresponding calls are disabled.
	# plt.title(title)
	# plt.legend(title=title_legend, ncol=1, loc=0, bbox_to_anchor=(1, 0.95)) # ncol=len(unique_labels)
	if save:
		plt.savefig("experiments/plots/{}.pdf".format(title), format="pdf", bbox_inches="tight") # change
	plt.show()
	return ax

In [None]:
# Plot the node embeddings of every (method, group, data variation) of the
# selected experiment(s), coloured by subgroup.
iteration = 1 # change
save = False # change

for e in [5]: # change, experiment
	exp = "exp" + str(e)
	for method in methods[:]: # change
		for group_id in groups_id[:]: # change
			for data_variation in data_variations: # change
				if data_variation == "none":
					# One embedding file per subgroup; tag the rows of each file
					# with the position of its subgroup (0, 1, 2, ...).
					frames = []
					for k, subgroup_id in enumerate(subgroups_id[group_id]):
						print(exp, method, group_id, subgroup_id, iteration)
						df_node_embeddings = pd.read_csv("experiments/output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(exp, method, group_id, subgroup_id, iteration), index_col=0)

						df_node_embeddings["subgroup"] = k
						frames.append(df_node_embeddings)
					# Concatenate once instead of growing a frame inside the loop.
					df_node_embeddings_concat = pd.concat(frames) if frames else pd.DataFrame()
				else:
					print(exp, method, group_id, data_variation, iteration)
					df_nodes = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(exp, group_id, data_variation), index_col=0, usecols=[0, 1])
					# Subgroup number from the first character of the node id: "A" -> 0, "B" -> 1, ...
					df_nodes["subgroup"] = df_nodes["id"].apply(lambda x: ord(x[0]) - 65)

					df_node_embeddings = pd.read_csv("experiments/output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(exp, method, group_id, data_variation, iteration), index_col=0)

					# Assigned by index alignment between the two frames.
					df_node_embeddings["subgroup"] = df_nodes["subgroup"]
					df_node_embeddings_concat = df_node_embeddings

				# All columns but the last are coordinates; the last one is the subgroup label.
				plot_embedding_3d(df_node_embeddings_concat.iloc[:, :-1], df_node_embeddings_concat.iloc[:, -1],
									reduction="pca", embedding="node", title="node-embeddings_{}-{}-{}-{}".format(exp, method, group_id, data_variation), title_legend="", save=save)

### Plot edge-embeddings or outliers

In [None]:
# Load the parameter file of the experiment whose edge embeddings are plotted.
exp = "exp911"
# The context manager closes the handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
import plotly.graph_objects as go

def plot_embeddings_3d_plotly(df_embeddings, color):
	"""Show an interactive plotly 3D scatter of the first three embedding columns.

	df_embeddings: frame whose columns 0, 1 and 2 (by position) are used as x, y and z.
	color: marker colour specification, mapped through the "Portland" colour scale.
	"""
	marker_style = dict(
		size=6,
		color=color, # set color to an array/list of desired values
		colorscale="Portland",   # choose a colorscale
		opacity=0.8
	)
	x_values, y_values, z_values = (df_embeddings.iloc[:, axis] for axis in range(3))
	points = go.Scatter3d(x=x_values, y=y_values, z=z_values, mode="markers", marker=marker_style)

	fig = go.Figure(data=[points])
	# tight layout: no margins around the scene
	fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
	fig.show()

In [None]:
iteration = 1 # change

plot_option = "outlier" # change: edge, outlier
save = True

# Per plot option: position of the label column and the title template.
plot_settings = {
	"edge": (-2, "edge-embeddings_{}-{}-{}-{}"),
	"outlier": (-1, "outliers_{}-{}-{}-{}"),
}

for e in [911]: # change
	exp = "exp" + str(e)
	for method in methods[:]: # change
		for group_id in groups_id[:]: # change
			for data_variation in data_variations: # change
				print(exp, method, group_id, data_variation, iteration)
				embeddings_file = "experiments/output/{}/edge_embeddings/edge-embeddings_concat_outlier_{}_{}_{}_{}.csv".format(exp, method, group_id, data_variation, iteration)
				df_edge_embeddings_concat = pd.read_csv(embeddings_file)

				# plot without synthetic edges (rows whose subgroup is -1)
				keep = df_edge_embeddings_concat["subgroup"] != -1
				df_edge_embeddings_concat = df_edge_embeddings_concat[keep]

				# Any other plot_option draws nothing.
				if plot_option in plot_settings:
					label_position, title_template = plot_settings[plot_option]
					# Coordinates: everything between the first two and the last two columns.
					plot_embedding_3d(
						df_edge_embeddings_concat.iloc[:, 2:-2],
						df_edge_embeddings_concat.iloc[:, label_position],
						reduction="pca",
						embedding=plot_option,
						title=title_template.format(exp, method, group_id, data_variation),
						title_legend="",
						save=save,
					)

### Average runtimes

In [None]:
# print(exp)
# Load the parameter file of the first experiment of the runtime series.
exp = "exp1101"
# The context manager closes the handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
# Average the node- and edge-embedding runtimes over blocks of `iterations`
# consecutive experiments; each block is stored under the next entry of `dimensions`.
# edges_count, label_x and label_y are reset here but not used in this cell.
edges_count = []
label_x = []
label_y = []
# Running sums, one slot per row of runtimes.csv.
# NOTE(review): the length len(methods) * 3 presumably means three data variations per method - confirm.
runtime_node_embedding = np.zeros(len(methods) * 3)
runtime_edge_embedding = np.zeros(len(methods) * 3)

dimensions = [2, 3, 4, 8, 16, 32, 64, 128, 256, 512] # change [3, 4, 8, 16, 32, 64, 128, 256, 512]
iterations = 10 # change 1, 3, 5, 10
range_ = range(1101, 1201) # change exp
k = 0 # index of the block (and of its entry in `dimensions`)
df_runtimes = pd.DataFrame()

for i, e in enumerate(range_, 1):
	exp = "exp" + str(e)
	df_process = pd.read_csv("experiments/output/{}/common_edges/runtimes.csv".format(exp))
	runtime_node_embedding += df_process["Runtime node embedding"]
	runtime_edge_embedding += df_process["Runtime edge embedding"]
	
	# A block of `iterations` experiments is complete: store its mean and reset the sums.
	if i % iterations == 0:
		runtime_node_embedding = runtime_node_embedding / iterations
		runtime_edge_embedding = runtime_edge_embedding / iterations
		# print(runtime_node_embedding)        
		# Node columns are inserted at position k and edge columns appended at the
		# end, so all node columns come before all edge columns.
		df_runtimes.insert(k, "Node embedding ({})".format(dimensions[k]), runtime_node_embedding)
		df_runtimes["Edge embedding ({})".format(dimensions[k])] = runtime_edge_embedding
		
		k += 1
		runtime_node_embedding = np.zeros(len(methods) * 3)
		runtime_edge_embedding = np.zeros(len(methods) * 3)
		
# Row descriptors are taken from the last runtimes.csv that was read.
df_runtimes.insert(0, "Method", df_process["Method"])
df_runtimes.insert(1, "Data variation", df_process["Data variation"])
df_runtimes

In [None]:
# nodes + edges
""" fig = go.Figure(data=go.Heatmap( 
	z=df_runtimes.iloc[:, 2:],
	x=df_runtimes.iloc[:, 2:].columns,
	y=df_runtimes["Method"] + "-" +  df_runtimes["Data variation"],
	xgap=1,
	ygap=1,
	hoverongaps=True,
	colorscale="Plasma")) # RdBu, Plasma
fig = fig.update_traces(text=df_runtimes.iloc[:, 2:].round(2), texttemplate="%{text}", hovertemplate=None)
fig.update_layout(
	autosize=False,
	width=60 * len(df_runtimes.iloc[:, 2:].columns),
	height=30 * len(df_runtimes["Method"]),
)
fig.show() """

In [None]:
""" from sklearn.preprocessing import FunctionTransformer
transformer = FunctionTransformer(np.log10, validate=True) """

In [None]:
""" from plotly.subplots import make_subplots
import plotly.graph_objects as go

fig = make_subplots(rows=1, cols=2, shared_yaxes=False)

fig.add_trace(
	go.Heatmap( 
		z=transformer.transform(df_runtimes.iloc[:, 2:9]),
		x=df_runtimes.iloc[:, 2:9].columns,
		y=df_runtimes["Method"] + "-" +  df_runtimes["Data variation"],
		xgap=1,
		ygap=1,
		hoverongaps=True),
	row=1, col=1
)

fig.add_trace(
	go.Heatmap( 
		z=df_runtimes.iloc[:, 9:],
		x=df_runtimes.iloc[:, 9:].columns,
		y=df_runtimes["Method"] + "-" +  df_runtimes["Data variation"],
		xgap=1,
		ygap=1,
		hoverongaps=True),
	row=1, col=2
)

fig.update_layout(height=600, width=1000, title_text="Side By Side Subplots")
# fig.update_layout(coloraxis=dict(colorscale='Bluered_r'), showlegend=False)
fig.show() """

In [None]:
# Keep only the rows of the four listed methods.
df_runtimes = df_runtimes[df_runtimes["Method"].isin(["dgi-tran", "argva-base", "vgae-line", "vgae-base"])]
df_runtimes

In [None]:
# Save the filtered runtime table (index included).
df_runtimes.to_csv("experiments/run_details/runtimes_synthetic_random.csv") # change

In [None]:
# Runtime values as a plain array: every column after "Method" and "Data variation".
# z = transformer.transform(df_runtimes.iloc[:, 2:].values)
z = df_runtimes.iloc[:, 2:].values
z

In [None]:
# nodes
""" fig = go.Figure(data=go.Heatmap( 
		z=z[:, :7], # change 5, 7
		x=df_runtimes.iloc[:, 2:9].columns, # change 2:7, 2:9
		y=df_runtimes["Method"] + "-" +  df_runtimes["Data variation"],
		xgap=1,
		ygap=1,
		hoverongaps=True,
		colorscale="Plasma")) # RdBu, Plasma
fig = fig.update_traces(text=np.round(z[:, :7], decimals=2), texttemplate="%{text}", hovertemplate=None) # change 5, 7
fig.update_layout(
	autosize=False,
	width=90 * len(df_runtimes.iloc[:, 2:9].columns), # change 2:7, 2:9
	height=40 * len(df_runtimes["Method"]),
)
fig.show() """

In [None]:
# Replace the internal method ids by the display names used in the figures.
# Mapping by value instead of assigning a positional list: it does not depend
# on the row order or on there being exactly 12 rows, it does not write into
# the filtered slice in place, and the cell can be re-run safely.
# NOTE(review): the id -> name pairs follow the order of the method filter list
# used when `df_runtimes` was restricted to four methods; confirm the pairing.
method_labels = {"dgi-tran": "DGI", "argva-base": "ARGVA", "vgae-line": "LVGAE", "vgae-base": "VGAE"}
df_runtimes = df_runtimes.assign(Method=df_runtimes["Method"].replace(method_labels))
df_runtimes

In [None]:
# edges
""" fig = go.Figure(data=go.Heatmap( 
		z=z[:, 7:], # change 5, 7
		x=df_runtimes.iloc[:, 9:].columns, # change 7, 9
		y=df_runtimes["Method"] + "-" +  df_runtimes["Data variation"],
		xgap=1,
		ygap=1,
		hoverongaps=True,
		colorscale="Plasma")) # RdBu, Plasma
fig = fig.update_traces(text=np.round(z[:, 7:], decimals=2), texttemplate="%{text}", hovertemplate=None) # change 5, 7
fig.update_layout(
	autosize=False,
	width=90 * len(df_runtimes.iloc[:, 9:].columns), # change 7, 9
	height=40 * len(df_runtimes["Method"]),
)
fig.show() """

In [None]:
# Runtimes node-embeddings
import matplotlib.pyplot as plt 
import numpy as np 

# One colour per group of three consecutive rows of df_runtimes; the three
# rows of a group are told apart by line style. (An earlier palette that was
# immediately overwritten has been dropped.)
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
line_styles = ["-", "--", ":"]

x = ["2", "3", "4", "8", "16", "32", "64", "128", "256", "512"] # change ["3", "4", "8", "16", "32", "64", "128"]
end = 10 # change 5:32, 7:128, 10: 512
name = "mutant_random" # change

plt.figure(figsize=(6, 4))
for k, i in enumerate(range(0, len(df_runtimes), 3)):
	# print(df_runtimes.iloc[i, :].values)
	for offset, linestyle in enumerate(line_styles):
		row = i + offset
		# The first `end` columns of z are plotted as the node-embedding runtimes.
		plt.plot(x, z[row, :end], label="{}-{}".format(df_runtimes.iloc[row, 0], df_runtimes.iloc[row, 1]), linestyle=linestyle, color=colors[k], linewidth=1)

# plt.legend(bbox_to_anchor=(1, 0.45), loc='upper right', fontsize="9", ncol=2) # cancer
# plt.legend(bbox_to_anchor=(1, 0.9), loc='upper right', fontsize="9", ncol=2) # mutant

plt.xticks(x)
plt.xlabel("Dimensions")
plt.ylabel("Runtimes (sec.)")

plt.savefig("experiments/plots/runtimes_node_embeddings_{}.pdf".format(name), format="pdf", bbox_inches="tight") # change
plt.show()

In [None]:
# Runtimes edge-embeddings
import matplotlib.pyplot as plt 
import numpy as np 

# One colour per group of three consecutive rows of df_runtimes; the three
# rows of a group are told apart by line style. (An earlier palette that was
# immediately overwritten has been dropped.)
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
line_styles = ["-", "--", ":"]

x = ["2", "3", "4", "8", "16", "32", "64", "128", "256", "512"] # change ["3", "4", "8", "16", "32", "64", "128", "256", "512"]
end = 10 # change 5:32, 7:128, 10: 512

plt.figure(figsize=(6, 4))
for k, i in enumerate(range(0, len(df_runtimes), 3)):
	# print(df_runtimes.iloc[i, :].values)
	for offset, linestyle in enumerate(line_styles):
		row = i + offset
		# The columns of z from `end` onwards are plotted as the edge-embedding runtimes.
		plt.plot(x, z[row, end:], label="{}-{}".format(df_runtimes.iloc[row, 0], df_runtimes.iloc[row, 1]), linestyle=linestyle, color=colors[k], linewidth=1)

# plt.legend(bbox_to_anchor=(1, 1.02), ncol=1)
plt.legend(fontsize="9", ncol=2)
plt.xticks(x)
plt.xlabel("Dimensions")
plt.ylabel("Runtimes (sec.)")

# `name` (dataset tag of the output file) is set in the node-runtime plotting cell.
plt.savefig("experiments/plots/runtimes_edge_embeddings_{}.pdf".format(name), format="pdf", bbox_inches="tight") # change
plt.show()

### Average count nodes/edges

In [None]:
# For each experiment in `range_` and each data variation, count the edges of
# the common-edges graph of every (method, group). The result is a table with
# one row per (method, group) and one column per (experiment, data variation).
edges_count = []
label_x = []
label_y = []
range_ = range(1101, 1201) # change

for e in range_:
	exp = "exp" + str(e)
	# Context manager: one handle per experiment is opened, so close each one.
	with open("experiments/output/{}/parameters.json".format(exp)) as file:
		params = json.load(file)

	methods = params["methods"]
	# print("Methods:\t", methods)

	data_variations = params["data_variations"]
	# print("Data variations:", data_variations)

	dimension = params["dimension"]
	# print("Dimension:", dimension)

	groups_id = params["groups_id"]
	# print("Groups id:\t", groups_id)

	for data_variation in data_variations:
		# Rebuilt on every pass; the list left after the loops becomes the row index.
		label_x = []
		temp = []
		for method in methods:
			for group_id in groups_id:
				# print(method, group_id, data_variation)
				df_edges_filter_weight_filter = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, method, group_id, data_variation))

				G = nx.from_pandas_edgelist(df_edges_filter_weight_filter, source="source", target="target", edge_attr=["weight"], create_using=nx.Graph())
				# graph_partial_detail(G, edges=True)
				# SG = G.subgraph([0, 1, 2, 3, 4, 5])
				temp.append(G.number_of_edges())
				label_x.append("{}-{}".format(method, group_id))

		label_y.append("{}-{}-{}".format(e, data_variation, dimension))
		edges_count.append(temp)

df_edges_count = pd.DataFrame(np.array(edges_count).T, index=label_x, columns=label_y)
df_edges_count

In [None]:
""" fig = go.Figure(data=go.Heatmap( 
				z=df_edges_count.values,
				x=df_edges_count.columns,
				y=df_edges_count.index,
				xgap=1,
				ygap=1,
				hoverongaps=True,
				colorscale="Plasma")) # RdBu, Plasma
fig = fig.update_traces(text=df_edges_count.values, texttemplate="%{text}", hovertemplate=None)
fig.update_layout(
	autosize=False,
	width=60 * len(df_edges_count.columns),
	height=30 * len(df_edges_count.index),
)
fig.show() """

Calculate average

In [None]:
# average
# Average the edge counts over blocks of `iterations` consecutive column
# triples of df_edges_count; each block is stored under the next entry of `dimensions`.

df_edges_count_avg = pd.DataFrame()
dimensions = [2, 3, 4, 8, 16, 32, 64, 128, 256, 512] # change 3, 4, 8, 16, 32, 64, 128, 256, 512
iterations = 10 # change 3, 5, 10
k = 0 # index of the block (and of its entry in `dimensions`)

# Running sums with one slot per (method, group) row.
a = np.zeros(len(methods) * len(groups_id))
b = np.zeros(len(methods) * len(groups_id))
c = np.zeros(len(methods) * len(groups_id))

# i is the middle column of each triple: columns i - 1, i and i + 1 are added to
# a, b and c, which are stored below as "none", "str" and "dyn".
# NOTE(review): assumes every experiment contributes its columns in that order - confirm.
for i in range(1, len(df_edges_count.columns) + 1, 3):
	# print(i - 1, i, i + 1)
	a += df_edges_count.iloc[:, i - 1]
	b += df_edges_count.iloc[:, i]
	c += df_edges_count.iloc[:, i + 1]
	
	# True after every `iterations` triples: store the block means and reset the sums.
	if (i + 2) % (3 * iterations) == 0:
		a = a / iterations
		b = b / iterations
		c = c / iterations
		# print(k)
		df_edges_count_avg["none-{}".format(dimensions[k])] = a
		df_edges_count_avg["str-{}".format(dimensions[k])] = b
		df_edges_count_avg["dyn-{}".format(dimensions[k])] = c
		
		k += 1
		a = np.zeros(len(methods) * len(groups_id))
		b = np.zeros(len(methods) * len(groups_id))
		c = np.zeros(len(methods) * len(groups_id))

df_edges_count_avg

In [None]:
# save
# Write the averaged edge counts (index included) to the run_details folder.

df_edges_count_avg.to_csv("experiments/run_details/edges_avg_synthetic_random.csv") # change

In [None]:
""" fig = go.Figure(data=go.Heatmap( 
				z=df_edges_count_avg.values,
				x=df_edges_count_avg.columns,
				y=df_edges_count_avg.index,
				xgap=1,
				ygap=1,
				hoverongaps=True,
				colorscale="Plasma")) # RdBu, Plasma
fig = fig.update_traces(text=df_edges_count_avg.round(0).values, texttemplate="%{text}", hovertemplate=None)
fig.update_layout(
	autosize=False,
	width=60 * len(df_edges_count_avg.columns),
	height=40 * len(df_edges_count_avg.index),
)
fig.show() """

In [None]:
# Every third row of the averaged table, starting with the first, rounded to two decimals.
df_edges_count_avg.iloc[0::3, :].round(2)

Get values by properties

In [None]:
# get number edges by properties (mutant, cancer)
# Split the averaged table by row position: every third row starting at
# position 0 (a), 1 (b) and 2 (c), rounded to two decimals.

a = df_edges_count_avg.iloc[0::3, :].round(2).values # pck1
b = df_edges_count_avg.iloc[1::3, :].round(2).values # zwf1
c = df_edges_count_avg.iloc[2::3, :].round(2).values # WT
print(a.tolist())
print(b.tolist())
print(c.tolist())
print()

# add greedy
""" a = np.insert(a, len(a), [73]*len(a[0]), axis=0)
b = np.insert(b, len(b), [1777]*len(b[0]), axis=0)
c = np.insert(c, len(c), [5]*len(c[0]), axis=0)
print(a)
print(b)
print(c) """

In [None]:
# get number edges by properties (new cancer)
# Every ninth row of the averaged table starting at position 0 (a), 1 (b) and
# 2 (c), rounded to two decimals. Rebinds a, b and c.

a = df_edges_count_avg.iloc[0::9, :].round(2).values 
b = df_edges_count_avg.iloc[1::9, :].round(2).values
c = df_edges_count_avg.iloc[2::9, :].round(2).values
print(a.tolist())
print(b.tolist())
print(c.tolist())

In [None]:
# get number edges by properties (leaf, synthetic)
# Even-positioned rows (a) and odd-positioned rows (b) of the averaged table,
# rounded to two decimals. Rebinds a and b; c is left unchanged.

a = df_edges_count_avg.iloc[0::2, :].round(2).values # new
b = df_edges_count_avg.iloc[1::2, :].round(2).values # old
print(a.tolist())
print(b.tolist())

# add greedy
""" a = np.insert(a, len(a), [8365]*len(a[0]), axis=0)
b = np.insert(b, len(b), [7618]*len(b[0]), axis=0)
print(a)
print(b) """

Format names

In [None]:
# mutant, cancer, leaf, synthetic
# Column labels "<data variation>-<dimension>": the three data variations for
# each dimension, dimensions in increasing order.
columns = [
	"{}-{}".format(variation, dimension)
	for dimension in (2, 3, 4, 8, 16, 32, 64, 128, 256, 512)
	for variation in ("none", "str", "dyn")
]

# Row labels of the three heatmap panels (same methods, independent lists).
row1 = ["DGI", "ARGVA", "LVGAE", "VGAE"] #, "Greedy"]
row2 = row1.copy()
row3 = row1.copy()

In [None]:
# mutant, reinhard
""" import matplotlib.pyplot as plt
import numpy as np

ax1 = plt.subplot(3, 1, 1)
ax1.set_xticks(np.arange(len(columns)), labels="")
ax1.set_yticks(np.arange(len(row1)), labels=row1)
plt.imshow(a, cmap="coolwarm_r")

ax2 = plt.subplot(3 ,1, 2)
ax2.set_xticks(np.arange(len(columns)), labels="")
ax2.set_yticks(np.arange(len(row2)), labels=row2)
plt.imshow(b, cmap="coolwarm_r")

ax3 = plt.subplot(3 ,1, 3)
ax3.set_xticks(np.arange(len(columns)), labels=columns)
ax3.set_yticks(np.arange(len(row3)), labels=row3)
plt.setp(ax3.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
plt.imshow(c, cmap="coolwarm_r")

# plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
cax = plt.axes((0.92, 0.265, 0.02, 0.468)) # mutant
# cax = plt.axes((0.854, 0.265, 0.02, 0.468)) # reinhard
plt.colorbar(cax=cax)

plt.show() """

In [None]:
# mutant, reinhard

""" import matplotlib.pyplot as plt
import numpy as np

fig, axs = plt.subplots(3, 1, figsize=(6, 3))
# plt.subplots_adjust(left=0.1, right=0.9, bottom=0.1, top=0.9, wspace=0.2, hspace=0.4)
# fig.tight_layout()

ax = axs[0]
pcm = ax.pcolormesh(a, cmap="coolwarm_r")
ax.set_xticks(np.arange(len(columns)), labels="")
ax.set_yticks(np.arange(len(row1)), labels=row1)

ax = axs[1]
pcm = ax.pcolormesh(b, cmap="coolwarm_r")
ax.set_xticks(np.arange(len(columns)), labels="")
ax.set_yticks(np.arange(len(row2)), labels=row2)

ax = axs[2]
pcm = ax.pcolormesh(c, cmap="coolwarm_r")
ax.set_xticks(np.arange(len(columns)), labels=columns)
ax.set_yticks(np.arange(len(row3)), labels=row3)
plt.setp(ax.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")

fig.colorbar(pcm, ax=axs[:], shrink=0.6)
# plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
# cax = plt.axes((0.92, 0.265, 0.02, 0.468)) # mutant
# cax = plt.axes((0.854, 0.265, 0.02, 0.468)) # reinhard
# plt.colorbar(cax=cax)

plt.show() """

Plot heatmap

In [None]:
# replace outliers values

""" print(a)
b[3][1] = b.min()
b[3][2] = b.min()
print(a) """

In [None]:
print(a.min(), a.max())

In [None]:
# mutant and cancer (Heatmap)
import matplotlib.pyplot as plt
import numpy as np

name = "mutant_random"
fig, axes = plt.subplots(3, 1, figsize=(10, 4.5)) # mutant

# One heatmap per condition: (axes position, matrix, row labels, title, x tick labels).
# Only the bottom panel (position 2) carries the column names.
panels = [
	(2, a, row1, "pck1", columns),
	(1, b, row2, "zwf1", ""),
	(0, c, row3, "WT", ""),
]
for position, matrix, row_labels, title, x_labels in panels:
	panel = axes[position]
	low, high = matrix.min(), matrix.max()

	# Each panel gets its own colour scale, bounded by its rounded min and max.
	image = panel.imshow(matrix, cmap="Blues", interpolation="none", vmin=round(low), vmax=round(high))
	panel.set_xticks(np.arange(len(columns)), labels=x_labels)
	panel.set_yticks(np.arange(len(row_labels)), labels=row_labels)
	panel.set_title(title)

	cbar = fig.colorbar(image, ax=panel, ticks=[round(low), round((low + high) / 2), round(high)], pad=0.01)
	cbar.set_label("Num. edges")

# Slant the column names of the bottom panel so they do not overlap.
plt.setp(axes[2].get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")

# Save before showing.
plt.savefig("experiments/plots/common_edges_{}.pdf".format(name), format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# mutant and cancer (Line)
import matplotlib.pyplot as plt
import numpy as np

fig, axes = plt.subplots(3, 1, figsize=(16, 10)) # mutant
x = np.arange(len(columns))

# One panel per condition, top to bottom: (matrix, row labels, title, x tick labels).
series = [
	(a, row1, "pck1", ""),
	(b, row2, "zwf1", ""),
	(c, row3, "WT", columns),
]
for panel, (matrix, row_labels, title, x_labels) in zip(axes, series):
	# One line per matrix row, labelled with the matching entry of row_labels.
	for i, y in enumerate(matrix):
		panel.plot(x, y, label=row_labels[i], marker=".")
	panel.set_xticks(np.arange(len(columns)), labels=x_labels)
	panel.set_title(title)
	panel.set_ylabel("# edges")

# Only the bottom panel shows column names; slant them so they stay readable.
plt.setp(axes[2].get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")

# plt.legend() acts on the current axes, so the legend is drawn on the last panel only.
plt.legend()
plt.savefig("experiments/plots/common_edges_mutant.pdf", format="pdf", bbox_inches="tight") # change
plt.show()

In [None]:
# leaf

""" import matplotlib.pyplot as plt
import numpy as np

ax1 = plt.subplot(3, 1, 1)
ax1.set_xticks(np.arange(len(columns)), labels="")
ax1.set_yticks(np.arange(len(row1)), labels=row1)
plt.imshow(a, cmap="coolwarm_r")

ax2 = plt.subplot(3 ,1, 2)
ax2.set_xticks(np.arange(len(columns)), labels=columns)
ax2.set_yticks(np.arange(len(row2)), labels=row2)
plt.setp(ax2.get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
plt.imshow(b, cmap="coolwarm_r")

# plt.subplots_adjust(bottom=0.1, right=0.8, top=0.9)
cax = plt.axes((0.92, 0.397, 0.02, 0.468))
plt.colorbar(cax=cax)

plt.show() """

In [None]:
# leaf, synthetic (Heatmap)
import matplotlib.pyplot as plt
import numpy as np

name = "synthetic_random_remove" # change
fig, axes = plt.subplots(2, 1, figsize=(10, 3))

# One heatmap per condition: (axes position, matrix, row labels, title, x tick labels).
panels = [
	(0, a, row1, "phenotype1", ""), # phenotype1, new
	(1, b, row2, "phenotype2", columns), # phenotype2, old
]
for position, matrix, row_labels, title, x_labels in panels:
	panel = axes[position]
	low, high = matrix.min(), matrix.max()

	# Each panel gets its own colour scale, bounded by its rounded min and max.
	image = panel.imshow(matrix, cmap="Blues", interpolation="none", vmin=round(low), vmax=round(high))
	panel.set_xticks(np.arange(len(columns)), labels=x_labels)
	panel.set_yticks(np.arange(len(row_labels)), labels=row_labels)
	panel.set_title(title)

	cbar = fig.colorbar(image, ax=panel, ticks=[round(low), round((low + high) / 2), round(high)], pad=0.01)
	cbar.set_label("Num. edges")

# Slant the column names of the bottom panel so they do not overlap.
plt.setp(axes[1].get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")

# Save before showing.
plt.savefig("experiments/plots/common_edges_{}.pdf".format(name), format="pdf", bbox_inches="tight")
plt.show()

### Ranking

In [None]:
dataset = "mutant" # change

df_edges_count_avg = pd.read_csv("experiments/run_details/edges_avg_{}.csv".format(dataset), index_col=0)

# Every third row belongs to the same condition.
a, b, c = (
	df_edges_count_avg.iloc[offset::3, :].round(2)
	for offset in range(3)
) # a: pck1, b: zwf1, c: WT

# Starts the list of per-condition tables; the following dataset cells add to it.
list_df_edges = [a, b, c]
a

In [None]:
dataset = "leaf" # change

df_edges_count_avg = pd.read_csv("experiments/run_details/edges_avg_{}.csv".format(dataset), index_col=0)

# Rows alternate between the two conditions.
a, b = (
	df_edges_count_avg.iloc[offset::2, :].round(2)
	for offset in range(2)
) # a: new, b: old

# Appends to the existing list: re-running this cell adds the same tables again.
list_df_edges.extend([a, b])
a

In [None]:
dataset = "synthetic" # change

df_edges_count_avg = pd.read_csv("experiments/run_details/edges_avg_{}.csv".format(dataset), index_col=0)

# Rows alternate between the two conditions.
a, b = (
	df_edges_count_avg.iloc[offset::2, :].round(2)
	for offset in range(2)
) # a: p1, b: p2

# Appends to the existing list: re-running this cell adds the same tables again.
list_df_edges.extend([a, b])
a

In [None]:
# transform

from sklearn import preprocessing

# Rescale each collected table with a fresh MinMaxScaler (MaxAbsScaler is the
# alternative that was tried), then average the rescaled tables element-wise.
# All tables must share the shape of the first one.
X_sum = np.zeros(list_df_edges[0].shape)
for df_edge in list_df_edges:
	X_sum = X_sum + preprocessing.MinMaxScaler().fit_transform(df_edge.values)

X_avg = X_sum / len(list_df_edges)
X_avg

In [None]:
# Label the averaged scores with the method each row belongs to.
df_avg = pd.DataFrame(X_avg, index=["DGI", "ARGVA", "LVGAE", "VGAE"])
# Only the last expression of a cell is rendered, so show the table explicitly.
display(df_avg)

# Summary statistics per method (methods become columns after the transpose).
df_avg.T.describe()

### Count metabolites with name

In [None]:
# print(exp)
exp = "exp3"
# Read the stored parameters; the context manager closes the file once it is parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
df_join_raw = pd.read_csv("experiments/input/{}_raw.csv".format(exp), index_col=0, usecols=[0, 1, 2])        
# df_join_raw.columns = ["mz", "name"]
df_join_raw

In [None]:
# count metabolites by name
# Totals over the raw table: all rows, and rows whose name is not "Unknown".
size1_ = len(df_join_raw)
size2_ = len(df_join_raw[df_join_raw["Metabolite name"] != "Unknown"])

names_details = []
for method in methods: # change
	for group_id in groups_id: # change
		for data_variation in data_variations: # change
			# read edges
			edges_path = "experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, method, group_id, data_variation)
			df_edges = pd.read_csv(edges_path)

			# np.unique flattens and sorts, giving every node id that appears as source or target.
			unique_nodes = np.unique(df_edges[["source", "target"]].values)
			df_nodes_raw = df_join_raw.loc[unique_nodes]

			# Same two counts as above, restricted to the nodes of this edge list.
			has_name = df_nodes_raw["Metabolite name"] != "Unknown"
			names_details.append([method, group_id, data_variation, size1_, size2_, len(df_nodes_raw), len(df_nodes_raw[has_name])])

df_names_details = pd.DataFrame(names_details, columns=["Method", "Group", "Data var.", "Raw-nodes", "Raw-nodes-name", "Filter-nodes", "Filter-nodes-name"])
df_names_details

---

### Similarity analysis

In [None]:
# print(exp)
exp = "exp22"
# Read the stored parameters; the context manager closes the file once it is parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
method = methods[0] # change
print(method)
data_variation = data_variations[2] # change
print(data_variation)
group = groups[0] # change
print(group)

df_join_raw = pd.read_csv("experiments/input/{}_raw.csv".format(exp), index_col=0, usecols=[0, 1, 2])        
# df_join_raw.columns = ["mz", "name"]
df_join_raw

In [None]:
df_change = pd.read_csv("experiments/output/{}/changes/changes_edges_log2_{}_{}_{}_{}.csv".format(exp, method, group[0], group[1], data_variation))
# G = nx.from_pandas_edgelist(df_change_filter, edge_attr=["label"], create_using=nx.DiGraph())
df_change

In [None]:
# 
df_change["significant"].value_counts()

In [None]:
d = df_change[((df_change["significant"] == "*"))]
d["label"].value_counts()

In [None]:
d = df_change[((df_change["significant"] != "*"))]
d["label"].value_counts()

In [None]:
# new filter

# Keep every edge marked "*", plus the edges marked "-" whose label has the
# same character in its first and second position.
is_starred = df_change["significant"] == "*"
is_dash_same_label = (df_change["significant"] == "-") & (df_change["label"].str[0] == df_change["label"].str[1])

df_change_filter = df_change[is_starred | is_dash_same_label]
df_change_filter

In [None]:
# df_change[df_change["label"].str.contains("?", regex=False)]
# df_change[df_change["label"].str.contains("?", regex=False) == False]

In [None]:
# df_change_filter = df_change[df_change["p-value"] < 0.01]
df_change_filter_all = df_change[df_change["significant"] == "*"]
df_change_filter_all

In [None]:
# Union

df_change_filter_u = df_change_filter_all.copy()
df_change_filter_u

In [None]:
# Intersection

df_change_filter_i = df_change_filter_all[df_change_filter_all["label"].str.contains("?", regex=False) == False]
df_change_filter_i

In [None]:
df_change_filter = df_change_filter_u # df_change_filter_u, df_change_filter_i

In [None]:
# count nodes, edges, metabolite names
# np.unique flattens and sorts, so this is every node id used as source or target.
total_nodes = np.unique(df_change_filter[["source", "target"]].values)

# Node ids whose name column (source2 / target2) is not "Unknown".
named_as_source = df_change_filter["source2"] != "Unknown"
named_as_target = df_change_filter["target2"] != "Unknown"
nodes_source = np.unique(df_change_filter.loc[named_as_source, "source"].values)
nodes_target = np.unique(df_change_filter.loc[named_as_target, "target"].values)
nodes_name = np.unique(np.concatenate((nodes_source, nodes_target)))

len(total_nodes), len(nodes_name)

In [None]:
# get nodes with name

# Node ids whose name column (source2 / target2) is not "Unknown", from either end of an edge.
named_as_source = df_change_filter["source2"] != "Unknown"
named_as_target = df_change_filter["target2"] != "Unknown"
nodes_source = np.unique(df_change_filter.loc[named_as_source, "source"].values)
nodes_target = np.unique(df_change_filter.loc[named_as_target, "target"].values)

unique_nodes = np.unique(np.concatenate((nodes_source, nodes_target)))
unique_nodes

In [None]:
# df_change_filter[df_change_filter["label"].str.contains("?", regex=False)]

In [None]:
G = nx.from_pandas_edgelist(df_change_filter, source="source", target="target", edge_attr=["label"], create_using=nx.DiGraph())
nodes = sorted(list(G.nodes()))
print(nodes)

df_nodes_raw = df_join_raw.loc[nodes]
df_nodes_raw

In [None]:
# node degree

""" degrees = dict(G.degree())

sorted_nodes = sorted(degrees, key=lambda x: degrees[x], reverse=True)
sorted_nodes[:10] """

In [None]:
# filter by nodes
# Draws the subgraph of G induced by the selected nodes on an algorithmx canvas.
# view_by is the position, within each row of df_nodes_raw_filter, of the value used as node label.
view_by = 1 # change [("Average Mz", 0), ("Metabolite name", 1)] 

# nodes_select = sorted([32, 51, 56, 59, 67, 78, 80]) # leaf
# nodes_select = [0, 1, 2, 3, 4, 5, 6] # mutant
# nodes_select = sorted([2979, 3030, 1205, 48, 1750, 1862]) # cancer
# nodes_select = sorted([862, 1209, 1207, 1098, 1033, 1210]) # tea
nodes_select = unique_nodes # change

# Raw rows of the selected nodes (the bare expression below is not rendered: it is not the last one).
df_nodes_raw_filter = df_nodes_raw.loc[nodes_select,:]
df_nodes_raw_filter

# nodes: index labels of the selection; values: one list of column values per node, in the same order.
nodes, values = list(df_nodes_raw_filter.index), list(df_nodes_raw_filter.values.tolist())

SG = G.subgraph(nodes)
print(nodes, SG.nodes())
print(SG.edges())

# SG = nx.relabel_nodes(SG, id_mz)
# Mapping edge -> "label" attribute, used below as the text drawn on each edge.
edge_labels = nx.get_edge_attributes(SG, "label")
edge_labels

canvas = algorithmx.jupyter_canvas()
canvas.size((600, 600))
canvas.edgelayout("symmetric").edgelength(85)
canvas.label("title").add({"text": "{} --> {}".format(group[0], group[1])})

# Bind each node to its row of values so the label callback can pick d[view_by].
canvas.nodes(nodes).add({
	"color": "orange",
	"svgattrs": {
		"stroke-width": 2,
		"stroke": "gray"
	}
}).data(values).add(
	size=(16),
	labels=lambda d: {
		0: {"color": "black", "text": d[view_by], "size": 14}
	}
)

# Add directed edges with weight labels
# NOTE(review): relies on SG.edges and edge_labels.values() iterating in the same order -- confirm.
canvas.edges(SG.edges).add({
	"color": "gray",
	"directed": True,
	"thickness": 2}).data(edge_labels.values()).add(
		labels=lambda label: {
			1: {"color": "black", "text": label, "size": 14},
		}
	)
canvas

In [None]:
df_change_filter[(df_change_filter["source"] == 1205) & (df_change_filter["target"] == 1862)]

In [None]:
df_nodes_raw_filter

### BioCyc

In [None]:
# form
# Builds the BioCyc selection form. Widget classes are taken from the `widgets`
# namespace, which is the only ipywidgets name imported at the top of the notebook.

# Key each group by its members joined with "-", for use as dropdown options.
dict_groups = {}
for group in groups:
	dict_groups["-".join(group)] = group

form_item_layout = widgets.Layout(
	display='flex',
	flex_flow='row',
	justify_content='space-between'
)

exp_ = widgets.Text(
	value="",
	placeholder='Type Experiment Code',
	disabled=False
)

# NOTE: `methods` and `data_variations` are rebound to Dropdown widgets here, replacing
# the lists loaded from parameters.json; the next cell reads their `.value`.
methods = widgets.Dropdown(
	options=["vgae", "dgi"],
	value="vgae",
	disabled=False
)
data_variations = widgets.Dropdown(
	options=["none", "str", "dyn"],
	value="none",
	disabled=False
)
groups_ = widgets.Dropdown(
	options=list(dict_groups.keys()),
	value=list(dict_groups.keys())[0],
	disabled=False
)
threshold_ratio_ = widgets.FloatSlider(
	value=0.5,
	min=-1.0,
	max=1.0,
	step=0.01,
	disabled=False,
	continuous_update=False,
	orientation='horizontal',
	readout=True,
)
# The option value is used later as a column position in the BioCyc table.
views_by = widgets.Dropdown(
	options=[("Average Mz", 1), ("Metabolite name", 0)],
	value=1,
	disabled=False
)

# One labelled row per input.
form_items = [
	widgets.Box([widgets.Label(value="Experiment code"), exp_], layout=form_item_layout),
	widgets.Box([widgets.Label(value="Method"), methods], layout=form_item_layout),
	widgets.Box([widgets.Label(value="Data variation"), data_variations], layout=form_item_layout),
	widgets.Box([widgets.Label(value="Group"), groups_], layout=form_item_layout),
	widgets.Box([widgets.Label(value="Threshold ratio"), threshold_ratio_], layout=form_item_layout),
	widgets.Box([widgets.Label(value="View by"), views_by], layout=form_item_layout),
]

form = widgets.Box(form_items, layout=widgets.Layout(
	display='flex',
	flex_flow='column',
	border='solid 1px',
	align_items='stretch',
	width='50%'
))
form

In [None]:
# Read the current selections from the form widgets.
# Here `methods` and `data_variations` must be the Dropdown widgets (they expose `.value`),
# not the lists loaded from parameters.json.
exp = exp_.value
method = methods.value
data_variation = data_variations.value
# Map the selected "-"-joined key back to the group it was built from.
group = dict_groups[groups_.value]
threshold_ratio = threshold_ratio_.value
view_by = views_by.value

In [None]:
df_biocyc = pd.read_csv("experiments/output/{}/biocyc/biocyc_{}_{}_{}.csv".format(exp, method, "-".join(group), data_variation), sep="\t")
df_biocyc 

In [None]:
# Keep rows whose absolute ratio exceeds the chosen threshold, ordered by alignment id.
exceeds_threshold = df_biocyc["Ratio"].abs() > threshold_ratio
df_biocyc_filter = df_biocyc[exceeds_threshold].sort_values(by=["Alignment ID"])
df_biocyc_filter

In [None]:
# Column labels shared by both heatmaps: the `view_by` column rendered as text.
x_labels = [str(value) for value in df_biocyc_filter.iloc[:, view_by].values]

# (z rows, y labels): columns 3-4 labelled with the two group names, then the last column as "Ratio".
heatmaps = [
	(df_biocyc_filter.iloc[:, 3:5].T.values, group),
	(df_biocyc_filter.iloc[:, -1:].T.values, ["Ratio"]),
]
for z_values, y_labels in heatmaps:
	fig = go.Figure(data=go.Heatmap(
		z=z_values,
		y=y_labels,
		x=x_labels,
		hoverongaps=False))
	fig.show()

### Run details (loss, embeddings plot)

In [None]:
# print(exp)
exp = "exp2004"
# Read the stored parameters; the context manager closes the file once it is parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

dimension = params["dimension"]
print("Dimension:\t", dimension)

threshold_corr = params["threshold_corr"]
print("Threshold corr:\t", threshold_corr)

iterations = params["iterations"]
print("Iterations:\t", iterations)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

seeds = params["seeds"]
print("Seeds:\t\t", seeds)

Plot loss

In [None]:
# plot loss

import matplotlib.pyplot as plt
import numpy as np

iteration = 1 # change
dimensions = [2, 3, 4, 8, 16, 32, 64, 128, 256, 512] # change

# One column of axes per dimension; rows are filled in loop order within each column.
fig, axs = plt.subplots(3 * 4 * len(methods), len(dimensions), figsize=(60, 160))
e = 2004 # change
for j, dim in enumerate(dimensions): # change, experiment
	# Experiment ids are consecutive: the j-th dimension is read from "exp<e + j>".
	exp = "exp" + str(e + j)
	i = 0
	for method in methods[:]: # change
		for group_id in groups_id[:]: # change
			for data_variation in data_variations: # change
				print(method, group_id, data_variation)

				# "none" has one run per subgroup; any other variation has a single run.
				# Each entry is (identifier used in the file name, subplot title).
				if data_variation == "none":
					runs = [
						(subgroup_id, "{}-{}-{}{}-dim: {}".format(method, group_id, data_variation, subgroup_id, dim))
						for subgroup_id in subgroups_id[group_id]
					]
				else:
					runs = [(data_variation, "{}-{}-{}-dim: {}".format(method, group_id, data_variation, dim))]

				for run_id, title in runs:
					# The file suffix follows `iteration` (it used to be a hard-coded 1).
					# NOTE(review): assumes the trailing number in the file name is the iteration -- confirm.
					name_file = "experiments/run_details/{}_{}_{}_{}_{}".format(exp, method, group_id, run_id, iteration)
					# embeddings = np.load("{}_embeddings.npy".format(name_file))
					losses = np.load("{}_loss.npy".format(name_file))

					# All values but the last are plotted as the curve; the last one is drawn
					# as a vertical marker (presumably an epoch number -- TODO confirm).
					axs[i, j].plot(range(1, len(losses)), losses[:-1], "o-")
					axs[i, j].axvline(x=losses[-1], color="red")
					axs[i, j].grid()
					axs[i, j].set_title(title)
					i += 1

Plot node-embeddings

In [None]:
# Imported here so the cell does not depend on names that are not imported at the top of the notebook.
from sklearn.decomposition import PCA

iteration = 1 # change
dimensions = [2, 3, 4, 8, 16, 32, 64, 128, 256, 512] # change

# One column of 3-D axes per dimension; rows are filled in loop order within each column.
fig, axs = plt.subplots(3 * 3 * len(methods), len(dimensions), figsize=(60, 160), subplot_kw=dict(projection="3d"))
e = 2010 # change
for j, dim in enumerate(dimensions): # change, experiment
	# Experiment ids are consecutive: the j-th dimension is read from "exp<e + j>".
	exp = "exp" + str(e + j)
	i = 0
	for method in methods[:]: # change
		for group_id in groups_id[:]: # change
			for data_variation in data_variations: # change
				if data_variation == "none":
					# One embedding file per subgroup; rows are tagged with the subgroup's position.
					frames = []
					for k, subgroup_id in enumerate(subgroups_id[group_id]):
						print(exp, method, group_id, subgroup_id, iteration)
						df_node_embeddings = pd.read_csv("experiments/output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(exp, method, group_id, subgroup_id, iteration), index_col=0)
						df_node_embeddings["subgroup"] = [k] * len(df_node_embeddings)
						frames.append(df_node_embeddings)
					# Concatenate once instead of growing a DataFrame inside the loop.
					df_node_embeddings_concat = pd.concat(frames) if frames else pd.DataFrame()
				else:
					print(exp, method, group_id, data_variation, iteration)
					df_nodes = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(exp, group_id, data_variation), index_col=0, usecols=[0, 1])
					# Subgroup index from the first character of the node id ("A" -> 0, "B" -> 1, ...).
					df_nodes["subgroup"] = df_nodes["id"].apply(lambda x: ord(x[0]) - 65)

					df_node_embeddings = pd.read_csv("experiments/output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(exp, method, group_id, data_variation, iteration), index_col=0)
					# Assignment aligns on the index shared by both tables.
					df_node_embeddings["subgroup"] = df_nodes["subgroup"]
					df_node_embeddings_concat = df_node_embeddings

				# plot: the last column is the subgroup label, everything before it is the embedding
				df_embeddings = df_node_embeddings_concat.iloc[:, :-1]
				labels = df_node_embeddings_concat.iloc[:, -1]
				if df_embeddings.shape[1] > 3:
					df_embeddings_red = PCA(n_components=3).fit_transform(df_embeddings)
					# df_embeddings_red = TSNE(n_components=3).fit_transform(df_embeddings)
					# df_embeddings_red = umap.UMAP().fit_transform(df_embeddings)
				else:
					df_embeddings_red = df_embeddings.values

				df_embeddings = pd.DataFrame(df_embeddings_red)
				# Checked before the label column is added: 2-D embeddings have no third coordinate.
				has_third_axis = df_embeddings.shape[1] >= 3
				df_embeddings["labels"] = labels.values

				unique_labels = np.unique(labels)

				for k, label in enumerate(unique_labels):
					df_embeddings_filter = df_embeddings[df_embeddings["labels"] == label]

					x = df_embeddings_filter.iloc[:, 0]
					y = df_embeddings_filter.iloc[:, 1]
					if has_third_axis:
						z = df_embeddings_filter.iloc[:, 2]
					else:
						# Same convention as the edge-embedding plot: 2-D points lie on the z = 0 plane
						# (column 2 would otherwise be the label column).
						z = [0] * len(df_embeddings_filter)

					# k-th colour of the Dark2 palette.
					color = plt.cm.Dark2.colors[k]
					# points = ax.scatter(x, y, z, s=100, c=labels, alpha=0.5, cmap=new_cmap , marker=".") # , edgecolors="black", linewidth=0.5)
					axs[i, j].scatter(x, y, z, s=100, alpha=0.5, color=color, marker=".", label="Br{}".format(label + 1))
				axs[i, j].set_title("{}-{}-{}-dim: {}".format(method, group_id, data_variation, dim))
				i += 1

Plot edge-embeddings

In [None]:
# Imported here so the cell does not depend on names that are not imported at the top of the notebook.
from sklearn.decomposition import PCA

iteration = 1 # change
dimensions = [2, 3, 4, 8, 16, 32, 64, 128, 256, 512] # change
plot_option = "edge" # change edge, outlier

# Position (counted from the end) of the column used to colour the points for each option.
label_column = {"edge": -2, "outlier": -1}
if plot_option not in label_column:
	# Fail early instead of silently reusing variables left over from another cell.
	raise ValueError("plot_option must be 'edge' or 'outlier', got {!r}".format(plot_option))

# One column of 3-D axes per dimension; rows are filled in loop order within each column.
fig, axs = plt.subplots(3 * 3 * len(methods), len(dimensions), figsize=(60, 160), subplot_kw=dict(projection="3d"))
e = 2010 # change

for j, dim in enumerate(dimensions): # change, experiment
	# Experiment ids are consecutive: the j-th dimension is read from "exp<e + j>".
	exp = "exp" + str(e + j)
	i = 0
	for method in methods[:]: # change
		for group_id in groups_id[:]: # change
			for data_variation in data_variations: # change
				print(exp, method, group_id, data_variation, iteration)
				df_edge_embeddings_concat = pd.read_csv("experiments/output/{}/edge_embeddings/edge-embeddings_concat_outlier_{}_{}_{}_{}.csv".format(exp, method, group_id, data_variation, iteration))

				# plot without synthetic edges
				df_edge_embeddings_concat = df_edge_embeddings_concat[df_edge_embeddings_concat["subgroup"] != -1]

				# plot: the first two and last two columns are not part of the embedding
				df_embeddings = df_edge_embeddings_concat.iloc[:, 2:-2]
				labels = df_edge_embeddings_concat.iloc[:, label_column[plot_option]]

				if df_embeddings.shape[1] > 3:
					df_embeddings_red = PCA(n_components=3).fit_transform(df_embeddings)
					# df_embeddings_red = TSNE(n_components=3).fit_transform(df_embeddings)
					# df_embeddings_red = umap.UMAP().fit_transform(df_embeddings)
				else:
					df_embeddings_red = df_embeddings.values

				df_embeddings = pd.DataFrame(df_embeddings_red)
				df_embeddings["labels"] = labels.values
				num_cols = len(df_embeddings.columns)
				unique_labels = np.unique(labels)
				# print(unique_labels)

				# Outlier colours are the same for every label, so build them once per subplot.
				colors = plt.get_cmap("coolwarm")(np.linspace(0, 1, len(unique_labels)))

				for k, label in enumerate(unique_labels):
					df_embeddings_filter = df_embeddings[df_embeddings["labels"] == label]

					x = df_embeddings_filter.iloc[:, 0]
					y = df_embeddings_filter.iloc[:, 1]
					if num_cols == 3:
						# Two coordinates plus the label column: draw on the z = 0 plane.
						z = [0] * len(df_embeddings_filter)
					else:
						z = df_embeddings_filter.iloc[:, 2]

					if plot_option == "edge":
						# k-th colour of the Dark2 palette.
						color = plt.cm.Dark2.colors[k]
						axs[i, j].scatter(x, y, z, s=100, alpha=0.5, color=color, marker=".", label="Br{}".format(label + 1))
					else:
						# The label value itself indexes both the colour table and the legend text.
						axs[i, j].scatter(x, y, z, s=100, alpha=0.5, c=[colors[label]], marker=".", label=["inliers", "outliers"][label]) #, edgecolors="black", linewidth=0.5)
					axs[i, j].set_title("{}-{}-{}-dim: {}".format(method, group_id, data_variation, dim))
				i += 1

Plot interactive embeddings

In [None]:
# %matplotlib widget
# %matplotlib notebook
%matplotlib inline

from IPython import display
from IPython.display import HTML
from matplotlib import animation

df_nodes = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(exp, group_id, data_variation), index_col=0, usecols=[0, 1])
df_nodes["subgroup"] = df_nodes["id"].apply(lambda x: ord(x[0]) - 65)

fig = plt.figure(figsize=(8, 8))
ax = fig.add_subplot(projection="3d")
""" plt.axis('off')
plt.tick_params(left=False,
				bottom=False,
				labelleft=False,
				labelbottom=False) """
def animate(i):
	df_node_embeddings = pd.DataFrame(embeddings[i])
	df_node_embeddings["subgroup"] = df_nodes["subgroup"]
	
	x = df_node_embeddings.iloc[:, 0]
	y = df_node_embeddings.iloc[:, 1]
	z = df_node_embeddings.iloc[:, 2]
	labels = df_node_embeddings.iloc[:, 3]
	
	ax.clear()
	ax.scatter(x, y, z, c=labels, s=100)
	plt.title(f'Epoch: {i + 1} | Loss: {losses[i]:.2f}', fontsize=12, pad=10)

ani = matplotlib.animation.FuncAnimation(fig, animate, frames=len(embeddings), interval=1000)
html = HTML(ani.to_jshtml())
display.display(html)

In [None]:
iteration = 1 # change
print(exp, method, group_id, data_variation, iteration)
df_edge_embeddings_concat = pd.read_csv("experiments/output/{}/edge_embeddings/edge-embeddings_concat_outlier_{}_{}_{}_{}.csv".format(exp, method, group_id, data_variation, iteration))

plot_embedding_3d(df_edge_embeddings_concat.iloc[:, 2:-2], df_edge_embeddings_concat.iloc[:, -1], reduction="pca", embedding="edge", title="{}-{}".format(method, group_id))

In [None]:
df_edge_embeddings_concat

In [None]:
import plotly.graph_objects as go

fig = go.Figure(data=[go.Scatter3d(
	x=df_edge_embeddings_concat.iloc[:, 2],
	y=df_edge_embeddings_concat.iloc[:, 3],
	z=df_edge_embeddings_concat.iloc[:, 4],
	mode="markers",
	marker=dict(
		size=6,
		color=df_edge_embeddings_concat.iloc[:, 6], # set color to an array/list of desired values
		colorscale="Portland",   # choose a colorscale
		opacity=0.8
	)
)])

# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0))
fig.show()

### Comparison on synthetic raw dataset

In [None]:
# diff sigmas

sigma_p1 = np.load("synthetic_dataset/sigma1_v5.npy") # load
sigma_p2 = np.load("synthetic_dataset/sigma2_v5.npy") # load
# The difference is taken before the diagonals are zeroed below.
diff = sigma_p1 - sigma_p2

for sigma in (sigma_p1, sigma_p2):
	np.fill_diagonal(sigma, 0)

# Show both matrices and their difference, one figure each.
for matrix in (sigma_p1, sigma_p2, diff):
	plt.imshow(matrix, cmap="coolwarm", interpolation="none")
	plt.colorbar()
	plt.show()

In [None]:
sigma_p1.shape

In [None]:
# Zero, in place, every entry whose magnitude is below 0.5. The test is made on the
# upper triangle (diagonal included) and the mirrored lower-triangle entry is cleared
# with it, so the result stays symmetric. Assumes a square matrix.
weak = np.triu(np.abs(sigma_p1) < 0.5)
sigma_p1[weak | weak.T] = 0

plt.imshow(sigma_p1, cmap="coolwarm", interpolation="none")
plt.colorbar()
plt.show()

In [None]:
# Zero, in place, every entry whose magnitude is below 0.5. The test is made on the
# upper triangle (diagonal included) and the mirrored lower-triangle entry is cleared
# with it, so the result stays symmetric. Assumes a square matrix.
weak = np.triu(np.abs(sigma_p2) < 0.5)
sigma_p2[weak | weak.T] = 0

plt.imshow(sigma_p2, cmap="coolwarm", interpolation="none")
plt.colorbar()
plt.show()

In [None]:
# get significant edges
row, col = diff.shape
th = 0.5 # only referenced by the disabled filters below

# No filter is active: every (i, j) pair with j >= i is kept, diagonal included.
# Disabled alternatives: (sigma_p1[i, j] * sigma_p2[i, j] < 0)
#                        or (abs(sigma_p1[i, j] - sigma_p2[i, j]) >= th)
list_edges_sigma = [[i, j] for i in range(row) for j in range(i, col)]

print(len(list_edges_sigma))
list_edges_sigma

In [None]:
aux = np.array(list_edges_sigma)
unique_nodes = np.unique(aux.flatten()).tolist()
unique_nodes

In [None]:
exp = "exp571"

df_join_raw = pd.read_csv("experiments/input/{}_raw.csv".format(exp), index_col=0)        
# df_join_raw.columns = ["mz", "name"]
df_join_raw


In [None]:
s1 = df_join_raw.iloc[unique_nodes,:]
print(s1.shape)
s1


---

In [None]:
# save

# save
s1.to_csv("experiments/raw_data/synthetic_greedy_all_format.csv", index=True, sep="|")

In [None]:
# print(exp)
exp = "exp601"
# Read the stored parameters; the context manager closes the file once it is parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
df_join_raw = pd.read_csv("experiments/input/{}_raw.csv".format(exp), index_col=0, usecols=[0, 1, 2])        
# df_join_raw.columns = ["mz", "name"]
df_join_raw

In [None]:
def sort_edges_(edges):
	"""Return every edge as a (low, high) tuple so that edge direction is ignored.

	Only the first two items of each edge are used; extra items (labels,
	weights, ...) are dropped.

	Args:
		edges: Iterable of edges. Each edge must be indexable and hold at least
			two mutually comparable items, e.g. ``[source, target]``.

	Returns:
		list: One ``(smaller, larger)`` tuple per input edge, in input order.
	"""
	ordered = []
	# iterate directly so that any iterable works, not only sequences with len()
	for edge in edges:
		first, second = edge[0], edge[1]
		# swap only when strictly out of order; equal end points stay as they are
		ordered.append((second, first) if first > second else (first, second))
	return ordered

In [None]:
# Share of the sigma edges that each method/group/variation also flags as significant ("*").
# The sigma edge set does not depend on the loop variables, so it is built once.
s1 = set(sort_edges_(list_edges_sigma))

data = []
for method in methods:
	for group in groups:
		for data_variation in data_variations:
			df_change = pd.read_csv("experiments/output/{}/changes/changes_edges_log2_{}_{}_{}_{}.csv".format(exp, method, group[0], group[1], data_variation))
			df_change_filter = df_change[df_change["significant"] == "*"]

			# the first two columns are taken as the edge end points
			list_edges_gnn = df_change_filter.iloc[:, [0, 1]].values.tolist()
			s2 = set(sort_edges_(list_edges_gnn))

			intersection = s1 & s2
			print(len(list_edges_sigma), len(list_edges_gnn), len(intersection))
			# an empty sigma edge list would otherwise raise ZeroDivisionError
			ratio = len(intersection) / len(list_edges_sigma) if list_edges_sigma else float("nan")

			data.append([method, group, data_variation, ratio])
df = pd.DataFrame(data, columns=["Method", "Group", "Data var.", "Ratio"])
df

---

** 

In [None]:
# Load the run parameters of the experiment that the following cells analyse.
exp = "exp800"
# `with` closes the parameters file as soon as it has been parsed
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
# Greedy baseline (groups AA vs. BB, no data variation): keep only the rows flagged "*".
df_change_greedy = pd.read_csv("experiments/output/{}/changes/changes_edges_log2_{}_{}_{}_{}.csv".format(exp, "greedy", "AA", "BB", "none"))
df_change_greedy = df_change_greedy[df_change_greedy["significant"] == "*"]
df_change_greedy

In [None]:
# Graph of the significant greedy edges, carrying the "weight1" column as edge attribute.
G0 = nx.from_pandas_edgelist(df_change_greedy, "source", "target", edge_attr="weight1")

# NOTE(review): `sort_edges` is not defined in the visible cells -- presumably a project helper.
edges_greedy = sort_edges(G0.edges())
edges_greedy[:5]

In [None]:
# Compare the significant ("*") edges of every method/group/variation with the
# significant edges of the greedy baseline graph G0.
s1 = set(sort_edges(G0.edges()))  # baseline edge set, identical for every iteration

data = []
for method in methods:
	for group in groups:
		for data_variation in data_variations:
			df_change = pd.read_csv("experiments/output/{}/changes/changes_edges_log2_{}_{}_{}_{}.csv".format(exp, method, group[0], group[1], data_variation))
			df_change_filter = df_change[df_change["significant"] == "*"]

			G1 = nx.from_pandas_edgelist(df_change_filter, "source", "target", edge_attr="weight1")

			# sorted once and reused for both the count and the set
			edges = sort_edges(G1.edges())
			s2 = set(edges)

			intersection = s1 & s2

			data.append([method, group, data_variation, len(edges_greedy), len(edges), len(intersection)])
df = pd.DataFrame(data, columns=["Method", "Group", "Data var.", "* greedy", "* GNN", "common"])
df

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

# One Venn diagram per row of `df`: greedy-only, GNN-only and shared significant edges.
for i in range(len(df)):
	# print(df.iloc[i,:])
	a = df.iloc[i,:]["* greedy"]
	b = df.iloc[i,:]["* GNN"]
	c = df.iloc[i,:]["common"]
	
	# NOTE: this rebinds the global `method` to a "<method>-<data variation>" display label
	method = df.iloc[i,:]["Method"] + "-" + df.iloc[i,:]["Data var."]
	print(method)
	
	# subset sizes are passed as (only first set, only second set, both)
	venn2((a - c, b - c, c), set_labels=("Greedy", method))
	plt.show()

---

In [None]:
import matplotlib.pyplot as plt
from matplotlib_venn import venn2

# NOTE(review): `exp` is overridden here while groups_id / methods / data_variations still
# come from the parameters loaded in an earlier cell -- confirm they describe this experiment.
exp = "exp571"

# details
list_graph_greedy = []
list_graph_gnn = []

# Greedy common-edge graph of every group (same order as groups_id).
for group in groups_id:
	df_common_edges = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, "greedy", group, "none"))
	G = nx.from_pandas_edgelist(df_common_edges, edge_attr=["weight"])
	list_graph_greedy.append(G)

# For every group/method/variation: Venn diagrams of node sets and edge sets against the greedy graph.
for k, group in enumerate(groups_id):
	for method in methods:
		for data_variation in data_variations:
			df_edges_filter_weight_filter = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, method, group, data_variation))
			G = nx.from_pandas_edgelist(df_edges_filter_weight_filter, "source", "target", edge_attr="weight")
			
			print(method, group, data_variation)
			nodes1 = set(list(list_graph_greedy[k].nodes()))
			nodes2 = set(list(G.nodes()))
			
			edges1 = set(sort_edges(list_graph_greedy[k].edges()))
			edges2 = set(sort_edges(G.edges()))
			
			fig, axes = plt.subplots(1, 2, figsize=(10, 5))
			# Left panel: node overlap
			venn2(subsets=[nodes1, nodes2],
				set_labels=("Greedy", method),
				ax=axes[0])
			axes[0].set_title("Nodes")

			# Right panel: edge overlap
			venn2(subsets=[edges1, edges2],
				set_labels=("Greedy", method),
				ax=axes[1])
			axes[1].set_title("Edges")

			plt.show()

### Verify synthetic raw data

In [None]:
# Print dtypes and summary statistics of every synthetic raw file (one per Br/Ar pair).
# `k` is only a running label printed next to each pair, starting at 1001.
k = 1001
for br in [2, 3, 4, 5]:
	for ar in [2, 3, 4, 5]:
		df = pd.read_csv("experiments/raw_data/synthetic_br{}_ar{}_v7_format.csv".format(br, ar), sep="|")
		print(k, br, ar)
		print(df.dtypes)
		print(df.describe())
		print()
		k += 1

In [None]:
# Scratch check: build a float tensor from an integer literal.
# NOTE(review): `torch` is only imported in a later cell of this notebook.
torch.tensor(1250126, dtype=torch.float)

In [None]:
# Node table of experiment exp1009, group AA, without its first two columns.
# NOTE(review): `subgroup` is not defined in the visible cells -- confirm where it comes from.
nodes_data = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format("exp1009", "AA", subgroup)).iloc[:, 2:]

### Comparison (num. edges) across different numbers of Br and Ar

In [None]:
# on synthetic dataset

# Load the run parameters of the experiment that the following cells analyse.
exp = "exp1233"
# `with` closes the parameters file as soon as it has been parsed
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]
# print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

By number of edges

In [None]:
# by number of edges
# One experiment per (Br, Ar) pair, with consecutive experiment ids starting at `e`.
# For each experiment: edge counts of the greedy common subgraph, of every GNN run and of their overlap.

list_br = [2, 3, 4, 5]
list_ar = [2, 3, 4, 5]
e = 1233 # change

# one frame per experiment; concatenated once after the loops instead of growing a frame step by step
list_df_aux = []

for br in list_br:
	for ar in list_ar:
		exp = "exp" + str(e)
		print(exp)

		list_details = []
		list_graph_greedy = []

		# greedy common-edge graph of every group (same order as groups_id)
		for group in groups_id:
			df_common_edges = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, "greedy", group, "none"))
			G = nx.from_pandas_edgelist(df_common_edges, edge_attr=["weight"])
			list_graph_greedy.append(G)

		for method in methods:
			for k, group in enumerate(groups_id):
				for data_variation in data_variations:
					df_edges_filter_weight_filter = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}.csv".format(exp, method, group, data_variation))

					G0 = list_graph_greedy[k]
					G1 = nx.from_pandas_edgelist(df_edges_filter_weight_filter, "source", "target", edge_attr="weight")

					s1 = set(sort_edges(G0.edges()))
					s2 = set(sort_edges(G1.edges()))

					intersection = s1 & s2

					list_details.append([method, group, data_variation, br, ar, G0.number_of_edges(), G1.number_of_edges(), len(intersection)])
		e += 1
		df_aux = pd.DataFrame(list_details, columns=["Method", "Group", "Data var.", "Br", "Ar", "Num. edges (Greedy)", "Num. edges (GNN)", "Num. common edges"])
		# share of the greedy edges that the GNN run also found; the column is named
		# "common", so it is derived from the common-edge count, not from the GNN edge count
		df_aux["common (%)"] = np.round((df_aux["Num. common edges"] / df_aux["Num. edges (Greedy)"]) * 100, 0)
		# print(df_aux)

		list_df_aux.append(df_aux)

df_details = pd.concat(list_df_aux, ignore_index=True)

In [None]:
# Persist the comparison table ...
df_details.to_csv("temp/z_comparison_br_ar_num_edges_random.csv") # change
df_details

# ... and read it back from the same file (the saved index column becomes the index again).
df_details = pd.read_csv("temp/z_comparison_br_ar_num_edges_random.csv", index_col=0)
df_details

In [None]:
# Load and Plot
# Overlays the number of common edges of the three saved comparison tables
# (plain, "laplacian" and "random" files) together with the greedy edge count.

df_details1 = pd.read_csv("temp/z_comparison_br_ar_num_edges.csv", index_col=0)
df_details1

df_details2 = pd.read_csv("temp/z_comparison_br_ar_num_edges_laplacian.csv", index_col=0)
df_details2

df_details3 = pd.read_csv("temp/z_comparison_br_ar_num_edges_random.csv", index_col=0)
# Overwrite the method names with display names.
# NOTE(review): hard-codes 4 methods x 6 rows x 16 experiments (384 rows) -- confirm the file layout.
df_details3["Method"] = (["DGI"] * 6 + ["ARGVA"] * 6 + ["LVGAE"] * 6 + ["VGAE"] * 6) * 16
df_details3

# One "<method>-Br(x)-Ar(y)" label per row.
columns = df_details3["Method"] + "-Br(" + df_details3["Br"].astype(str) + ")-Ar(" + df_details3["Ar"].astype(str) + ")"
print(columns)

# Tick positions/labels: every 6th row plus the very last row.
columns_ = [columns[0]]
range_ = [0]
for k in range(6, len(columns.values), 6):
  # columns_.append(columns.values[k - 1])
  columns_.append(columns.values[k])

  # range_.append(k - 1)
  range_.append(k)
columns_.append(columns.values[-1])
range_.append(len(columns.values) - 1)

# All three tables are plotted against the row positions of df_details3.
x = list(range(len(df_details3)))

plt.subplots(figsize=(16, 6))
y = df_details1["Num. common edges"].values
plt.plot(x, y, label="GNN", linestyle="solid", c="tab:blue")
# y = df_details1["Num. edges (Greedy)"].values
# plt.plot(x, y, label="Base-greedy", linestyle="dotted", c="tab:red")

y = df_details2["Num. common edges"].values
plt.plot(x, y, label="GNN + Laplacian", linestyle="solid", c="tab:orange")
# y = df_details2["Num. edges (Greedy)"].values
# plt.plot(x, y, label="Laplacian-greedy", linestyle="dotted", c="tab:orange")

y = df_details3["Num. common edges"].values
plt.plot(x, y, label="GNN + RandomWalk", linestyle="solid", c="tab:green")
# Greedy reference line, taken from the third table only.
y = df_details3["Num. edges (Greedy)"].values
plt.plot(x, y, label="Greedy", linestyle="dotted", c="tab:red")

plt.xticks(range_, labels=columns_, rotation=60, ha="right", rotation_mode="anchor")
plt.ylabel("Num. edges")
plt.legend(loc="best", frameon=False)
plt.grid(axis="y")
# plt.grid()
plt.savefig("experiments/plots/comparison_Br_Ar_positional_enconding.pdf", format="pdf", bbox_inches="tight")
plt.show()

By number of significant edges

In [None]:
# by number of significant edges
# One experiment per (Br, Ar) pair, with consecutive experiment ids starting at `e`.
# For each experiment: significant ("*") edges of the greedy baseline vs. every GNN run.
list_graph_greedy = []

list_br = [2, 3, 4, 5]
list_ar = [2, 3, 4, 5]
e = 1051

# one frame per experiment; concatenated once after the loops instead of growing a frame step by step
list_df_aux = []
for br in list_br:
	for ar in list_ar:
		exp = "exp" + str(e)
		print(exp)

		df_change_greedy = pd.read_csv("experiments/output/{}/changes/changes_edges_log22_{}_{}_{}_{}.csv".format(exp, "greedy", "AA", "BB", "none"))
		df_change_greedy = df_change_greedy[df_change_greedy["significant"] == "*"]
		G0 = nx.from_pandas_edgelist(df_change_greedy, "source", "target", edge_attr="weight1")

		list_details = []

		for method in methods:
			for k, group in enumerate(groups):
				for data_variation in data_variations:
					try:
						df_change = pd.read_csv("experiments/output/{}/changes/changes_edges_log22_{}_{}_{}_{}.csv".format(exp, method, group[0], group[1], data_variation))

						df_change_filter = df_change[df_change["significant"] == "*"]
						G1 = nx.from_pandas_edgelist(df_change_filter, "source", "target", edge_attr="weight1")

						s1 = set(sort_edges(G0.edges()))
						s2 = set(sort_edges(G1.edges()))

						intersection = s1 & s2

						list_details.append([method, group, data_variation, br, ar, G0.number_of_edges(), G1.number_of_edges(), len(intersection)])
					except Exception as err:
						# Best effort: a run that cannot be loaded is recorded with zero edges.
						# `Exception` (not a bare except) keeps Ctrl-C working, and the reason is reported.
						print("Skipped {} {} {} {}: {!r}".format(exp, method, group, data_variation, err))
						list_details.append([method, group, data_variation, br, ar, G0.number_of_edges(), 0, 0])
		e += 1
		df_aux = pd.DataFrame(list_details, columns=["Method", "Group", "Data var.", "Br", "Ar", "Num. edges (Greedy)", "Num. edges (GNN)", "Num. common edges"])
		# share of the greedy edges that the GNN run also found; the column is named
		# "common", so it is derived from the common-edge count, not from the GNN edge count
		df_aux["common (%)"] = np.round((df_aux["Num. common edges"] / df_aux["Num. edges (Greedy)"]) * 100, 0)
		# print(df_aux)

		list_df_aux.append(df_aux)

df_details = pd.concat(list_df_aux, ignore_index=True)

In [None]:
# Persist the significant-edge comparison table.
# NOTE(review): written to the working directory, unlike the other table which goes to temp/.
df_details.to_csv("z_comparison_br_ar_num_edges_significant.csv")
df_details

### Comparison of positional encodings

#### Heatmap (common edges)

In [None]:
# Load the edge-count tables of one dataset: the plain file plus the "_laplacian" and
# "_random" files, collected in this order in list_df.
dataset = "leaf" # change

list_df = []

df_edges_count_avg1 = pd.read_csv("experiments/run_details/edges_avg_{}.csv".format(dataset), index_col=0)
list_df.append(df_edges_count_avg1)

df_edges_count_avg2 = pd.read_csv("experiments/run_details/edges_avg_{}_laplacian.csv".format(dataset), index_col=0)
list_df.append(df_edges_count_avg2)

df_edges_count_avg3 = pd.read_csv("experiments/run_details/edges_avg_{}_random.csv".format(dataset), index_col=0)
list_df.append(df_edges_count_avg3)

df_edges_count_avg3


Load

In [None]:
# mutant
# Rows are read as interleaved in blocks of three (row 0, 3, 6, ... -> a; 1, 4, 7, ... -> b;
# 2, 5, 8, ... -> c). Each table in list_df contributes its columns side by side.

a = list_df[0].iloc[0::3, :].round(2).values # pck1
b = list_df[0].iloc[1::3, :].round(2).values # zwf1
c = list_df[0].iloc[2::3, :].round(2).values # WT

for k in range(1, len(list_df)):
  a_ = list_df[k].iloc[0::3, :].round(2).values # pck1
  b_ = list_df[k].iloc[1::3, :].round(2).values # zwf1
  c_ = list_df[k].iloc[2::3, :].round(2).values # WT
  
  # append the columns of table k to the right of what was collected so far
  a = np.concatenate((a, a_), axis=1)
  b = np.concatenate((b, b_), axis=1)
  c = np.concatenate((c, c_), axis=1)

In [None]:
# leaf, synthetic
# Rows are read as interleaved in pairs (even rows -> a, odd rows -> b).
# Each table in list_df contributes its columns side by side.

a = list_df[0].iloc[0::2, :].round(2).values # new, phenotype1
b = list_df[0].iloc[1::2, :].round(2).values # old, phenotype2

for k in range(1, len(list_df)):
  a_ = list_df[k].iloc[0::2, :].round(2).values # even rows of table k
  b_ = list_df[k].iloc[1::2, :].round(2).values # odd rows of table k
  
  # append the columns of table k to the right of what was collected so far
  a = np.concatenate((a, a_), axis=1)
  b = np.concatenate((b, b_), axis=1)


Format

In [None]:
# mutant, cancer, leaf, synthetic
# x tick labels "<data variation>-<dimension>": 30 labels, repeated 3 times
# (once per table concatenated into a / b / c).
columns = ["none-2", "str-2", "dyn-2", "none-3", "str-3", "dyn-3", "none-4", "str-4", "dyn-4", "none-8", "str-8", "dyn-8", "none-16", "str-16", "dyn-16", "none-32",
			"str-32", "dyn-32", "none-64", "str-64", "dyn-64", "none-128", "str-128", "dyn-128", "none-256", "str-256", "dyn-256", "none-512", "str-512", "dyn-512"] * 3

# y tick labels (one method per heatmap row); identical for all three heatmaps
row1 = ["DGI", "ARGVA", "LVGAE", "VGAE"] #, "Greedy"]
row2 = ["DGI", "ARGVA", "LVGAE", "VGAE"] #, "Greedy"]
row3 = ["DGI", "ARGVA", "LVGAE", "VGAE"] #, "Greedy"]

Replace

In [None]:
# Replace (mutant)
# Disabled: the overrides below sit inside a string literal, so nothing is modified;
# the cell only evaluates that string.

""" a[3][4] = a.min()
a[3][5] = a.min()

b[3][61] = b.min()
b[3][62] = b.min()

c[3][61] = c.min()
c[3][62] = c.min() """

In [None]:
# Replace (leaf)
# Disabled: the overrides below sit inside a string literal, so nothing is modified;
# the cell only evaluates that string.

""" a[3][4] = a.min()
a[3][5] = a.min()

b[3][61] = b.min()
b[3][62] = b.min() """

In [None]:
# Replace (synthetic)
# Disabled: the overrides below sit inside a string literal, so nothing is modified;
# the cell only evaluates that string.

""" a[3][4] = a.min()
a[3][5] = a.min()

b[3][61] = b.min()
b[3][62] = b.min() """

Plot common edges (heatmap)

In [None]:
# mutant and cancer (Heatmap)
# Three stacked heatmaps (top to bottom: WT, zwf1, pck1), each with its own colour
# scale and colour bar. Only the bottom panel shows the x tick labels.
import matplotlib.pyplot as plt
import numpy as np

name = "mutant_global" # change

# Create a figure and subplots
fig, axes = plt.subplots(3, 1, figsize=(30, 4.5))
# Adjust layout
# plt.tight_layout()

# Bottom panel (axes[2]): matrix `a`
ax0 = axes[2].imshow(a, cmap="Blues", interpolation="none", vmin=round(a.min()), vmax=round(a.max()))
axes[2].set_xticks(np.arange(len(columns)), labels=columns)
axes[2].set_yticks(np.arange(len(row1)), labels=row1)
axes[2].set_title("pck1")
plt.setp(axes[2].get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
# axes[2].axis('off')
# colour bar ticks at the rounded minimum, midpoint and maximum
cbar = fig.colorbar(ax0, ax=axes[2], ticks=[round(a.min()), round((a.min() + a.max()) / 2), round(a.max())], pad=0.005)
cbar.set_label("Num. edges")

# Middle panel (axes[1]): matrix `b`, tick marks without labels
ax1 = axes[1].imshow(b, cmap="Blues", interpolation="none", vmin=round(b.min()), vmax=round(b.max()))
axes[1].set_xticks(np.arange(len(columns)), labels="")
axes[1].set_yticks(np.arange(len(row2)), labels=row2)
axes[1].set_title("zwf1")
# axes[1].axis('off')
cbar = fig.colorbar(ax1, ax=axes[1], ticks=[round(b.min()), round((b.min() + b.max()) / 2), round(b.max())], pad=0.005)
cbar.set_label("Num. edges")

# Top panel (axes[0]): matrix `c`, tick marks without labels
ax2 = axes[0].imshow(c, cmap="Blues", interpolation="none", vmin=round(c.min()), vmax=round(c.max()))
axes[0].set_xticks(np.arange(len(columns)), labels="")
axes[0].set_yticks(np.arange(len(row3)), labels=row3)
axes[0].set_title("WT")
# axes[0].axis('off')
cbar = fig.colorbar(ax2, ax=axes[0], ticks=[round(c.min()), round((c.min() + c.max()) / 2), round(c.max())], pad=0.005)
cbar.set_label("Num. edges")

# Create a single colorbar for all subplots
# cbar = fig.colorbar(ax2, ax=axes.ravel().tolist(), fraction=0.02, pad=0.02)
# cbar.set_label("Num. of common edges")

""" cax = plt.axes((0.888, 0.11, 0.02, 0.77)) # change mutant
# cax = plt.axes((0.735, 0.11, 0.02, 0.77)) # change cancer
cbar = fig.colorbar(ax1, cax=cax)
cbar.set_label("Common edges") """

# Save the figure as PDF, then show it
plt.savefig("experiments/plots/common_edges_{}.pdf".format(name), format="pdf", bbox_inches="tight")
plt.show()

In [None]:
# leaf, synthetic (Heatmap)
# Two stacked heatmaps (top: "new", bottom: "old"), each with its own colour scale
# and colour bar. Only the bottom panel shows the x tick labels.
import matplotlib.pyplot as plt
import numpy as np

name = "leaf_global" # change

# Create a figure and subplots
fig, axes = plt.subplots(2, 1, figsize=(30, 3))
# Adjust layout
# plt.tight_layout()

# Top panel (axes[0]): matrix `a`, tick marks without labels
ax0 = axes[0].imshow(a, cmap="Blues", interpolation="none", vmin=round(a.min()), vmax=round(a.max()))
axes[0].set_xticks(np.arange(len(columns)), labels="")
axes[0].set_yticks(np.arange(len(row1)), labels=row1)
axes[0].set_title("new") # phenotype1, new
# axes[0].axis('off')
# colour bar ticks at the rounded minimum, midpoint and maximum
cbar = fig.colorbar(ax0, ax=axes[0], ticks=[round(a.min()), round((a.min() + a.max()) / 2), round(a.max())], pad=0.005)
cbar.set_label("Num. edges")

# Bottom panel (axes[1]): matrix `b`
ax1 = axes[1].imshow(b, cmap="Blues", interpolation="none", vmin=round(b.min()), vmax=round(b.max()))
axes[1].set_xticks(np.arange(len(columns)), labels=columns)
axes[1].set_yticks(np.arange(len(row2)), labels=row2)
plt.setp(axes[1].get_xticklabels(), rotation=60, ha="right", rotation_mode="anchor")
axes[1].set_title("old") # phenotype2, old
# axes[1].axis('off')
cbar = fig.colorbar(ax1, ax=axes[1], ticks=[round(b.min()), round((b.min() + b.max()) / 2), round(b.max())], pad=0.005)
cbar.set_label("Num. edges")

# Create a single colorbar for all subplots
# cbar = fig.colorbar(ax1, ax=axes.ravel().tolist(), fraction=0.02, pad=0.02, shrink=1)
# cbar.set_label("Num. of common edges")

""" cax = plt.axes((0.873, 0.11, 0.02, 0.77))
cbar = fig.colorbar(ax1, cax=cax)
cbar.set_label("Common edges") """

# Save the figure as PDF, then show it
plt.savefig("experiments/plots/common_edges_{}.pdf".format(name), format="pdf", bbox_inches="tight")
plt.show()

Plot common edges (Line)

In [None]:
# Load the three edge-count tables of one dataset for the line plots:
# the plain file plus the "_laplacian" and "_random" files.
dataset = "synthetic" # change

df_edges_count_avg1 = pd.read_csv("experiments/run_details/edges_avg_{}.csv".format(dataset), index_col=0)
df_edges_count_avg1

df_edges_count_avg2 = pd.read_csv("experiments/run_details/edges_avg_{}_laplacian.csv".format(dataset), index_col=0)
df_edges_count_avg2

df_edges_count_avg3 = pd.read_csv("experiments/run_details/edges_avg_{}_random.csv".format(dataset), index_col=0)
df_edges_count_avg3.head()

In [None]:
# One figure per table row: the same row of the three tables drawn as three lines
# over the column positions of the first table.
columns = list(df_edges_count_avg1.columns)
columns

indexes = list(df_edges_count_avg1.index)
indexes

for k in range(len(df_edges_count_avg1)):
  print(indexes[k])
  x = list(range(len(columns)))
  
  plt.subplots(figsize=(12, 5))
  y = df_edges_count_avg1.iloc[k].values
  plt.plot(x, y, label="GNN", marker=".")
  # plt.bar(x, y)
  
  # rows are matched by position, so all three tables must share the same row order
  y = df_edges_count_avg2.iloc[k].values
  plt.plot(x, y, label="GNN + Laplacian", marker=".")
  # plt.bar(x, y)
  
  y = df_edges_count_avg3.iloc[k].values
  plt.plot(x, y, label="GNN + RandomWalk", marker=".")
  # plt.bar(x, y)

  plt.xticks(np.arange(len(columns)), labels=columns, rotation=60, ha="right", rotation_mode="anchor")
  plt.xlabel("Variation + Dimension")
  plt.ylabel("# edges")
  plt.legend()
  
  # plt.savefig("experiments/plots/common_edges_{}.pdf".format(indexes[k]), format="pdf", bbox_inches="tight")
  plt.show()

#### Plot runtimes

Load

In [None]:
# Load the runtime tables of one dataset: the plain file plus the "_laplacian" and
# "_random" files, collected in this order in list_df.
# NOTE: the df_edges_count_avg* names are reused here although the tables hold runtimes.
dataset = "synthetic" # change

list_df = []

df_edges_count_avg1 = pd.read_csv("experiments/run_details/runtimes_{}.csv".format(dataset), index_col=0)
list_df.append(df_edges_count_avg1)

df_edges_count_avg2 = pd.read_csv("experiments/run_details/runtimes_{}_laplacian.csv".format(dataset), index_col=0)
list_df.append(df_edges_count_avg2)

df_edges_count_avg3 = pd.read_csv("experiments/run_details/runtimes_{}_random.csv".format(dataset), index_col=0)
list_df.append(df_edges_count_avg3)

df_edges_count_avg3

In [None]:
# node embeddings
# One panel per table in list_df. Rows are consumed in groups of three: same colour
# per group, one line style (solid / dashed / dotted) per row of the group.

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
x = ["2", "3", "4", "8", "16", "32", "64", "128", "256", "512"] # change ["3", "4", "8", "16", "32", "64", "128"]
titles = ["GNN", "GNN + Laplacian", "GNN + RandomWalk"]

name = "synthetic_global" # change

fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)

for d, df in enumerate(list_df):
  df_runtimes = df.copy()
  # display names for the methods; hard-codes 4 methods x 3 rows (12 rows per table)
  df_runtimes["Method"] = ["DGI"] * 3 + ["ARGVA"] * 3 + ["LVGAE"] * 3 + ["VGAE"] * 3
  df_runtimes
  
  # everything after the first two columns is treated as runtime values
  z = df_runtimes.iloc[:, 2:].values
  
  k = 0
  end = 10 # change 5:32, 7:128, 10: 512

  for i in range(0, len(df_runtimes), 3):
    # first `end` value columns -- presumably the node-embedding runtimes (TODO confirm)
    axs[d].plot(x, z[i, :end], label="{}-{}".format(df_runtimes.iloc[i, 0], df_runtimes.iloc[i, 1]), linestyle="-", color=colors[k], linewidth=1)
    axs[d].plot(x, z[i + 1, :end], label="{}-{}".format(df_runtimes.iloc[i + 1, 0], df_runtimes.iloc[i + 1, 1]), linestyle="--", color=colors[k], linewidth=1)
    axs[d].plot(x, z[i + 2, :end], label="{}-{}".format(df_runtimes.iloc[i + 2, 0], df_runtimes.iloc[i + 2, 1]), linestyle=":", color=colors[k], linewidth=1)
    k += 1
  axs[d].set_title(titles[d])
  axs[d].grid(axis="y")

fig.tight_layout()
axs[0].set_ylabel("Runtimes (sec.)")
axs[1].set_xlabel("Dimensions")
# single legend, placed on the middle panel
axs[1].legend(ncols=4, fontsize=8, loc="upper center", frameon=False)

plt.savefig("experiments/plots/runtimes_node_embeddings_{}.pdf".format(name), format="pdf", bbox_inches="tight")
plt.show()
  

In [None]:
# edge embeddings
# Same layout as the node-embedding runtime figure, but plotting the value columns from
# position `end` onwards.
# NOTE: `name` (used in the output file name) is not set here; it comes from an earlier cell.

colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
x = ["2", "3", "4", "8", "16", "32", "64", "128", "256", "512"] # change ["3", "4", "8", "16", "32", "64", "128"]
titles = ["GNN", "GNN + Laplacian", "GNN + RandomWalk"]

fig, axs = plt.subplots(1, 3, figsize=(15, 4), sharey=True)

for d, df in enumerate(list_df):
  df_runtimes = df.copy()
  # display names for the methods; hard-codes 4 methods x 3 rows (12 rows per table)
  df_runtimes["Method"] = ["DGI"] * 3 + ["ARGVA"] * 3 + ["LVGAE"] * 3 + ["VGAE"] * 3
  df_runtimes
  
  # everything after the first two columns is treated as runtime values
  z = df_runtimes.iloc[:, 2:].values
  
  k = 0
  end = 10 # change 5:32, 7:128, 10: 512

  for i in range(0, len(df_runtimes), 3):
    # value columns from `end` on -- presumably the edge-embedding runtimes (TODO confirm);
    # their count has to match len(x)
    axs[d].plot(x, z[i, end:], label="{}-{}".format(df_runtimes.iloc[i, 0], df_runtimes.iloc[i, 1]), linestyle="-", color=colors[k], linewidth=1)
    axs[d].plot(x, z[i + 1, end:], label="{}-{}".format(df_runtimes.iloc[i + 1, 0], df_runtimes.iloc[i + 1, 1]), linestyle="--", color=colors[k], linewidth=1)
    axs[d].plot(x, z[i + 2, end:], label="{}-{}".format(df_runtimes.iloc[i + 2, 0], df_runtimes.iloc[i + 2, 1]), linestyle=":", color=colors[k], linewidth=1)
    k += 1
  axs[d].set_title(titles[d])
  axs[d].grid(axis="y")
    
fig.tight_layout()
axs[0].set_ylabel("Runtimes (sec.)")
axs[1].set_xlabel("Dimensions")
# single legend, placed on the middle panel
axs[1].legend(ncols=4, fontsize=8, loc="upper center", frameon=False)

plt.savefig("experiments/plots/runtimes_edge_embeddings_{}.pdf".format(name), format="pdf", bbox_inches="tight") # change
plt.show()
  

#### Plot runtimes 1-1

Load

In [None]:
# Load the three runtime tables of one dataset for the per-row plots:
# the plain file plus the "_laplacian" and "_random" files.
dataset = "mutant" # change

df_edges_count_avg1 = pd.read_csv("experiments/run_details/runtimes_{}.csv".format(dataset), index_col=0)
df_edges_count_avg1

df_edges_count_avg2 = pd.read_csv("experiments/run_details/runtimes_{}_laplacian.csv".format(dataset), index_col=0)
df_edges_count_avg2

df_edges_count_avg3 = pd.read_csv("experiments/run_details/runtimes_{}_random.csv".format(dataset), index_col=0)
df_edges_count_avg3.head()

In [None]:
# One figure per table row with two panels: left = value columns 2..11, right = value
# columns 12 onwards; each panel compares the same row of the three runtime tables.
columns = list(df_edges_count_avg3.iloc[:, 2:].columns)
columns

indexes = list(df_edges_count_avg3["Method"].values)
indexes

for k in range(len(df_edges_count_avg1.iloc[:, 2:])):
  print(indexes[k])
  # the same 10 x positions are used for both panels
  x = list(range(len(columns)))[:10]
  
  fig, axs = plt.subplots(1, 2, figsize=(16, 4))
  # plt.subplots(figsize=(12, 5))
  y = df_edges_count_avg1.iloc[k, 2:12].values
  axs[0].plot(x, y, label="GNN", marker=".")
  
  y = df_edges_count_avg2.iloc[k, 2:12].values
  axs[0].plot(x, y, label="GNN + Laplacian", marker=".")
  
  y = df_edges_count_avg3.iloc[k, 2:12].values
  axs[0].plot(x, y, label="GNN + RandomWalk", marker=".")
  
  # tick positions 0..len(columns)/2 - 1; assumes the value columns split evenly into two halves
  axs[0].set_xticks(np.arange(len(columns) / 2), labels=columns[:10], rotation=60, ha="right", rotation_mode="anchor")
  
  # x = list(range(len(columns)))[10:]
  
  y = df_edges_count_avg1.iloc[k, 12:].values
  axs[1].plot(x, y, label="GNN", marker=".")
  # plt.bar(x, y)
    
  y = df_edges_count_avg2.iloc[k, 12:].values
  axs[1].plot(x, y, label="GNN + Laplacian", marker=".")
  # plt.bar(x, y)
  
  y = df_edges_count_avg3.iloc[k, 12:].values
  axs[1].plot(x, y, label="GNN + RandomWalk", marker=".")
  # plt.bar(x, y)

  axs[1].set_xticks(np.arange(len(columns) / 2), labels=columns[10:], rotation=60, ha="right", rotation_mode="anchor")
  # plt.xlabel("Dimensions")
  # plt.ylabel("Runtimes (sec.)")
  # plt.legend() acts on the current axes only, so the legend appears on one panel
  plt.legend()
  # plt.savefig("experiments/plots/common_edges_{}.pdf".format(indexes[k]), format="pdf", bbox_inches="tight")
  plt.show()

### Over-smoothing measurement

In [None]:
# Load the run parameters of the experiment that the following cells analyse.
exp = "exp1251"
# `with` closes the parameters file as soon as it has been parsed
with open("experiments/output/{}/parameters.json".format(exp)) as file:
	params = json.load(file)

exp = params["exp"]

print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

control = params["control"]
print("Control:\t", control)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
def plot_embedding_3d(df_embeddings, labels, reduction="pca", embedding="node", title="", title_legend="", save=False):
	"""Scatter-plot embeddings in 3D, one colour per distinct label.

	Embeddings with more than three columns are first reduced to three
	components with the chosen method; otherwise the values are plotted as is.

	Args:
		df_embeddings (pd.DataFrame): One row per item, one column per dimension.
		labels (pd.Series): One label per row. For "node"/"edge" the legend shows
			"Br<label + 1>"; for "outlier" the label indexes ["inliers", "outliers"].
		reduction (str): "pca", "tsne" or "umap"; only used when a reduction is needed.
		embedding (str): "node", "edge" or "outlier"; selects colours and legend text.
			Any other value plots nothing.
		title (str): File stem of the PDF written when ``save`` is True.
		title_legend (str): Currently unused; kept so existing calls keep working.
		save (bool): If True, write "experiments/plots/<title>.pdf".

	Returns:
		The 3D matplotlib axes that holds the scatter plot.

	Raises:
		ValueError: If a reduction is needed and ``reduction`` is not supported.
	"""
	if df_embeddings.shape[1] > 3:
		if reduction == "pca":
			df_embeddings_red = PCA(n_components=3).fit_transform(df_embeddings)
		elif reduction == "tsne":
			df_embeddings_red = TSNE(n_components=3).fit_transform(df_embeddings)
		elif reduction == "umap":
			# UMAP yields 2 components by default; with only two, the z axis below
			# would silently read the "labels" column instead of a third component
			df_embeddings_red = umap.UMAP(n_components=3).fit_transform(df_embeddings)
		else:
			# fail early instead of hitting an unbound variable further down
			raise ValueError("Unknown reduction: {!r} (expected 'pca', 'tsne' or 'umap')".format(reduction))
	else:
		df_embeddings_red = df_embeddings.values

	fig = plt.figure(figsize=(6, 6))
	ax = fig.add_subplot(projection="3d")

	df_embeddings = pd.DataFrame(df_embeddings_red)
	df_embeddings["labels"] = labels.values

	unique_labels = np.unique(labels)
	palette = plt.cm.Dark2.colors  # qualitative palette used for node/edge plots
	outlier_colors = plt.get_cmap("coolwarm")(np.linspace(0, 1, len(unique_labels)))

	for i, label in enumerate(unique_labels):
		df_embeddings_filter = df_embeddings[df_embeddings["labels"] == label]

		x = df_embeddings_filter.iloc[:, 0]
		y = df_embeddings_filter.iloc[:, 1]
		z = df_embeddings_filter.iloc[:, 2]

		if embedding in ("node", "edge"):
			# wrap around so that more labels than palette entries do not raise IndexError
			color = palette[i % len(palette)]
			ax.scatter(x, y, z, s=100, alpha=0.5, color=color, marker=".", label="Br{}".format(label + 1))
		elif embedding == "outlier":
			ax.scatter(x, y, z, s=100, alpha=0.5, c=[outlier_colors[label]], marker=".", label=["inliers", "outliers"][label])

	if save:
		# plt.legend(title=title_legend, ncol=1, loc=0, bbox_to_anchor=(1, 0.95)) # ncol=len(unique_labels)
		plt.savefig("experiments/plots/{}.pdf".format(title), format="pdf", bbox_inches="tight") # change
	plt.show()
	return ax

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
from sklearn.metrics import pairwise_distances

# Related paper: (AAAI 2020) Measuring and Relieving the Over-smoothing Problem for Graph Neural Networks from the Topological View.
# https://aaai.org/ojs/index.php/AAAI/article/view/5747

# The numpy version of MAD (quick to compute).
def mad_value(in_arr, mask_arr, distance_metric='cosine', digt_num=4, target_idx=None):
    """Compute the Mean Average Distance (MAD) of a node feature matrix.

    For every node, the pairwise distances to the nodes selected by its mask
    row are averaged over the non-zero entries; these per-node averages are
    then averaged again.

    Args:
        in_arr: [node_num, hidden_dim] node feature matrix.
        mask_arr: [node_num, node_num] mask of the target relation; it is
            multiplied element-wise with the distance matrix.
        distance_metric: Metric name passed on to ``pairwise_distances``.
        digt_num: Number of decimals the result is rounded to.
        target_idx: Optional [node_num] array that is multiplied element-wise
            with the per-node averages (e.g. a 0/1 mask selecting the nodes of
            interest). ``None`` averages over all nodes.

    Returns:
        The rounded MAD value. With ``target_idx`` given, the mean is taken over
        the nodes whose weighted average distance is non-zero.
    """
    dist_arr = pairwise_distances(in_arr, in_arr, metric=distance_metric)

    mask_dist = np.multiply(dist_arr, mask_arr)

    # number of non-zero masked distances per node; the epsilon avoids 0/0
    divide_arr = (mask_dist != 0).sum(1) + 1e-8

    node_dist = mask_dist.sum(1) / divide_arr

    # `is None` check: calling .any() on the default None raised AttributeError,
    # and comparing an array's .any() with None was never true
    if target_idx is None:
        mad = np.mean(node_dist)
    else:
        node_dist = np.multiply(node_dist, target_idx)
        mad = node_dist.sum() / ((node_dist != 0).sum() + 1e-8)

    mad = round(mad, digt_num)

    return mad

# The tensor version of MADGap (differentiable, so gradients can flow through it).
def mad_gap_regularizer(intensor, neb_mask, rmt_mask, target_idx):
    """Compute MADGap: mean cosine distance to remote nodes minus that to neighbours.

    Args:
        intensor: [node_num, hidden_dim] node feature tensor.
        neb_mask: [node_num, node_num] mask of the neighbour relation.
        rmt_mask: [node_num, node_num] mask of the remote relation.
        target_idx: Index used to select the nodes (rows of the per-node means)
            that enter the final averages.

    Returns:
        Scalar tensor ``rmt_mad - neb_mad``.
    """
    node_num, feat_num = intensor.size()

    # build all ordered node pairs: row r of input1 / input2 holds the two members of pair r
    input1 = intensor.expand(node_num, node_num, feat_num)
    input2 = input1.transpose(0, 1)

    input1 = input1.contiguous().view(-1, feat_num)
    input2 = input2.contiguous().view(-1, feat_num)

    simi_tensor = F.cosine_similarity(input1, input2, dim=1, eps=1e-8).view(node_num, node_num)
    dist_tensor = 1 - simi_tensor

    neb_dist = torch.mul(dist_tensor, neb_mask)
    rmt_dist = torch.mul(dist_tensor, rmt_mask)

    # Counts of non-zero distances per node as float32 on the SAME device as the
    # distances; the previous hard-coded .cuda() failed on CPU-only machines and
    # mismatched devices for CPU inputs. The epsilon avoids division by zero.
    divide_neb = (neb_dist != 0).sum(1).float() + 1e-8
    divide_rmt = (rmt_dist != 0).sum(1).float() + 1e-8

    neb_mean_list = neb_dist.sum(1) / divide_neb
    rmt_mean_list = rmt_dist.sum(1) / divide_rmt

    neb_mad = torch.mean(neb_mean_list[target_idx])
    rmt_mad = torch.mean(rmt_mean_list[target_idx])

    mad_gap = rmt_mad - neb_mad

    return mad_gap

In [None]:
# For every method / group / data variation: load the node embeddings, print the MAD
# of each subgroup and draw the embeddings in 3D (PCA), coloured by subgroup.
iteration = 1 # change
save = True # change

for e in [1251]: # change, experiment
	exp = "exp" + str(e)
	for method in methods[:]: # change
		for group_id in groups_id[:]: # change
			for data_variation in data_variations: # change
				df_node_embeddings_concat = pd.DataFrame()
				if data_variation == "none":
					# one embedding file per subgroup; the subgroup label is its running position
					k = 0
					for subgroup_id in subgroups_id[group_id]:
						print(exp, method, group_id, subgroup_id, iteration)
						df_node_embeddings = pd.read_csv("experiments/output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(exp, method, group_id, subgroup_id, iteration), index_col=0)
						
						df_node_embeddings["subgroup"] = [k] * len(df_node_embeddings)
						df_node_embeddings_concat = pd.concat([df_node_embeddings_concat, df_node_embeddings])
						k += 1
				else:
					print(exp, method, group_id, data_variation, iteration)
					df_nodes = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(exp, group_id, data_variation), index_col=0, usecols=[0, 1])
					# subgroup label from the first character of the node id: "A" -> 0, "B" -> 1, ...
					df_nodes["subgroup"] = df_nodes["id"].apply(lambda x: ord(x[0]) - 65)

					df_node_embeddings = pd.read_csv("experiments/output/{}/node_embeddings/node-embeddings_{}_{}_{}_{}.csv".format(exp, method, group_id, data_variation, iteration), index_col=0)
					
					df_node_embeddings["subgroup"] = df_nodes["subgroup"]
					df_node_embeddings_concat = pd.concat([df_node_embeddings_concat, df_node_embeddings])

				# calculate MAD
				for g in np.unique(df_node_embeddings_concat.iloc[:, -1]):
					df_temp = df_node_embeddings_concat[df_node_embeddings_concat["subgroup"] == g].iloc[:, :-1]
					# mad_value multiplies the per-node distances by target_idx, so every node must
					# get weight 1; node positions (0, 1, 2, ...) would drop node 0 and scale the rest
					mad = mad_value(df_temp.values, np.full((len(df_temp), len(df_temp)), True), distance_metric='cosine', digt_num=4, target_idx=np.ones(len(df_temp)))
					print(g, mad)
				plot_embedding_3d(df_node_embeddings_concat.iloc[:, :-1], df_node_embeddings_concat.iloc[:, -1],
									reduction="pca", embedding="node", title="node-embeddings_{}-{}-{}-{}".format(exp, method, group_id, data_variation), title_legend="", save=save)

### Tracking of nodes/edges

In [None]:
import pandas as pd
import json

In [None]:
# Load the parameters recorded for the experiment inspected in this section.
exp = "exp1006"
# A context manager closes the file handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
    params = json.load(file)

# From here on, use the experiment id stored inside the parameters file itself.
exp = params["exp"]
print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

controls = params["controls"]
print("Control:\t", controls)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

# Indexed by group id further down (subgroups_id[group_id]).
subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

In [None]:
""" df_nodes = pd.read_csv("experiments/output/{}/preprocessing/graphs_data/nodes_data_{}_{}.csv".format(exp, group_id, data_variations[0]))
dict_id = dict(zip(df_nodes["idx"], df_nodes["id"]))
print(dict_id)

df_nodes """

In [None]:
# Raw input table of the selected experiment; the first CSV column is used as the index.
df_join_raw = pd.read_csv(
    filepath_or_buffer="experiments/input/{}_raw.csv".format(exp),
    index_col=0,
)
df_join_raw

In [None]:
# Find by Alignment ID
# Follows one pair of nodes through every stage of the pipeline output:
# raw names -> correlations -> edge lists -> common edges.


def _edge_between(edges, node_a, node_b):
    """Return the rows of `edges` that connect `node_a` and `node_b`.

    The match is undirected: a row qualifies when its "source"/"target"
    columns hold the two ids in either order.
    """
    forward = (edges["source"] == node_a) & (edges["target"] == node_b)
    backward = (edges["source"] == node_b) & (edges["target"] == node_a)
    return edges[forward | backward]


n1_id = 0
n2_id = 1

name1 = df_join_raw.loc[n1_id, "Metabolite name"]
name2 = df_join_raw.loc[n2_id, "Metabolite name"]
print("INPUT")
print(n1_id, name1)
print(n2_id, name2)
print()

# Correlations
print("CORRELATIONS")
for group_id in groups_id:
    for subgroup_id in subgroups_id[group_id]:
        df_corr = pd.read_csv("experiments/output/{}/correlations/correlations_{}_{}.csv".format(exp, group_id, subgroup_id), index_col=0)

        # Row is looked up by the integer id, column by its string form
        # (presumably because CSV column headers are read as strings).
        corr = df_corr.loc[n1_id, str(n2_id)]
        print(group_id, subgroup_id, corr)
print()

# Edges list
print("EDGES LIST")
for group_id in groups_id:
    for subgroup_id in subgroups_id[group_id]:
        df_edges = pd.read_csv("experiments/output/{}/preprocessing/edges/edges_{}_{}.csv".format(exp, group_id, subgroup_id))

        df_temp = _edge_between(df_edges, n1_id, n2_id)
        print(group_id, subgroup_id, df_temp["weight"].values)
print()

# Common edges
# before new partial correlations
f = "f1"
print("COMMON EDGES")
print("Before new partial correlations")
for group_id in groups_id:
    df_edges = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}_{}_1.csv".format(exp, f, methods[0], group_id, data_variations[0]))

    df_temp = _edge_between(df_edges, n1_id, n2_id)
    # Unlike the other sections, this prints every column of the matched rows,
    # not only "weight". NOTE(review): confirm whether that is intentional.
    print(group_id, df_temp.values)
print()

# with all new partial correlations
print("With all new partial correlations")
for group_id in groups_id:
    df_edges = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}_{}_all.csv".format(exp, f, methods[0], group_id, data_variations[0]))

    df_temp = _edge_between(df_edges, n1_id, n2_id)
    print(group_id, df_temp["weight"].values)
print()

# with filter (th_corr) on new partial correlations
print("With filter (th_corr) on new partial correlations")
for group_id in groups_id:
    df_edges = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}_{}.csv".format(exp, f, methods[0], group_id, data_variations[0]))

    df_temp = _edge_between(df_edges, n1_id, n2_id)
    print(group_id, df_temp["weight"].values)
print()

### Get filter matrix

In [None]:
import pandas as pd

In [None]:
# Load the parameters recorded for the experiment used to build the filter matrix.
exp = "exp402"
# A context manager closes the file handle as soon as the JSON has been parsed.
with open("experiments/output/{}/parameters.json".format(exp)) as file:
    params = json.load(file)

# From here on, use the experiment id stored inside the parameters file itself.
exp = params["exp"]
print("Exp:\t\t", exp)

methods = params["methods"]
print("Methods:\t", methods)

data_variations = params["data_variations"]
print("Data variations:", data_variations)

controls = params["controls"]
print("Control:\t", controls)

groups_id = params["groups_id"]
print("Groups id:\t", groups_id)

subgroups_id = params["subgroups_id"]
print("Subgroups id:\t", subgroups_id)

groups = params["groups"]
print("Groups:\t\t", groups)

# Tag that appears in the common-edges file names read further down.
f = "f1"

In [None]:
# Read the raw input table of the selected experiment (first CSV column -> index).

df_join_raw = pd.read_csv(
    filepath_or_buffer="experiments/input/{}_raw.csv".format(exp),
    index_col=0,
)
df_join_raw

In [None]:
# Index labels of the raw input table, as a set.
nodes_raw_data = set(df_join_raw.index)
print(len(nodes_raw_data), nodes_raw_data)
list_common = []

for group_id in groups_id:
    # Pick the method / data variation explicitly from the loaded parameters
    # instead of relying on `method` and `data_variation` variables that are
    # only defined as loop leftovers from other cells.
    df_edges_filter_weight_filter = pd.read_csv("experiments/output/{}/common_edges/common_edges_{}_{}_{}_{}.csv".format(exp, f, methods[0], group_id, data_variations[0]))

    # Distinct node ids found in the first two columns of the edge table.
    nodes = set(np.unique(df_edges_filter_weight_filter.iloc[:, [0, 1]].values.flatten()))
    # list_common.append([nodes]) # common, n-common
    # A set has no defined order; sort it so the exported file is reproducible.
    df_temp = pd.DataFrame(sorted(nodes), columns=[group_id])
    df_temp.to_csv("z_{}.csv".format(group_id), index=False)
# NOTE(review): nothing is appended to list_common (the append above is disabled),
# so this currently prints an empty list.
print(list_common)

In [None]:
# Display the edge table left in this variable by the last iteration of the loop in the previous cell.
df_edges_filter_weight_filter