# Result Analysis

In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import plotly.graph_objects as go

In [2]:
# Plot line chart of Z-scores for each graph_name
def plot_line_chart(data, title, x_label, y_label):
    """Show a Plotly line chart with one trace per distinct ``graph_name``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``"graph_name"`` column and the column named by
        ``y_label``. The x values come from the column named by ``x_label``
        when it exists, otherwise from the ``"motif"`` column.
    title : str
        Figure title.
    x_label : str
        Title of the x axis and, when it names a column of ``data``, the
        column plotted on the x axis.
    y_label : str
        Title of the y axis and name of the column plotted on the y axis.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    # Previously the x column was hard-coded to "motif" and ``x_label`` only
    # affected the axis title. Honour ``x_label`` as a column name (mirroring
    # how ``y_label`` is used), but keep "motif" as the fallback so callers
    # that pass a purely cosmetic label still work.
    x_column = x_label if x_label in data.columns else "motif"

    fig = go.Figure()
    for graph_name in data["graph_name"].unique():
        df = data[data["graph_name"] == graph_name]
        fig.add_trace(
            go.Scatter(
                x=df[x_column], y=df[y_label], mode="lines+markers", name=graph_name
            )
        )
    fig.update_layout(title=title, xaxis_title=x_label, yaxis_title=y_label)
    fig.show()

### Load Data

In [3]:
# Relative directories (trailing slash included, file names are appended directly)
RESULTS_PATH = "../results/"  # result CSV files read by this notebook
NETWORKS_PATH = "../data/"  # network data directory

In [4]:
# Load data
def load_data(file):
    """Read a CSV file from the results directory into a DataFrame.

    ``file`` is appended to ``RESULTS_PATH`` by plain string concatenation
    and the resulting path is handed to ``pandas.read_csv``.
    """
    csv_path = RESULTS_PATH + file
    return pd.read_csv(csv_path)

## Enzymes Networks

In [5]:
file_name = 'enzymes.csv'
# Load the Enzymes results and make sure graph_name is a string column,
# since the cells below filter it with the .str accessor.
enzymes_df = load_data(file_name).astype({"graph_name": str})

In [6]:
# Per-graph statistics of the Enzymes networks
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV was written together with its index — confirm and consider index_col=0.
enzymes_info = load_data('enzymes_info.csv')

# Preview the first rows
enzymes_info.head()

Unnamed: 0,graph,nodes,edges,max_degree,avg_degree
0,ENZYMES_g296,125,282,5.0,4.512
1,ENZYMES_g293,96,218,6.0,4.541667
2,ENZYMES_g123,90,254,9.0,5.644444


### Original Enzymes

In [7]:
# Keep only rows whose graph_name does not contain "sample" (the original graphs)
plot_line_chart(
    data=enzymes_df.loc[~enzymes_df["graph_name"].str.contains("sample")],
    title=f"Z-scores for Enzymes Original Graphs from {file_name}",
    x_label="motif",
    y_label="significance_profile",
)

### Enzymes Samples for Each Original Graph

In [8]:
# One line per graph whose name contains "ENZYMES_g296"
# (presumably the original graph plus its generated samples — verify naming)
selected_graph = "ENZYMES_g296"
plot_line_chart(
    data=enzymes_df.loc[enzymes_df["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Enzymes Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

## Google+ Egonets

In [9]:
file_name = 'gplus.csv'
# Load the Google+ results and make sure graph_name is a string column,
# since the cells below filter it with the .str accessor.
motif_gplus = load_data(file_name).astype({"graph_name": str})

### Google+ Original Egonets

In [10]:
# Per-graph statistics of the Google+ egonets
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV was written together with its index — confirm and consider index_col=0.
gplus_info = load_data('gplus_info.csv')

# Preview the first rows
gplus_info.head()

Unnamed: 0,graph,nodes,edges,max_degree,avg_degree
0,111213696402662884531,258,9214,137.0,71.426357
1,117798157258572080176,168,1411,70.5,16.797619
2,113455290791279442483,108,2884,70.0,53.407407
3,117503822947457399073,307,13800,195.5,89.90228
4,113597493946570654755,321,8558,116.0,53.320872


In [11]:
# Keep only rows whose graph_name does not contain "sample" (the original graphs)
plot_line_chart(
    data=motif_gplus.loc[~motif_gplus["graph_name"].str.contains("sample")],
    title=f"Z-scores for Google Plus Original Graphs from {file_name}",
    x_label="motif",
    y_label="significance_profile",
)

### Google+ Egonets Samples for Each Original Graph

In [12]:
# One line per graph whose name contains "117503822947457399073"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "117503822947457399073"
plot_line_chart(
    data=motif_gplus.loc[motif_gplus["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Google Plus Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)


In [13]:
# One line per graph whose name contains "117798157258572080176"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "117798157258572080176"
plot_line_chart(
    data=motif_gplus.loc[motif_gplus["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Google Plus Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

In [14]:
# One line per graph whose name contains "113455290791279442483"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "113455290791279442483"
plot_line_chart(
    data=motif_gplus.loc[motif_gplus["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Google Plus Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

In [15]:
# One line per graph whose name contains "113597493946570654755"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "113597493946570654755"
plot_line_chart(
    data=motif_gplus.loc[motif_gplus["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Google Plus Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

## Twitter Egonets

In [16]:
# Per-graph statistics of the Twitter egonets
# NOTE(review): the preview shows an "Unnamed: 0" column, which suggests the
# CSV was written together with its index — confirm and consider index_col=0.
twitter_info = load_data('twitter_info.csv')

# Preview the first rows
twitter_info.head()

Unnamed: 0,graph,nodes,edges,max_degree,avg_degree
0,86775971,154,5811,96.5,75.467532
1,7861312,220,4740,78.5,43.090909
2,256497288,213,17930,177.5,168.356808
3,14203895,215,3640,85.0,33.860465
4,14338082,215,3237,135.5,30.111628


In [17]:
# Load the Twitter results and make sure graph_name is a string column,
# since the cells below filter it with the .str accessor.
file_name = 'twitter.csv'
motif_twitter = load_data(file_name).astype({"graph_name": str})

### Twitter Original Egonets

In [18]:
# Keep only rows whose graph_name does not contain "sample" (the original graphs)
plot_line_chart(
    data=motif_twitter.loc[~motif_twitter["graph_name"].str.contains("sample")],
    title=f"Z-scores for Twitter Original Graphs from {file_name}",
    x_label="motif",
    y_label="significance_profile",
)

### Twitter Sampled Egonets

In [19]:
# One line per graph whose name contains "256497288"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "256497288"
plot_line_chart(
    data=motif_twitter.loc[motif_twitter["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Twitter Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

In [20]:
# One line per graph whose name contains "86775971"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "86775971"
plot_line_chart(
    data=motif_twitter.loc[motif_twitter["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Twitter Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

In [21]:
# One line per graph whose name contains "14338082"
# (presumably the original egonet plus its generated samples — verify naming)
selected_graph = "14338082"
plot_line_chart(
    data=motif_twitter.loc[motif_twitter["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Twitter Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)

In [22]:
# One line per graph whose name contains "7861312"
# (presumably the original egonet plus its generated samples — verify naming)
# NOTE(review): this is a substring match, so a longer id that happens to
# contain "7861312" would be included as well — confirm against the data.
selected_graph = "7861312"
plot_line_chart(
    data=motif_twitter.loc[motif_twitter["graph_name"].str.contains(selected_graph)],
    title=f"Z-scores for Twitter Generated Samples of {selected_graph}",
    x_label="motif",
    y_label="significance_profile",
)