# Result Analysis

In [1]:
import pandas as pd
import plotly.graph_objects as go

In [2]:
def plot_line_chart(data, title, x_label, y_label):
    """Draw one line-and-marker trace per graph and show the figure.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``graph_name`` column plus the columns named by
        ``x_label`` and ``y_label``.
    title : str
        Figure title.
    x_label : str
        Column plotted on the x axis; also used as the x-axis title.
    y_label : str
        Column plotted on the y axis; also used as the y-axis title.

    Returns
    -------
    None
        The figure is rendered with ``fig.show()``.
    """
    fig = go.Figure()
    # sort=False keeps traces (and legend entries) in order of first appearance.
    for graph_name, graph_rows in data.groupby("graph_name", sort=False):
        fig.add_trace(
            go.Scatter(
                # Take x from the requested column so the data always matches
                # the axis title (previously hard-coded to "motif").
                x=graph_rows[x_label],
                y=graph_rows[y_label],
                mode="lines+markers",
                name=graph_name,
            )
        )
    fig.update_layout(title=title, xaxis_title=x_label, yaxis_title=y_label)
    fig.show()

## Load Data

In [3]:
# Relative directories used by this notebook.
# RESULTS_PATH is concatenated directly with file names, so it keeps its
# trailing slash.
NETWORKS_PATH = "../../data/"
RESULTS_PATH = "../../results/"

In [4]:
def load_data(file):
    """Read a CSV from the results directory.

    ``file`` is the file name appended to ``RESULTS_PATH``; the parsed
    contents are returned as a pandas DataFrame.
    """
    csv_path = RESULTS_PATH + file
    return pd.read_csv(csv_path)

## Twitter Egonets

In [5]:
# Load the per-graph information table for the Twitter egonets through the
# shared load_data helper, so every results file is read the same way
twitter_info = load_data("twitter_info.csv")

# Preview the first rows
twitter_info.head()

Unnamed: 0,graph_name,nodes,edges,max_degree,avg_degree
0,86775971,154,5811,96.5,75.467532
1,7861312,220,4740,78.5,43.090909
2,256497288,213,17930,177.5,168.356808
3,14203895,215,3640,85.0,33.860465
4,14338082,215,3237,135.5,30.111628


In [6]:
# Motif results for the Twitter graphs; the file name is kept in a variable
# because a later plot title mentions it
file_name = "twitter_samples.csv"
# Read the file and force graph_name to str in one chained step
motif_twitter = load_data(file_name).astype({"graph_name": str})

In [7]:
# Display the loaded motif table (columns include graph_name, motif, z_score
# and significance_profile)
motif_twitter

Unnamed: 0,graph_name,motif,average_count,standard_deviation,z_score,significance_profile
0,256497288,motif_1,52435.44,887.535214,22.877470,0.011187
1,256497288,motif_2,36026.44,646.921175,1.198848,0.000586
2,256497288,motif_3,72416.32,1275.567250,-28.681608,-0.014026
3,256497288,motif_4,24514.04,563.694545,50.319735,0.024607
4,256497288,motif_5,33661.00,741.003430,127.988611,0.062588
...,...,...,...,...,...,...
1360,256497288_sample_rn_20,motif_9,46.60,12.254251,49.892890,0.275955
1361,256497288_sample_rn_20,motif_10,42.96,9.926732,40.601481,0.224565
1362,256497288_sample_rn_20,motif_11,74.76,13.379960,7.790756,0.043090
1363,256497288_sample_rn_20,motif_12,32.08,13.162193,46.186831,0.255457


### Twitter Original Egonets

In [8]:
# Plot only the original Twitter graphs, i.e. rows whose graph_name does not
# contain "sample". The plotted column is significance_profile, so the title
# names that quantity rather than "Z-scores".
plot_line_chart(
    motif_twitter[~motif_twitter["graph_name"].str.contains("sample")],
    f"Significance profiles for Twitter Original Graphs from {file_name}",
    "motif",
    "significance_profile",
)

## Twitter Samples

In [9]:
# Load the per-graph information table for the Twitter samples through the
# shared load_data helper, so every results file is read the same way
twitter_samples_info = load_data("twitter_samples_info.csv")

twitter_samples_info

Unnamed: 0,graph_name,nodes,edges,max_degree,avg_degree
0,14203895_sample_rj_20,43,601,33.0,27.953488
1,14203895_sample_bsf_10,21,168,16.5,16.000000
2,86775971_sample_rn_30,46,472,25.5,20.521739
3,14338082_sample_rj_20,43,305,28.5,14.186047
4,7861312_sample_rj_40,87,1581,45.0,36.344828
...,...,...,...,...,...
95,86775971_sample_rj_10,15,100,10.0,13.333333
96,14203895_sample_sff_20,44,376,18.0,17.090909
97,14203895_sample_bsf_40,86,1800,63.5,41.860465
98,7861312_sample_rj_10,22,134,10.5,12.181818


In [10]:
# Sampling percentages to compare, largest first
sample_percents = ["40", "30", "20", "10"]

# Original graph whose samples are compared in this cell
selected_graph = "86775971"

for sp in sample_percents:
    # Rows of the original graph plus every sample of it taken at this percentage
    motif_names = motif_twitter["graph_name"]
    is_sample = motif_names.str.startswith(
        selected_graph, na=False
    ) & motif_names.str.endswith(sp, na=False)
    query_df = motif_twitter[(motif_names == selected_graph) | is_sample]

    # The title names the plotted quantity and the sampling percentage so the
    # four figures produced by this loop can be told apart.
    plot_line_chart(
        query_df,
        f"Significance profiles for {sp}% Twitter Generated Samples of {selected_graph}",
        "motif",
        "significance_profile",
    )

    # Matching rows of the sample information table for the same percentage
    info_names = twitter_samples_info["graph_name"]
    query_info_df = twitter_samples_info[
        info_names.str.startswith(selected_graph, na=False)
        & info_names.str.endswith(sp, na=False)
    ]

    print(query_info_df)

                graph_name  nodes  edges  max_degree  avg_degree
12   86775971_sample_rj_40     61    941        34.0   30.852459
45  86775971_sample_rpn_40     48    772        31.5   32.166667
77  86775971_sample_sff_40     76   2682        60.0   70.578947
78   86775971_sample_rn_40     61    742        32.0   24.327869
94  86775971_sample_bsf_40     61   1627        50.0   53.344262


                graph_name  nodes  edges  max_degree  avg_degree
2    86775971_sample_rn_30     46    472        25.5   20.521739
19  86775971_sample_bsf_30     46    610        28.0   26.521739
49  86775971_sample_sff_30     46    878        35.0   38.173913
72  86775971_sample_rpn_30     42    585        28.0   27.857143
86   86775971_sample_rj_30     46    843        32.5   36.652174


                graph_name  nodes  edges  max_degree  avg_degree
14  86775971_sample_rpn_20     23    198        15.0   17.217391
25   86775971_sample_rj_20     30    315        19.5   21.000000
60   86775971_sample_rn_20     27    196        16.5   14.518519
73  86775971_sample_bsf_20     30    246        16.0   16.400000
89  86775971_sample_sff_20     74   2566        67.0   69.351351


                graph_name  nodes  edges  max_degree  avg_degree
7   86775971_sample_bsf_10     15    169        13.0   22.533333
24   86775971_sample_rn_10     14     51         8.5    7.285714
29  86775971_sample_sff_10     50   1047        47.0   41.880000
55  86775971_sample_rpn_10     14     67         9.5    9.571429
95   86775971_sample_rj_10     15    100        10.0   13.333333


In [11]:
# Display the info table for the last sampling percentage of the graph
# selected in the previous cell. The percentage is read from sample_percents
# explicitly instead of relying on the loop variable `sp` left behind by
# that cell's for-loop.
last_percent = sample_percents[-1]
info_names = twitter_samples_info["graph_name"]
query_info_df = twitter_samples_info[
    info_names.str.startswith(selected_graph, na=False)
    & info_names.str.endswith(last_percent, na=False)
]

query_info_df

Unnamed: 0,graph_name,nodes,edges,max_degree,avg_degree
7,86775971_sample_bsf_10,15,169,13.0,22.533333
24,86775971_sample_rn_10,14,51,8.5,7.285714
29,86775971_sample_sff_10,50,1047,47.0,41.88
55,86775971_sample_rpn_10,14,67,9.5,9.571429
95,86775971_sample_rj_10,15,100,10.0,13.333333


In [12]:
# Sampling percentages to compare, largest first
sample_percents = ["40", "30", "20", "10"]

# Original graph whose samples are compared in this cell
selected_graph = "256497288"

for sp in sample_percents:
    # Rows of the original graph plus every sample of it taken at this percentage
    motif_names = motif_twitter["graph_name"]
    is_sample = motif_names.str.startswith(
        selected_graph, na=False
    ) & motif_names.str.endswith(sp, na=False)
    query_df = motif_twitter[(motif_names == selected_graph) | is_sample]

    # The title names the plotted quantity and the sampling percentage so the
    # four figures produced by this loop can be told apart.
    plot_line_chart(
        query_df,
        f"Significance profiles for {sp}% Twitter Generated Samples of {selected_graph}",
        "motif",
        "significance_profile",
    )

    # Matching rows of the sample information table for the same percentage
    info_names = twitter_samples_info["graph_name"]
    query_info_df = twitter_samples_info[
        info_names.str.startswith(selected_graph, na=False)
        & info_names.str.endswith(sp, na=False)
    ]

    print(query_info_df)

                 graph_name  nodes  edges  max_degree  avg_degree
6   256497288_sample_rpn_40     61   1757        50.5   57.606557
43   256497288_sample_rj_40     85   3309        69.0   77.858824
56  256497288_sample_bsf_40     85   3254        68.0   76.564706
65   256497288_sample_rn_40     85   2899        71.0   68.211765
83  256497288_sample_sff_40    110   7588       105.0  137.963636


                 graph_name  nodes  edges  max_degree  avg_degree
10  256497288_sample_sff_30    118   7526       108.0  127.559322
33  256497288_sample_bsf_30     63   1656        46.5   52.571429
39   256497288_sample_rn_30     61   1643        48.0   53.868852
70   256497288_sample_rj_30     63   1937        53.5   61.492063
81  256497288_sample_rpn_30     59   1456        46.5   49.355932


                 graph_name  nodes  edges  max_degree  avg_degree
21   256497288_sample_rj_20     42    765        35.0   36.428571
31  256497288_sample_rpn_20     32    481        27.0   30.062500
57  256497288_sample_sff_20     74   3783        71.0  102.243243
85  256497288_sample_bsf_20     42   1013        35.5   48.238095
99   256497288_sample_rn_20     41    620        29.0   30.243902


                 graph_name  nodes  edges  max_degree  avg_degree
16  256497288_sample_sff_10    115   6898       103.5  119.965217
34   256497288_sample_rn_10     21    192        17.5   18.285714
46  256497288_sample_bsf_10     21    293        19.0   27.904762
50   256497288_sample_rj_10     21    286        19.5   27.238095
93  256497288_sample_rpn_10     19    183        14.5   19.263158
