# Result Analysis

In [1]:
import os
import sys
import pandas as pd

# Get the absolute path to the src directory
src_path = os.path.abspath(os.path.join(os.getcwd(), "../"))

# Add src_path to the system path
sys.path.insert(0, src_path)

# Now you can import your module
import scripts.graph_utils as gru

### Load Data

In [2]:
# Data path directory
RESULTS_PATH = "../../results/"
NETWORKS_PATH = "../../data/"
IMAGES_PATH = "../../img/motifs-draw/"

In [3]:
# Load data
def load_data(file):
    data = pd.read_csv(RESULTS_PATH + file)
    return data

## Twitter Egonets

In [4]:
# Load the Twitter data
file_name = "twitter.csv"
motif_twitter = load_data(file_name)
# cast graph_name as string
motif_twitter["graph_name"] = motif_twitter["graph_name"].astype(str)

In [5]:
motif_twitter

Unnamed: 0,graph_name,motif,average_count,standard_deviation,z_score,significance_profile,nodes,edges,max_degree,avg_degree
0,256497288,motif_1,52435.44,887.535214,22.877470,0.011187,213.0,17930.0,177.5,168.356808
1,256497288,motif_2,36026.44,646.921175,1.198848,0.000586,213.0,17930.0,177.5,168.356808
2,256497288,motif_3,72416.32,1275.567250,-28.681608,-0.014026,213.0,17930.0,177.5,168.356808
3,256497288,motif_4,24514.04,563.694545,50.319735,0.024607,213.0,17930.0,177.5,168.356808
4,256497288,motif_5,33661.00,741.003430,127.988611,0.062588,213.0,17930.0,177.5,168.356808
...,...,...,...,...,...,...,...,...,...,...
1490,111045742,motif_9,482.44,44.499139,48.170820,0.076639,149.0,3380.0,65.5,45.369128
1491,111045742,motif_10,577.92,41.198625,81.169700,0.129140,149.0,3380.0,65.5,45.369128
1492,111045742,motif_11,877.88,76.263971,6.072592,0.009661,149.0,3380.0,65.5,45.369128
1493,111045742,motif_12,354.12,50.406944,135.832079,0.216106,149.0,3380.0,65.5,45.369128


### Twitter Original Egonets

In [6]:
# Plot only the original graphs of Twitter
gru.plot_line_chart_with_images(
    motif_twitter[~motif_twitter["graph_name"].str.contains("sample")],
    f"Z-scores for Twitter Original Graphs from {file_name}",
    "motif",
    "significance_profile",
    IMAGES_PATH
)

In [7]:
motif_twitter.query(
    "graph_name.str.contains('sample') == False and motif == 'motif_1'"
)[["graph_name", "nodes", "edges", "max_degree", "avg_degree"]].sort_values("nodes")

Unnamed: 0,graph_name,nodes,edges,max_degree,avg_degree
1456,105398724,21.0,166.0,14.5,15.809524
1365,311227912,35.0,276.0,24.5,15.771429
1443,64496469,41.0,400.0,23.5,19.512195
1404,39070445,67.0,654.0,37.5,19.522388
1417,98633794,141.0,2354.0,64.5,33.390071
1482,111045742,149.0,3380.0,65.5,45.369128
13,86775971,154.0,5811.0,96.5,75.467532
1469,33511585,159.0,3200.0,67.5,40.251572
1430,11027262,183.0,3499.0,78.5,38.240437
0,256497288,213.0,17930.0,177.5,168.356808


## Twitter Samples

In [8]:
sample_percents = ["40", "30", "20", "10"]

selected_graph = "86775971"

for sp in sample_percents:
    # Query data
    query_df = motif_twitter.query(
        "graph_name == @selected_graph or (graph_name.str.startswith(@selected_graph) and graph_name.str.endswith(@sp))",
    )

    gru.plot_line_chart_with_images(
        query_df,
        f"Z-scores for Twitter Generated Samples of {selected_graph}",
        "motif",
        "significance_profile",
        IMAGES_PATH
    )

    print(
        query_df[["graph_name", "motif", "nodes", "edges"]].query("motif == 'motif_1'")
    )

                  graph_name    motif  nodes   edges
13                  86775971  motif_1  154.0  5811.0
858    86775971_sample_rn_40  motif_1   61.0   742.0
923   86775971_sample_rpn_40  motif_1   48.0   772.0
949    86775971_sample_rj_40  motif_1   60.0  1568.0
1014  86775971_sample_bsf_40  motif_1   61.0  1606.0
1274  86775971_sample_sff_40  motif_1   61.0  2099.0


                  graph_name    motif  nodes   edges
13                  86775971  motif_1  154.0  5811.0
273    86775971_sample_rj_30  motif_1   46.0   894.0
507    86775971_sample_rn_30  motif_1   46.0   472.0
533   86775971_sample_rpn_30  motif_1   42.0   585.0
819   86775971_sample_bsf_30  motif_1   46.0  1065.0
1222  86775971_sample_sff_30  motif_1   46.0  1071.0


                  graph_name    motif  nodes   edges
13                  86775971  motif_1  154.0  5811.0
624    86775971_sample_rj_20  motif_1   29.0   450.0
650   86775971_sample_bsf_20  motif_1   30.0   459.0
754   86775971_sample_rpn_20  motif_1   23.0   198.0
884    86775971_sample_rn_20  motif_1   27.0   196.0
1313  86775971_sample_sff_20  motif_1   30.0   384.0


                  graph_name    motif  nodes   edges
13                  86775971  motif_1  154.0  5811.0
299   86775971_sample_bsf_10  motif_1   15.0    55.0
403    86775971_sample_rj_10  motif_1   15.0    55.0
572   86775971_sample_rpn_10  motif_1   14.0    67.0
1001   86775971_sample_rn_10  motif_1   14.0    51.0
1144  86775971_sample_sff_10  motif_1   15.0    99.0


In [9]:
sample_percents = ["40", "30", "20", "10"]

selected_graph = "14203895"

for sp in sample_percents:
    # Query data
    query_df = motif_twitter.query(
        "graph_name == @selected_graph or (graph_name.str.startswith(@selected_graph) and graph_name.str.endswith(@sp))",
    )

    gru.plot_line_chart_with_images(
        query_df,
        f"Z-scores for Twitter Generated Samples of {selected_graph}",
        "motif",
        "significance_profile",
    )

    print(
        query_df[["graph_name", "motif", "nodes", "edges"]].query("motif == 'motif_1'")
    )

TypeError: plot_line_chart_with_images() missing 1 required positional argument: 'image_directory'

In [None]:
twitter_combined = gru.read_directed_graph_from_edge_list(
    "../../data/twitter/twitter_combined.edges"
)
gru.plot_degree_distribution(twitter_combined)
gru.plot_degree_distribution_scatter(twitter_combined)

In [None]:
twitter_ego = gru.read_directed_graph_from_edge_list(
    "../../data/twitter/14203895.edges"
)
gru.plot_degree_distribution(twitter_ego)
gru.plot_degree_distribution_scatter(twitter_ego)

In [None]:
twitter_ssf_10 = gru.read_directed_graph_from_edge_list(
    "../../data/twitter_samples/14203895_sample_sff_10.edges"
)
gru.plot_degree_distribution(twitter_ssf_10)
gru.plot_degree_distribution_scatter(twitter_ssf_10)