# Result Analysis

In [1]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go

In [2]:
def plot_line_chart(data, title, x_label, y_label):
    """Draw one lines+markers trace per graph and display the figure.

    Args:
        data: DataFrame with a "graph_name" column plus the columns named by
            ``x_label`` and ``y_label``.
        title: Figure title.
        x_label: Column plotted on the x-axis; also used as the x-axis title.
        y_label: Column plotted on the y-axis; also used as the y-axis title.
    """
    fig = go.Figure()
    # sort=False keeps the graphs in order of first appearance, so the legend
    # order follows the row order of `data`.
    for graph_name, graph_df in data.groupby("graph_name", sort=False):
        fig.add_trace(
            go.Scatter(
                # Use the requested x column instead of a hard-coded "motif",
                # so the plotted data always matches the x-axis title.
                x=graph_df[x_label],
                y=graph_df[y_label],
                mode="lines+markers",
                name=graph_name,
            )
        )
    fig.update_layout(title=title, xaxis_title=x_label, yaxis_title=y_label)
    fig.show()

## Load Data

In [3]:
# Input locations, relative to this notebook
RESULTS_PATH = "../results/"  # directory holding the result CSV files
NETWORKS_PATH = "../data/"  # directory holding the network edge-list files
file_name = "twitter_egonets.csv"  # results file loaded first

In [4]:
def load_data(file):
    """Read a results CSV into a DataFrame.

    Args:
        file: File name, appended directly to RESULTS_PATH.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for that path.
    """
    return pd.read_csv(RESULTS_PATH + file)

## Egonets (Original 5 Examples)

In [5]:
# Load the egonet results and store graph_name as text
# (presumably so the numeric IDs act as labels in the plots; verify).
motif_df = load_data(file_name).astype({"graph_name": str})

In [6]:
# Preview the first five rows to check the loaded columns
motif_df.head(n=5)

Unnamed: 0,graph_name,motif,original_count,average_count,std_dev,z_score,significance_profile
0,7861312,motif_1,17276,21836.2,618.057036,-7.378283,-0.009069
1,7861312,motif_2,20947,22531.28,520.196399,-3.045542,-0.003743
2,7861312,motif_3,7461,34880.64,789.412011,-34.734257,-0.042693
3,7861312,motif_4,20756,6252.84,401.182178,36.151058,0.044434
4,7861312,motif_5,21858,6123.96,395.064497,39.82651,0.048952


In [7]:
# The plotted column is significance_profile, so the title names that metric
# instead of the z_score column.
plot_line_chart(
    data=motif_df,
    title="Significance Profiles for Twitter Egonets",
    x_label="motif",
    y_label="significance_profile",
)

## Egonet Samples from the 256497288 File

In [8]:
file_name = "twitter_ego_samples.csv"
# Load the sampled-egonet results and store graph_name as text
motif_samples_df = load_data(file_name).astype({"graph_name": str})

In [9]:
# The plotted column is significance_profile, so the title names that metric
# instead of the z_score column.
plot_line_chart(
    data=motif_samples_df,
    title="Significance Profiles for Twitter Generated Samples",
    x_label="motif",
    y_label="significance_profile",
)

## Google+ Egonets

In [10]:
file_name = "gplus.csv"
# Load the Google+ results and store graph_name as text
motif_gplus = load_data(file_name).astype({"graph_name": str})

In [11]:
# The plotted column is significance_profile, so the title names that metric
# instead of the z_score column.
plot_line_chart(
    data=motif_gplus,
    title="Significance Profiles for Google Plus Egonets",
    x_label="motif",
    y_label="significance_profile",
)

## Network Analysis

In [12]:
# Plot the rank-ordered degree sequence (log-log scale)

# Read the network file (edge list) in NetworkX
network_file = "gplus/111213696402662884531.edges"

# NOTE(review): read_edgelist builds an undirected nx.Graph unless
# create_using is given; confirm that ignoring edge direction is intended.
G = nx.read_edgelist(NETWORKS_PATH + network_file, nodetype=int)

# Degree of every node, sorted from highest to lowest
degree_sequence = sorted((degree for _, degree in G.degree()), reverse=True)

# Ranks start at 1 because a log axis cannot show 0; 0-based positions would
# leave the highest-degree node off the chart.
node_ranks = list(range(1, len(degree_sequence) + 1))

# x: rank of the node by degree, y: degree of that node
fig = go.Figure()
fig.add_trace(
    go.Scatter(
        x=node_ranks,
        y=degree_sequence,
        mode="markers",
        marker=dict(size=5),
    )
)

fig.update_layout(
    # The edge list loaded above comes from the gplus directory, not Twitter.
    title="Degree Distribution of Google+ Egonet (log-log scale)",
    xaxis_title="Node rank (1 = highest degree)",
    yaxis_title="Degree",
    xaxis_type="log",
    yaxis_type="log",
)

fig.show()