## Importing Libraries

In [2]:
!pip install networkx

Collecting networkx
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Downloading networkx-3.5-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m58.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: networkx
Successfully installed networkx-3.5

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [46]:
import pandas as pd
import networkx as nx
import plotly.graph_objects as go
import numpy as np

In [43]:
df = pd.read_csv("pairwise_52seconds_share.csv")
df.head()


Unnamed: 0.1,Unnamed: 0,videoID,userID_1,userID_2,timestamp_1,timestamp_2,time_diff_seconds
0,0,-6bGXfM8-gs,19372991|840224732847833,19372991|840224732847833,2018-07-22 21:19:58,2018-07-22 21:19:58,0.0
1,10,-fJbMWhkTAw,Ej8Mm0YMadzmx4osDA_hgg,Ej8Mm0YMadzmx4osDA_hgg,2018-08-01 00:51:08,2018-08-01 00:51:08,0.0
2,11,-ilNuSh1Fgw,feNNP607aG1F64jR6bk8jw,CVEf5dB1MvNRTQFYivAIPQ,2018-04-27 22:28:49,2018-04-27 22:29:36,47.0
3,12,-ilNuSh1Fgw,5SDVRa-J-_cWYP6g0WNzLw,jz6hyweGgVHGTw-PbEMqKw,2018-05-14 16:52:08,2018-05-14 16:52:24,16.0
4,13,-ilNuSh1Fgw,42Egn_22OjOzg2XMqAa9_g,poH0yvIGbS5_7MdXM4EuRA,2018-05-14 16:55:04,2018-05-14 16:55:15,11.0


## Draw a graph

In [13]:
def draw_graph(G, title):
    
    pos = nx.spring_layout(G, seed=42)

    # Extract edge coordinates for Plotly
    edge_x = []
    edge_y = []
    for edge in G.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x += [x0, x1, None]
        edge_y += [y0, y1, None]

    # Create edge trace (gray lines)
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines'
    )

    # Extract node coordinates and degrees
    node_x = []
    node_y = []
    for node in G.nodes():
        x, y = pos[node]
        node_x.append(x)
        node_y.append(y)

    node_degree = [G.degree(node) for node in G.nodes()]

    # Create node trace (colored by degree)
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            colorscale='YlOrRd',
            color=node_degree,
            size=[3 + d*1.5 for d in node_degree],
            colorbar=dict(
                thickness=15,
                title='Node Degree',
                xanchor='left'
            ),
            line_width=1
        )
    )

    # Add hover text
    node_text = [f"Node {node}<br>Degree: {deg}" for node, deg in zip(G.nodes(), node_degree)]
    node_trace.text = node_text

    
    fig = go.Figure(data=[edge_trace, node_trace],
        layout=go.Layout(
        title=title,
        showlegend=False,
        hovermode='closest',
        margin=dict(b=0,l=0,r=0,t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
    ))

    fig.show()

## Getting Metrics

In [48]:
def get_metrics(G):
    # Degree distribution
    degrees = [d for n, d in G.degree()]
    mean_degree = np.mean(degrees)
    var_degree = np.var(degrees)

    # Average clustering coefficient
    clustering = nx.average_clustering(G)

    # Average shortest path length (works only for connected graphs)
    avg_path_length = nx.average_shortest_path_length(G)

    # Assortativity coefficient
    assortativity = nx.degree_assortativity_coefficient(G)

    # Centralization proxy (standard deviation of degrees)
    degree_centralization = np.std(degrees)

    print(f"Mean degree: {mean_degree:.2f}")
    print(f"Degree variance: {var_degree:.2f}")
    print(f"Average clustering coefficient: {clustering:.4f}")
    print(f"Average path length: {avg_path_length:.4f}")
    print(f"Assortativity coefficient: {assortativity:.4f}")
    print(f"Degree centralization (std): {degree_centralization:.2f}")

## Building a graph from dataset

In [50]:
# Getting valid users and videos

# Filter valid user pairs (less or equal than 52 seconds)
filtered = df[df['time_diff_seconds'] <= 52]

# Removing records with same user in user_1 and user_2
filtered = filtered[filtered['userID_1'] != filtered['userID_2']]

# Get all videos with at least one valid pair
valid_videos = filtered['videoID'].unique()

# Get all users involved in those videos
users_involved = pd.unique(filtered[['userID_1', 'userID_2']].values.ravel())

### User - User Graph

In [51]:
# Building the graph with user interaction
G_users = nx.Graph()

# Add user nodes
for user in users_involved:
    G_users.add_node(user, type='user')

# Getting average time difference per pair of users
df_agg = filtered.groupby(['userID_1', 'userID_2']).agg({'time_diff_seconds': 'mean'}).reset_index()

# Add user-user edges
for _, row in df_agg.iterrows():
    G_users.add_edge(
        row['userID_1'],
        row['userID_2'],
        # video=row['videoID'],
        time_diff=row['time_diff_seconds']
    )

print(f"Length of no filtered data: {len(df)}")
print(f"Length of filtered data: {len(filtered)}")
print(f"Length of graph dataset: {len(df_agg)}")


# Writing graph to file
nx.write_gexf(G_users, 'users_graph.gexf')

Length of no filtered data: 4471
Length of filtered data: 4127
Length of graph dataset: 4078


### User - Video Graph

In [52]:
G_users_video = nx.Graph()

# Add user nodes
for user in users_involved:
    G_users_video.add_node(user, type='user')

# Add video nodes
for video in valid_videos:
    G_users_video.add_node(video, type='video')


# Getting user and videos dataframe
user1_video_df = filtered[['videoID', 'userID_1']].drop_duplicates()
user2_video_df = filtered[['videoID', 'userID_2']].drop_duplicates()
user2_video_df.rename(columns={'userID_2': 'userID_1'}, inplace=True)
user_video_df = pd.concat([user1_video_df, user2_video_df]).drop_duplicates()
user_video_df.rename(columns={'userID_1': 'user', 'videoID': 'video'}, inplace=True)

# Adding edges
for _, row in user_video_df.iterrows():
    user = row['user']
    video = row['video']
    G_users_video.add_edge(user, video)


# Writing graph to file
nx.write_gexf(G_users_video, 'users_video_graph.gexf')


### User - User - Video Graph

In [53]:
G_users2_video = nx.Graph()

# Add user nodes
for user in users_involved:
    G_users2_video.add_node(user, type='user')

# Add video nodes
for video in valid_videos:
    G_users2_video.add_node(video, type='video')


# Getting user and videos dataframe
user1_video_df = filtered[['videoID', 'userID_1']].drop_duplicates()
user2_video_df = filtered[['videoID', 'userID_2']].drop_duplicates()
user2_video_df.rename(columns={'userID_2': 'userID_1'}, inplace=True)
user_video_df = pd.concat([user1_video_df, user2_video_df]).drop_duplicates()
user_video_df.rename(columns={'userID_1': 'user', 'videoID': 'video'}, inplace=True)

# Adding edges
for _, row in user_video_df.iterrows():
    user = row['user']
    video = row['video']
    G_users2_video.add_edge(user, video)

# Getting user interaction
user_interactions = filtered[['userID_1', 'userID_2']].drop_duplicates()

# Adding edges
for _, row in user_interactions.iterrows():
    user1 = row['userID_1']
    user2 = row['userID_2']
    G_users2_video.add_edge(user1, user2)

# Writing graph to file
nx.write_gexf(G_users2_video, 'users2_video_graph.gexf')

## Barabási-Albert model

In [49]:


# Generate a Scale-Free (Barabási–Albert) network
n = 50   # number of nodes
m = 2    # edges added per new node
G = nx.barabasi_albert_graph(n, m)

get_metrics(G)



Mean degree: 3.84
Degree variance: 10.61
Average clustering coefficient: 0.1774
Average path length: 2.6016
Assortativity coefficient: -0.2930
Degree centralization (std): 3.26


In [54]:
get_metrics(G_users)

NetworkXError: Graph is not connected.