In [5]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

In [6]:
from pyvis.network import Network
import numpy as np

# Load the edge list. The rest of this notebook reads its 'Source', 'Target'
# and 'Weight' columns.
edges_raw = pd.read_csv('refined_per_data.csv')

# Influencer names: edges pointing at any of these targets are always kept,
# whatever their weight.
influencers = [
    'ibai',
    'auronplay',
    'marta díaz',
    'djmario',
    'clakovi',
    'elrubius',
    'emma chamberlain',
    'malbert',
    'mostopapi',
    'orslok',
    'vegetta'
]

# Edges whose weight is at or below this value are dropped unless their target
# is an influencer.
MIN_EDGE_WEIGHT = 15

# Keep an edge when it is heavy enough OR its target is one of the influencers.
# The filtered frame is bound to `df` because the following cells read it under
# that name; the unfiltered data stays available as `edges_raw`.
df = edges_raw[
    (edges_raw['Weight'] > MIN_EDGE_WEIGHT)
    | (edges_raw['Target'].isin(influencers))
]


# 10th and 90th percentile of the edge weights. These are used further down as
# the input range when edge weights are mapped to edge widths.
lower_bound, upper_bound = np.percentile(df['Weight'], [10, 90])

# Cumulative incoming weight for each target node.
target_totals = df.groupby('Target')['Weight'].sum()
node_weights = target_totals.to_dict()

# Output range for node sizes.
min_node_size = 10
max_node_size = 50

# Map every target's cumulative weight linearly onto
# [min_node_size, max_node_size]. The extremes are computed once instead of
# once per node.
if target_totals.empty:
    # No targets at all: nothing to scale.
    node_sizes = {}
else:
    lightest = target_totals.min()
    heaviest = target_totals.max()
    if lightest == heaviest:
        # np.interp requires increasing x-coordinates; when every target has
        # the same total there is no range to interpolate over, so all nodes
        # get the midpoint size.
        scaled_sizes = np.full(len(target_totals), (min_node_size + max_node_size) / 2)
    else:
        scaled_sizes = np.interp(
            target_totals.to_numpy(),
            (lightest, heaviest),
            (min_node_size, max_node_size),
        )
    # .tolist() yields plain Python floats.
    node_sizes = dict(zip(target_totals.index, scaled_sizes.tolist()))

# Create a pyvis network configured for notebook use.
net = Network(notebook=True)

# Add every distinct endpoint (source or target) as a node. Nodes without a
# precomputed size in node_sizes fall back to the minimum size.
for node in pd.concat([df['Source'], df['Target']]).unique():
    net.add_node(node, size=node_sizes.get(node, min_node_size))

# Map all edge weights to widths in a single vectorised call. np.interp clamps
# weights outside [lower_bound, upper_bound] to the ends of the (0.5, 5) range.
edge_widths = np.interp(
    df['Weight'].to_numpy(),
    (lower_bound, upper_bound),
    (0.5, 5),
).tolist()

# Add one edge per row, walking the columns directly rather than using
# DataFrame.iterrows(), which builds a Series object for every row.
for source, target, width in zip(df['Source'], df['Target'], edge_widths):
    net.add_edge(source, target, width=width)

# Write the network to network.html and display it.
net.show("network.html")




network.html


In [7]:
from IPython.display import display

# Order the edges from heaviest to lightest.
sorted_df = df.sort_values('Weight', ascending=False)

# Show the 20 heaviest edges, shading the 'Weight' column with a blue gradient.
top_edges = sorted_df.head(20)
top_edges_styled = top_edges.style.background_gradient(subset=['Weight'], cmap='Blues')
display(top_edges_styled)


Unnamed: 0,Source,Target,Weight
190,YouTuber,emma,1019
193,YouTuber,emma chamberlain,511
107,YouTuber,chamberlain,311
10,YouTuber,amanda,155
369,YouTuber,joakimkarud,78
368,YouTuber,joakim karud,78
317,YouTuber,james,75
559,YouTuber,olivia,74
715,YouTuber,weston,60
241,YouTuber,god,59


In [8]:
# Keep only the edges whose target is one of the influencers, heaviest first.
targets_influencer = df['Target'].isin(influencers)
filtered_df = df[targets_influencer].sort_values('Weight', ascending=False)

# Render the table with a blue gradient on the 'Weight' column.
influencer_styled = filtered_df.style.background_gradient(subset=['Weight'], cmap='Blues')
display(influencer_styled)

Unnamed: 0,Source,Target,Weight
193,YouTuber,emma chamberlain,511
