In [1]:
if 'google.colab' in str(get_ipython()):
    from google.colab import drive
    drive.mount('/content/drive')
    basepath = '/content/drive/MyDrive/Projects/'
else:
    basepath = '/C:/Users/Vishu/Downloads/Projects/Netflix'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
from pathlib import Path

In [3]:
base_folder = Path(basepath)
data_folder = base_folder/'Data/Netflix'
model_folder = base_folder/'Models/Netflix'

In [4]:
model_folder.mkdir(exist_ok=True, parents = True)

In [5]:
model_folder

PosixPath('/content/drive/MyDrive/Projects/Models/Netflix')

In [6]:
dataset = data_folder/'Cleaned_netflix.csv'

In [7]:
!pip install --upgrade pyvis networkx pandas jinja2



In [8]:
import pandas as pd
import networkx as nx
from pyvis.network import Network
from IPython.display import IFrame

In [9]:
!pip install --upgrade pandas networkx bokeh



In [10]:
import pandas as pd
import networkx as nx
from bokeh.io import show, output_notebook
from bokeh.plotting import figure
from bokeh.models import Circle
from bokeh.models import GraphRenderer, StaticLayoutProvider, HoverTool
from bokeh.io import output_notebook, reset_output

In [11]:
# Read CSV
df = pd.read_csv(dataset)

df['cast_list'] = df['cast'].apply(lambda x: x.split(", "))
df

Unnamed: 0,show_id,type,title,director,cast,country,date_added,release_year,rating,duration,listed_in,description,added_month,added_year,cast_list
0,s1,Movie,Dick Johnson Is Dead,Kirsten Johnson,Unknown,United States,"September 25, 2021",2020,13+,90,Documentaries,"As her father nears the end of his life, filmm...",9,2021,[Unknown]
1,s2,TV Show,Blood & Water,Unknown,Ama Qamata,South Africa,"September 24, 2021",2021,17+,1800,International TV Shows,"After crossing paths at a party, a Cape Town t...",9,2021,[Ama Qamata]
2,s2,TV Show,Blood & Water,Unknown,Khosi Ngema,South Africa,"September 24, 2021",2021,17+,1800,International TV Shows,"After crossing paths at a party, a Cape Town t...",9,2021,[Khosi Ngema]
3,s2,TV Show,Blood & Water,Unknown,Gail Mabalane,South Africa,"September 24, 2021",2021,17+,1800,International TV Shows,"After crossing paths at a party, a Cape Town t...",9,2021,[Gail Mabalane]
4,s2,TV Show,Blood & Water,Unknown,Thabang Molaba,South Africa,"September 24, 2021",2021,17+,1800,International TV Shows,"After crossing paths at a party, a Cape Town t...",9,2021,[Thabang Molaba]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201986,s8807,Movie,Zubaan,Mozez Singh,Manish Chaudhary,India,"March 2, 2019",2015,14+,111,Music & Musicals,A scrappy but poor boy worms his way into a ty...,3,2019,[Manish Chaudhary]
201987,s8807,Movie,Zubaan,Mozez Singh,Meghna Malik,India,"March 2, 2019",2015,14+,111,Music & Musicals,A scrappy but poor boy worms his way into a ty...,3,2019,[Meghna Malik]
201988,s8807,Movie,Zubaan,Mozez Singh,Malkeet Rauni,India,"March 2, 2019",2015,14+,111,Music & Musicals,A scrappy but poor boy worms his way into a ty...,3,2019,[Malkeet Rauni]
201989,s8807,Movie,Zubaan,Mozez Singh,Anita Shabdish,India,"March 2, 2019",2015,14+,111,Music & Musicals,A scrappy but poor boy worms his way into a ty...,3,2019,[Anita Shabdish]


In [12]:
df.shape

(201991, 15)

In [13]:
# Remove directors labeled as "Unknown"
df = df[df['director'] != 'Unknown']

In [14]:
# Initialize a graph to track overlaps dynamically
G_overlap = nx.Graph()

def calculate_overlap(row):
    director = row['director']
    title = row['title']

    # Calculate the overlap as the total degree of the director and title
    director_degree = G_overlap.degree[director] if G_overlap.has_node(director) else 0
    title_degree = G_overlap.degree[title] if G_overlap.has_node(title) else 0
    return director_degree + title_degree

# Add initial nodes without edges to track degrees
for _, row in df.iterrows():
    G_overlap.add_node(row['director'])
    G_overlap.add_node(row['title'])

# Calculate overlap for each row
df['overlap'] = df.apply(calculate_overlap, axis=1)

# Sort rows by overlap (ascending)
df_sorted = df.sort_values(by='overlap')

# Select a subset with minimal overlap
df_subset = df_sorted.head(20000)

# Add the subset to the final graph
G = nx.Graph()
for _, row in df_subset.iterrows():
    director = row['director']
    title = row['title']

    # Add director and title nodes
    G.add_node(director, node_type='director')
    G.add_node(title, node_type='title')

    # Edge: director -> title
    G.add_edge(director, title)

print("Number of nodes in the bipartite graph:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['overlap'] = df.apply(calculate_overlap, axis=1)


Number of nodes in the bipartite graph: 1449
Number of edges: 827


In [15]:
# Compute graph layout
pos = nx.spring_layout(G, seed=42, scale=50)

In [16]:
# Extract nodes and compute positions
nodes = list(G.nodes())
node_indices = list(range(len(nodes)))

x_coords = [pos[n][0] for n in nodes]
y_coords = [pos[n][1] for n in nodes]

# Compute edge start and end points
edge_start = []
edge_end = []
for (n1, n2) in G.edges():
    edge_start.append(nodes.index(n1))
    edge_end.append(nodes.index(n2))

# Dynamic node sizes based on the number of titles directed
node_sizes = []
hover_info = []
colors = []

# Count the number of titles for each director
director_movie_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'director'}

for n in nodes:
    if G.nodes[n]['node_type'] == 'director':
        degree = director_movie_count.get(n, 1)  # Get number of titles directed
        hover_info.append(f"Director: {n}, Titles: {degree}")
        colors.append("orange")
        node_sizes.append(0.2 + (degree * 0.2))  # Increase size based on titles
    else:
        hover_info.append(f"Title: {n}")
        colors.append("green")
        node_sizes.append(0.2)  # Fixed smaller size for movie titles

In [17]:
graph_renderer = GraphRenderer()

# Add node sizes and colors to the data source
graph_renderer.node_renderer.data_source.data = {
    'index': node_indices,
    'x': x_coords,
    'y': y_coords,
    'desc': hover_info,  # Hover text
    'color': colors,     # Node colors
    'size': node_sizes   # Dynamic sizes
}

# Use Circle glyph for node representation
graph_renderer.node_renderer.glyph = Circle(radius='size', fill_color='color')

# Edge data
graph_renderer.edge_renderer.data_source.data = dict(
    start=edge_start,
    end=edge_end
)

# Assign x, y positions
graph_renderer.layout_provider = StaticLayoutProvider(
    graph_layout=dict(zip(node_indices, zip(x_coords, y_coords)))
)


In [18]:
# Reset Bokeh to prevent conflicts
reset_output()
output_notebook()

# Create figure
p1 = figure(
    title="#Movies (Director–Title Bipartite) with Dynamic Title Sizes",
    x_range=(-50, 50),
    y_range=(-50, 50),
    tools="pan,wheel_zoom,save,reset",
    toolbar_location="right",
    width=1500,
    height=800
)

# Attach graph_renderer
p1.renderers.append(graph_renderer)

# Add HoverTool referencing the 'desc' field
hover_tool = HoverTool(tooltips=[("", "@desc")])
p1.add_tools(hover_tool)

# Show the graph first (DO NOT save in the same cell)
show(p1)

In [19]:
from bokeh.io import output_file, save

#  Set the output file
output_file("Director_Title_Bipartite.html")

#  Save the graph as an interactive HTML file
save(p1)

print("Graph saved as Director_Title_Bipartite.html")

Graph saved as Director_Title_Bipartite.html


In [20]:
G = nx.Graph()

for _, row in df.iterrows():
    director = row['director']
    title = row['title']

    # Add director and title nodes
    G.add_node(director, node_type='director')
    G.add_node(title, node_type='title')

    # Add edge between director and title
    G.add_edge(director, title)

# -----------------------------------------------------------------
# 3. Initialize PyVis network
# -----------------------------------------------------------------
net = Network(height="800px", width="100%", notebook=True)

# Populate PyVis graph from NetworkX graph
net.from_nx(G)

# 4. Style nodes (with both labels and titles)
for node in net.nodes:
    node_id = node['id']
    node_type = G.nodes[node_id]['node_type']

    # Set label (for visible text)
    node['label'] = node_id

    # Set hover tooltip
    if node_type == 'director':
        node['color'] = 'orange'
        node['size'] = 20
        node['title'] = f"Director: {node_id}"
    else:
        node['color'] = 'teal'
        node['size'] = 15
        node['title'] = f"Movie Title: {node_id}"

# 5. Optionally enable hover explicitly
net.set_options("""
var options = {
  "interaction": {
    "hover": true,
    "tooltipDelay": 100
  }
}
""")

# 6. Generate HTML
net.show("director_title_bipartite.html")

# 7. Display in notebook (may or may not show tooltips in IFrame)
IFrame(src="director_title_bipartite.html", width="100%", height="800px")

director_title_bipartite.html


In [21]:
# Initialize a graph to track overlaps dynamically
G_overlap = nx.Graph()

def calculate_overlap(row):
    title = row['title']
    cast_list = row['cast_list']

    # Calculate the overlap as the total degree of the title and all cast members
    title_degree = G_overlap.degree[title] if G_overlap.has_node(title) else 0
    cast_degrees = sum(G_overlap.degree[actor] for actor in cast_list if G_overlap.has_node(actor))
    return title_degree + cast_degrees

# Add initial nodes without edges to track degrees
for _, row in df.iterrows():
    G_overlap.add_node(row['title'])
    for actor in row['cast_list']:
        G_overlap.add_node(actor)

# Calculate overlap for each row
df['overlap'] = df.apply(calculate_overlap, axis=1)

# Sort rows by overlap (ascending)
df_sorted = df.sort_values(by='overlap')

# Select a subset with minimal overlap (e.g., 2000 movies)
df_subset = df_sorted.head(5000)

# Add the subset to the graph
G = nx.Graph()
for _, row in df_subset.iterrows():
    title = row['title']
    cast_list = row['cast_list']

    # Add the movie node
    G.add_node(title, node_type='title')

    # Add each cast member as a node and connect to the movie
    for actor in cast_list:
        G.add_node(actor, node_type='actor')
        G.add_edge(actor, title)

print("Number of nodes in the bipartite graph:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['overlap'] = df.apply(calculate_overlap, axis=1)


Number of nodes in the bipartite graph: 1631
Number of edges: 1639


In [22]:
# Compute positions
pos = nx.spring_layout(G, seed=42, scale=80)
nodes = list(G.nodes())
node_indices = list(range(len(nodes)))

x_coords = [pos[n][0] for n in nodes]
y_coords = [pos[n][1] for n in nodes]

edge_start = []
edge_end = []
for (n1, n2) in G.edges():
    edge_start.append(nodes.index(n1))
    edge_end.append(nodes.index(n2))

In [23]:
# Count the number of movies each actor is in
actor_movie_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'actor'}

# Count the number of cast members for each title
title_cast_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'title'}

# Create GraphRenderer
graph_renderer = GraphRenderer()

# Node data
graph_renderer.node_renderer.data_source.data = {
    'index': node_indices
}

hover_info = []
colors = []
node_sizes = []

for n in nodes:
    if G.nodes[n]['node_type'] == 'actor':
        num_titles = actor_movie_count.get(n, 1)  # Get number of movies the actor appeared in
        hover_info.append(f"Actor: {n}, Titles: {num_titles}")

        # If the actor is in more than 1 title, color it red; otherwise, keep it blue
        colors.append("red" if num_titles > 1 else "blue")

        node_sizes.append(0.3)  # Fixed size for actors
    else:  # This is a title node
        num_cast = title_cast_count.get(n, 1)  # Get number of actors in this title
        hover_info.append(f"Title: {n}, Cast Members: {num_cast}")

        colors.append("green")  # Keep title color green

        # Increase size based on the number of actors in this movie
        node_sizes.append(0.1 + (num_cast * 0.1))

# Assign hover text, colors, and sizes
graph_renderer.node_renderer.data_source.add(hover_info, 'desc')   # for hover
graph_renderer.node_renderer.data_source.add(colors, 'color')      # for color
graph_renderer.node_renderer.data_source.add(node_sizes, 'size')   # for node sizes

# Use size-based circles for node representation
graph_renderer.node_renderer.glyph = Circle(radius='size', fill_color='color')

# Edge data
graph_renderer.edge_renderer.data_source.data = dict(
    start=edge_start,
    end=edge_end
)

# Assign x/y positions
graph_renderer.layout_provider = StaticLayoutProvider(
    graph_layout=dict(zip(node_indices, zip(x_coords, y_coords)))
)

In [24]:
from bokeh.io import output_notebook, show, reset_output
from bokeh.models import HoverTool

# Reset Bokeh to prevent conflicts
reset_output()

# Enable inline rendering
output_notebook()

# Create figure
p2 = figure(
    title="#Movies (Cast–Title Bipartite with Minimal Overlap)",
    x_range=(-80, 80),
    y_range=(-80, 80),
    tools="pan,wheel_zoom,save,reset",
    toolbar_location="right",
    width=2000,
    height=500
)

# Attach graph_renderer
p2.renderers.append(graph_renderer)

# Add HoverTool
hover_tool = HoverTool(tooltips=[("", "@desc")])
p2.add_tools(hover_tool)

# Show the graph first (DO NOT save in the same cell)
show(p2)

In [25]:
from bokeh.io import output_file, save

#  Set the output file
output_file("Cast_Title_Bipartite.html")

#  Save the graph as an interactive HTML file
save(p2)

print("Graph saved as Cast_Title_Bipartite.html")

Graph saved as Cast_Title_Bipartite.html


In [26]:
# Initialize a graph to track overlaps dynamically
G_overlap = nx.Graph()

def calculate_overlap(row):
    director = row['director']
    genre = row['listed_in']

    # Calculate the overlap as the total degree of the director and genre
    director_degree = G_overlap.degree[director] if G_overlap.has_node(director) else 0
    genre_degree = G_overlap.degree[genre] if G_overlap.has_node(genre) else 0
    return director_degree + genre_degree

# Add initial nodes without edges to track degrees
for _, row in df.iterrows():
    G_overlap.add_node(row['director'])
    G_overlap.add_node(row['listed_in'])

# Calculate overlap for each row
df['overlap'] = df.apply(calculate_overlap, axis=1)

# Sort rows by overlap (ascending)
df_sorted = df.sort_values(by='overlap')

# Select a subset with minimal overlap
df_subset = df_sorted.head(30000)

# Add the subset to the final graph
G = nx.Graph()
for _, row in df_subset.iterrows():
    director = row['director']
    genre = row['listed_in']

    # Add director and genre nodes
    G.add_node(director, node_type='director')
    G.add_node(genre, node_type='genre')

    # Edge: director -> genre
    G.add_edge(director, genre)

print("Number of nodes in the bipartite graph:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['overlap'] = df.apply(calculate_overlap, axis=1)


Number of nodes in the bipartite graph: 1151
Number of edges: 2518


In [27]:
# Compute positions
pos = nx.spring_layout(G, seed=42, scale=80)
nodes = list(G.nodes())
node_indices = list(range(len(nodes)))

x_coords = [pos[n][0] for n in nodes]
y_coords = [pos[n][1] for n in nodes]

edge_start = []
edge_end = []
for (n1, n2) in G.edges():
    edge_start.append(nodes.index(n1))
    edge_end.append(nodes.index(n2))

In [28]:
from bokeh.transform import factor_cmap

# Count the number of directors for each genre
genre_director_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'genre'}

# Count the number of genres for each director
director_genre_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'director'}

# Create GraphRenderer
graph_renderer = GraphRenderer()

# Node data
graph_renderer.node_renderer.data_source.data = {
    'index': node_indices
}

hover_info = []
colors = []
node_sizes = []

# Define distinguishable colors for different genre counts
color_map = {
    1: "yellow",
    2: "green",
    3: "blue",
    4: "purple",
    5: "orange",
    6: "pink",
    7: "brown",
    8: "cyan",
    9: "lime",
    10: "magenta"
}

for n in nodes:
    if G.nodes[n]['node_type'] == 'director':
        num_genres = director_genre_count.get(n, 1)  # Get number of genres linked to this director
        hover_info.append(f"Director: {n}, Genres: {num_genres}")

        # Assign color based on genre count, default to red if >10 genres
        colors.append(color_map.get(num_genres, "red"))

        # Increase size based on the number of genres for this director
        node_sizes.append(0.1 + (num_genres * 0.001))
    else:  # This is a genre node
        num_directors = genre_director_count.get(n, 1)  # Get number of directors linked to this genre
        hover_info.append(f"Genre: {n}, Directors: {num_directors}")
        colors.append("red")  # Keep genres red

        # Increase size based on the number of directors in this genre
        node_sizes.append(0.1 + (num_directors * 0.001))

# Assign hover text, colors, and sizes
graph_renderer.node_renderer.data_source.add(hover_info, 'desc')   # for hover
graph_renderer.node_renderer.data_source.add(colors, 'color')      # for color
graph_renderer.node_renderer.data_source.add(node_sizes, 'size')   # for node sizes

# Use size-based circles for node representation
graph_renderer.node_renderer.glyph = Circle(radius='size', fill_color='color')

# Edge data
graph_renderer.edge_renderer.data_source.data = dict(
    start=edge_start,
    end=edge_end
)

# Assign x/y positions
graph_renderer.layout_provider = StaticLayoutProvider(
    graph_layout=dict(zip(node_indices, zip(x_coords, y_coords)))
)

In [29]:
from bokeh.io import output_notebook, show, reset_output
from bokeh.models import HoverTool

# Reset Bokeh to prevent conflicts
reset_output()

# Enable inline rendering
output_notebook()

# Create figure
p3 = figure(
    title="#Movies (Director–Genre Bipartite Network with Minimal Overlap)",
    x_range=(-25, 25),
    y_range=(-30, 50),
    tools="pan,wheel_zoom,save,reset",
    toolbar_location="right",
    width=2000,
    height=500
)

# Attach graph_renderer
p3.renderers.append(graph_renderer)

# Add HoverTool
hover_tool = HoverTool(tooltips=[("", "@desc")])
p3.add_tools(hover_tool)

# Show the graph first (DO NOT save in the same cell)
show(p3)

In [30]:
from bokeh.io import output_file, save

# Set the output file
output_file("Director_Genre_Bipartite.html")

# Save the graph as an interactive HTML file
save(p3)

print("Graph saved as Director_Genre_Bipartite.html")

Graph saved as Director_Genre_Bipartite.html


In [31]:
# Initialize a graph to track overlaps dynamically
G_overlap = nx.Graph()

def calculate_overlap(row):
    director = row['director']
    cast_list = row['cast_list']

    # Calculate the overlap as the total degree of the director and all cast members
    director_degree = G_overlap.degree[director] if G_overlap.has_node(director) else 0
    cast_degrees = sum(G_overlap.degree[actor] for actor in cast_list if G_overlap.has_node(actor))
    return director_degree + cast_degrees

# Add initial nodes without edges to track degrees
for _, row in df.iterrows():
    G_overlap.add_node(row['director'])
    for actor in row['cast_list']:
        G_overlap.add_node(actor)

# Calculate overlap for each row
df['overlap'] = df.apply(calculate_overlap, axis=1)

# Sort rows by overlap (ascending)
df_sorted = df.sort_values(by='overlap')

# Select a subset with minimal overlap (e.g., 20 relationships)
df_subset = df_sorted.head(5000)

# Add the subset to the final graph
G = nx.Graph()
for _, row in df_subset.iterrows():
    director = row['director']
    cast_list = row['cast_list']

    # Add director and cast members as nodes and edges
    G.add_node(director, node_type='director')
    for actor in cast_list:
        G.add_node(actor, node_type='actor')
        G.add_edge(director, actor)

print("Number of nodes in the bipartite graph:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['overlap'] = df.apply(calculate_overlap, axis=1)


Number of nodes in the bipartite graph: 1620
Number of edges: 1692


In [32]:
# Compute positions
pos = nx.spring_layout(G, seed=42, scale=50)
nodes = list(G.nodes())
node_indices = list(range(len(nodes)))

x_coords = [pos[n][0] for n in nodes]
y_coords = [pos[n][1] for n in nodes]

edge_start = []
edge_end = []
for (n1, n2) in G.edges():
    edge_start.append(nodes.index(n1))
    edge_end.append(nodes.index(n2))

In [33]:
# Count the number of actors each director has worked with
director_actor_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'director'}

# Count the number of directors each actor has worked with
actor_director_count = {n: G.degree(n) for n in nodes if G.nodes[n]['node_type'] == 'actor'}

# Create GraphRenderer
graph_renderer = GraphRenderer()

# Node data
graph_renderer.node_renderer.data_source.data = {
    'index': node_indices
}

hover_info = []
colors = []
node_sizes = []

for n in nodes:
    if G.nodes[n]['node_type'] == 'director':
        num_actors = director_actor_count.get(n, 1)  # Get number of actors linked to this director
        hover_info.append(f"Director: {n}, Actors: {num_actors}")

        # Increase size based on the number of actors the director has worked with
        node_sizes.append(0.2 + (num_actors * 0.02))

        # Directors remain orange
        colors.append("orange")
    else:  # This is an actor node
        num_directors = actor_director_count.get(n, 1)  # Get number of directors linked to this actor
        hover_info.append(f"Actor: {n}, Directors: {num_directors}")

        # If the actor worked with more than 1 director, change to green, else keep blue
        colors.append("red" if num_directors > 1 else "purple")

        # Fixed size for actors
        node_sizes.append(0.15)

# Assign hover text, colors, and sizes
graph_renderer.node_renderer.data_source.add(hover_info, 'desc')   # for hover
graph_renderer.node_renderer.data_source.add(colors, 'color')      # for color
graph_renderer.node_renderer.data_source.add(node_sizes, 'size')   # for node sizes

# Use size-based circles for node representation
graph_renderer.node_renderer.glyph = Circle(radius='size', fill_color='color')

# Edge data
graph_renderer.edge_renderer.data_source.data = dict(
    start=edge_start,
    end=edge_end
)

# Assign x/y positions
graph_renderer.layout_provider = StaticLayoutProvider(
    graph_layout=dict(zip(node_indices, zip(x_coords, y_coords)))
)

In [34]:
from bokeh.io import output_notebook, show, reset_output
from bokeh.models import HoverTool

# Reset Bokeh to prevent conflicts
reset_output()

# Enable inline rendering
output_notebook()

# Create figure
p4 = figure(
    title="#Movies (Director–Cast Network with Minimal Overlap)",
    x_range=(-50, 50),
    y_range=(-50, 50),
    tools="pan,wheel_zoom,save,reset",
    toolbar_location="right",
    width=2000,
    height=1000
)

# Attach graph_renderer
p4.renderers.append(graph_renderer)

# Add HoverTool
hover_tool = HoverTool(tooltips=[("", "@desc")])
p4.add_tools(hover_tool)

# Show the graph first (DO NOT save in the same cell)
show(p4)

In [35]:
from bokeh.io import output_file, save

# Set the output file
output_file("Director_Cast_Bipartite.html")

# Save the graph as an interactive HTML file
save(p4)

print("Graph saved as Director_Cast_Bipartite.html")

Graph saved as Director_Cast_Bipartite.html
