<a href="https://colab.research.google.com/github/searchsolved/search-solved-public-seo/blob/main/visualise_links_screaming_frog/visualise_links_screaming_frog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Visualise Internal Linking Structure with Treemaps

## File Formats (CSV Exports Only!)
*   Screaming Frog Crawl Export
*   ahrefs.com Backlinks Report
*   Search Console Internal Links Report
*   Search Console External Links Report

# How to use:
Run all cells and upload a CSV export from Screaming Frog, ahrefs, or Search Console to visualise internal or external links to the different sections of your website.

In [None]:
!pip install pandas
!pip install pip install plotly==5.0.0

In [None]:
import pandas as pd
import urllib.parse as urlparse
import plotly.express as px
import io
from google.colab import files

# Upload a .csv file in the next cell

In [None]:
# Upload the CSV export to visualise: a Screaming Frog crawl file
# (internal_html.csv), an ahrefs backlinks report, or a Search Console
# internal/external links report.
# The result is read by the next cell as a mapping of filename -> raw file bytes.
uploaded = files.upload()

In [None]:
# `uploaded` maps each uploaded filename to its raw bytes. Fail with a clear
# message when it is empty (e.g. the upload dialog was dismissed) instead of
# the bare IndexError that indexing an empty list would raise.
if not uploaded:
  raise ValueError("No file was uploaded - re-run the upload cell and choose a .csv export.")

# Only the first uploaded file is used.
filename = next(iter(uploaded))

# Decode as UTF-8, silently dropping any bytes that are not valid UTF-8, then
# parse the text as CSV.
df = pd.read_csv(io.StringIO(uploaded[filename].decode('utf-8', errors='ignore')))
print("Imported Crawl File ..")

In [None]:
# Normalise the differing column headers of the supported exports onto the two
# names used by the rest of the notebook: "Address" (the URL that is split into
# parts below) and "Inlinks" (the values plotted in the treemap).
# Headers that are not present in the uploaded file are simply ignored by rename().
df.rename(
    columns={
        "Target page": "Address",
        "Internal links": "Inlinks",
        "Target Page": "Address",
        "Linking Sites": "Inlinks",
        "Link URL": "Address",
        "Referring Domains": "Inlinks",
    },
    inplace=True,
)
print(df)

In [None]:
# Keep only indexable pages when the export has an "Indexability" column;
# exports without that column are left unfiltered.
# The column is checked for explicitly rather than wrapping the filter in a
# catch-all `except`, so unrelated errors are no longer hidden behind the
# "column not found" message.
if "Indexability" in df.columns:
  # .copy() hands later cells an independent frame to add columns to, which
  # avoids pandas' SettingWithCopyWarning on the filtered slice.
  df = df[~df["Indexability"].isin(["Non-Indexable"])].copy()
  print("Dropped Non-Indexable Rows")
else:
  print("Indexability Column Not Found - Unable to Drop Non-Indexable Pages!")
print(df)

In [None]:
# Break every address into the five components returned by urlsplit()
# and store each component in its own column.
split_addresses = df['Address'].map(urlparse.urlsplit)
protocols, domains, paths, queries, fragments = zip(*split_addresses)
df['protocol'] = protocols
df['domain'] = domains
df['path'] = paths
df['query'] = queries
df['fragment'] = fragments

In [None]:
# Split each URL path on "/" into one column per segment; expand=True labels
# the new columns with integers 0, 1, 2, ... A path such as "/blog/post" starts
# with "/", so column 0 is the empty string before it and column 1 is the
# first directory level ("blog").
df2 = df['path'].str.split('/', expand=True)

# Attach the segment columns by index. Any segment columns left over from an
# earlier run of this cell are dropped first: merging them a second time would
# rename the overlapping columns to "1_x"/"1_y", and the integer column 1 that
# the treemap cell relies on would no longer exist.
df = df.drop(columns=df2.columns, errors='ignore').join(df2)

In [None]:
# Two-level hierarchy: integer column 1 (the first path segment produced by the
# "/" split in the previous cell) groups the pages, and each page ("Address")
# is a tile sized by its "Inlinks" value.
fig = px.treemap(
    data_frame=df,
    path=[1, 'Address'],
    values='Inlinks',
)

In [None]:
# Display the treemap figure built in the previous cell.
fig.show()

In [None]:
# Write the treemap to an HTML file, then pass that same file to Colab's
# download helper.
treemap_html = "internal_links_treemap.html"
fig.write_html(treemap_html)
files.download(treemap_html)