steps:

1. get a page (to get the new index): https://developer.eagle.cool/plugin-api
2. save index to `index.md`
3. use this script to go through all pages listed in the index, and save them as markdown locally

In [3]:
# !pip install mistune

imports and helper functions (index parsing, page clean-up, saving):

In [None]:
import re
import sys
import os
from tqdm.auto import tqdm


# Get the absolute path of the src directory
sys.path.append(os.path.abspath("../src"))
from url_to_md import url_to_md

def parse_index_to_links(index_file, ignore_strings=("https://www.gitbook.com/", "https://www.figma.com/")):
    """Extract the markdown links listed in an index file.

    Args:
        index_file: Path to the markdown index file. It is read as UTF-8.
        ignore_strings: Substrings to filter on; a link is dropped when its
            URL contains any of them.

    Returns:
        list[tuple[str, str]]: ``(link text, url)`` pairs for the kept links,
        in the order they appear in the file.
    """
    # Decode explicitly as UTF-8 instead of the platform's locale encoding,
    # so non-ASCII link titles are read the same way on every OS.
    with open(index_file, "r", encoding="utf-8") as f:
        index_md = f.read()

    # Regex pattern to match markdown links: [text](url)
    link_pattern = re.compile(r'\[(.*?)\]\((.*?)\)')

    # Extract links as (text, url) tuples
    links = link_pattern.findall(index_md)

    returns, ignores = [], []
    for text, url in links:
        # Ignore links that contain ignore strings
        if any(ignore_string in url for ignore_string in ignore_strings):
            ignores.append((text, url))
        else:
            returns.append((text, url))

    print(f"Found {len(returns)} links | Ignored {len(ignores)} links")
    return returns


def postprocess_eagle_md(md_str) -> str:
    """Drop everything up to and including the first "Powered by GitBook" line.

    If no line contains that marker, a warning is printed and the text is
    kept in full. In both cases the result is stripped of leading and
    trailing whitespace.

    Returns:
        str: cleaned markdown documentation
    """
    marker = "Powered by GitBook"
    rows = md_str.split("\n")

    # Index of the first row mentioning the marker, or None when absent
    cut_at = next((idx for idx, row in enumerate(rows) if marker in row), None)

    if cut_at is None:
        print("WARNING: Powered by GitBook not found in the markdown")
    else:
        rows = rows[cut_at + 1:]

    return "\n".join(rows).strip()

def sanitize_filename(name):
    """Turn a page title into a markdown filename.

    Surrounding whitespace is trimmed, spaces become underscores, and every
    character that is not a word character, ``-``, ``_``, ``(``, ``)`` or ``.``
    is removed. The ``.md`` extension is appended to the result.
    """
    underscored = name.strip().replace(" ", "_")
    cleaned = re.sub(r'[^\w\-_().]', '', underscored)
    return f"{cleaned}.md"

def save_md(md_str, file_path):
    """Write a markdown string to ``file_path`` as UTF-8, creating parent directories.

    Args:
        md_str: Markdown text to write.
        file_path: Destination path. May be a bare filename, in which case the
            file is written relative to the current working directory.
    """
    parent_dir = os.path.dirname(file_path)
    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so only create directories when there is one to create.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(md_str)

Found 41 links | Ignored 2 links


[('Introduction', '/plugin-api'),
 ('Your First Plugin', '/plugin-api/get-started/creating-your-first-plugin'),
 ('File Structure Overview',
  '/plugin-api/get-started/anatomy-of-an-extension'),
 ('Plugin Types', '/plugin-api/get-started/plugin-types'),
 ('Window', '/plugin-api/get-started/plugin-types/window')]

index -> links:

In [None]:
# Saved GitBook index page, plus URL fragments of links we do not want to crawl
INDEX_FILE = "../docs/index.md"
ignore_strings = ["https://www.figma.com/", "https://www.gitbook.com/"]

# Parse the index, then display the first few (title, path) pairs as a sanity check
links = parse_index_to_links(INDEX_FILE, ignore_strings=ignore_strings)
links[:5]

links -> markdown:

In [None]:
# Site root without a trailing slash: the paths from the index start with "/"
EAGLE_DOCS_ROOT = "https://developer.eagle.cool".rstrip("/")
MD_SAVE_DIR = "../docs"

for curr_name, curr_page in tqdm(links):
    print(f"Processing {curr_name} | {curr_page}")

    # Fetch the page as markdown, then cut everything up to the GitBook marker
    curr_link = f"{EAGLE_DOCS_ROOT}{curr_page}"
    curr_md = postprocess_eagle_md(url_to_md(curr_link))

    # Mirror the web path as the directory; use the readable link title as the filename
    sanitized_filename = sanitize_filename(curr_name)
    page_dir = os.path.dirname(curr_page.lstrip("/"))
    full_path = os.path.join(MD_SAVE_DIR, page_dir, sanitized_filename)

    save_md(curr_md, full_path)

    print(f"Saved: {full_path}")

Processing Introduction | /plugin-api
Saved: ../docs\Introduction.md
Processing Your First Plugin | /plugin-api/get-started/creating-your-first-plugin
Saved: ../docs\plugin-api/get-started\Your_First_Plugin.md
Processing File Structure Overview | /plugin-api/get-started/anatomy-of-an-extension
Saved: ../docs\plugin-api/get-started\File_Structure_Overview.md
Processing Plugin Types | /plugin-api/get-started/plugin-types
Saved: ../docs\plugin-api/get-started\Plugin_Types.md
Processing Window | /plugin-api/get-started/plugin-types/window
Saved: ../docs\plugin-api/get-started/plugin-types\Window.md
Processing Background Service | /plugin-api/get-started/plugin-types/service
Saved: ../docs\plugin-api/get-started/plugin-types\Background_Service.md
Processing Format Extension | /plugin-api/get-started/plugin-types/preview
Saved: ../docs\plugin-api/get-started/plugin-types\Format_Extension.md
Processing Inspector | /plugin-api/get-started/plugin-types/inspector
Saved: ../docs\plugin-api/get-st