In [15]:
import requests
from bs4 import BeautifulSoup
from markdownify import markdownify as md
import re

def fetch_page(url: str, headers: dict = None) -> str:
    """
    请求网页并返回 HTML 文本。
    :param url: 目标网页地址
    :param headers: 可选，请求头（模拟浏览器，减少被拒）
    """
    default_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    }
    headers = headers or default_headers
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    resp.encoding = resp.apparent_encoding  # 避免乱码
    return resp.text

def parse_html(html: str):
    """用 BeautifulSoup 解析 HTML，便于提取内容。"""
    return BeautifulSoup(html, "html.parser")


def html_to_markdown(html_or_soup: str | BeautifulSoup, selector: str = None, **kwargs) -> str:
    """
    将 HTML 或 BeautifulSoup 对象解析为 Markdown 文本。

    :param html_or_soup: HTML 字符串或 BeautifulSoup 对象
    :param selector: 可选，只转换该 CSS 选择器内的内容，如 "article", ".content", "#main"
    :param kwargs: 传给 markdownify 的选项，如 heading_style="ATX", strip=["script","style"]
    :return: Markdown 字符串
    """
    default_options = {
        "heading_style": "ATX",      # # 标题
        "strip": ["script", "style"], # 去掉 script/style 标签
    }
    default_options.update(kwargs)

    if isinstance(html_or_soup, BeautifulSoup):
        soup = html_or_soup
    else:
        soup = parse_html(html_or_soup)

    if selector:
        elem = soup.select_one(selector)
        if elem is None:
            return ""
        return md(str(elem), **default_options)
    return md(str(soup), **default_options)


def get_title_for_markdown(soup: BeautifulSoup, strip_suffix: str = " | Seeed Studio Wiki") -> str:
    """获取用于 Markdown 的标题，可去掉站点后缀。"""
    title = get_title(soup)
    if strip_suffix and title.endswith(strip_suffix):
        return title[: -len(strip_suffix)].strip()
    return title


def get_title(soup: BeautifulSoup) -> str:
    """获取页面标题（来自 <title> 标签）。"""
    tag = soup.find("title")
    return tag.get_text(strip=True) if tag else ""

In [5]:
# 网页地址
target_url = "https://wiki.seeedstudio.com/create_backup_and_restore_on_recomputer/"

# 1. 请求并获取 HTML
html = fetch_page(target_url)
# 2. 解析
soup = parse_html(html)
# 从 <title> 开始：作为 Markdown 第一行标题
title = get_title_for_markdown(soup)
print("页面标题:", title)


页面标题: Create Backup and Restore on reComputer


In [16]:
markdown_text = html_to_markdown(soup, selector=".theme-doc-markdown")
if "## Resources" in markdown_text:
    markdown_text = markdown_text.split("## Resources")[0].rstrip()

#删除零宽字符
markdown_text = re.sub(r"\[\s*â€‹\s*\]\(#[^)]+\)", "", markdown_text)

print(markdown_text)

# Create Backup and Restore on reComputer

## Introduction[â€‹](#introduction "Direct link to Introduction")

reComputer is a powerful and compact intelligent edge box to bring up to 275TOPS modern AI performance to the edge.When you have configured and installed the software and environment necessary for your business on recomputer, and need to replicate the project from another new recomputer, reinstalling the software is not efficient. Therefore, this wiki page will use [reComputer J3011](https://www.seeedstudio.com/reComputer-J3011B-p-6405.html) to introduce how to back up your existing software and environment on the recomputer series, making it convenient for you to restore and transplant it to the new recomputer.

note

Our testing platform is reComputer J3011, JetPack 5.1.3 is provided for reference.

## Prerequisite[â€‹](#prerequisite "Direct link to Prerequisite")

* Ubuntu Host Computer
* USB Type-C data transmission cable
* reComputer J3011 (with JetPack 5.1.3 OS)

info

In