In [3]:
import os
import xml.etree.ElementTree as ET
from xml.dom import minidom
from datetime import datetime

def generate_sitemap(directory, base_url):
    """Return the public URL of every web-related file under *directory*.

    The directory tree is walked recursively. Each file whose extension is
    in the allow-list below is mapped to ``base_url`` + ``/`` + its path
    relative to *directory*.

    Args:
        directory: Root folder of the site on disk.
        base_url: URL that *directory* is served from. Trailing slashes are
            ignored, so ``"https://host/site"`` and ``"https://host/site/"``
            give identical results.

    Returns:
        A list of URL strings, in a deterministic (sorted) traversal order.
        Path segments are percent-encoded so that names containing spaces
        or non-ASCII characters still form valid URLs.
    """
    # Imported locally so the function does not depend on a file-level import.
    from urllib.parse import quote

    # Common web-related file types; extend as needed.
    web_extensions = (
        '.html', '.htm', '.css', '.js', '.jpg', '.png', '.gif', '.pdf', '.txt',
    )

    # Drop trailing slashes so joining with '/' never produces '//'.
    root_url = base_url.rstrip('/')

    urls = []
    for dirpath, dirnames, filenames in os.walk(directory):
        # Sorting in place makes os.walk descend in a stable order.
        dirnames.sort()
        for filename in sorted(filenames):
            # Case-insensitive match so e.g. 'INDEX.HTML' is not skipped.
            if not filename.lower().endswith(web_extensions):
                continue
            # Path of the file relative to the site root.
            file_path = os.path.relpath(os.path.join(dirpath, filename), directory)
            # Normalise Windows separators, then percent-encode everything
            # except '/' (quote() keeps '/' unescaped by default).
            url_path = quote(file_path.replace("\\", "/"))
            urls.append(root_url + '/' + url_path)

    return urls

def create_sitemap(urls, output_path="sitemap.xml"):
    """Write a pretty-printed sitemap XML file listing *urls*.

    Every URL becomes a ``<url>`` entry with ``<loc>``, ``<lastmod>``
    (today's date), ``<changefreq>`` ("monthly") and ``<priority>`` ("0.5").

    Args:
        urls: Iterable of absolute URL strings to list.
        output_path: File to write. Defaults to ``"sitemap.xml"`` in the
            current working directory, matching the previous behaviour.

    Returns:
        None. The result is the file written at *output_path*.
    """
    # Compute the date once so all entries agree, even if the run crosses
    # midnight. NOTE: this is the generation date, not the file's mtime.
    today = datetime.now().strftime("%Y-%m-%d")

    # Root node carrying the sitemap protocol namespace.
    urlset = ET.Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")

    for url in urls:
        url_element = ET.SubElement(urlset, "url")
        ET.SubElement(url_element, "loc").text = url
        ET.SubElement(url_element, "lastmod").text = today
        # Assume pages change roughly once a month.
        ET.SubElement(url_element, "changefreq").text = "monthly"
        # Default priority.
        ET.SubElement(url_element, "priority").text = "0.5"

    # Serialise, then round-trip through minidom to get indented output.
    rough_string = ET.tostring(urlset, encoding="utf-8")
    pretty_xml = minidom.parseString(rough_string).toprettyxml(indent="  ")

    # Write the formatted XML to disk.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(pretty_xml)

if __name__ == "__main__":
    # Script entry point: scan the site folder, then emit sitemap.xml.

    # Base URL the site is published under -- replace with your own.
    base_url = "https://zhangrui4041.github.io/awesome-paper-test.github.io/"
    # Local folder holding the site's files -- replace with your own.
    directory = "./"

    urls = generate_sitemap(directory=directory, base_url=base_url)
    create_sitemap(urls=urls)

    # Confirmation message (reads: "Sitemap generated: sitemap.xml").
    print("站点地图已生成：sitemap.xml")


站点地图已生成：sitemap.xml
