In [11]:
import re
import requests
from pathlib import Path
from bs4 import BeautifulSoup
from bs4.element import Comment
from urllib.parse import urljoin
import mimetypes

In [10]:
# Source side: the AdminLTE checkout and the folder holding its demo pages.
admin_lte_home = Path('AdminLTE')
pages_home = admin_lte_home / 'dist/pages'

# Target side: the app package plus its template and static folders.
app_home = Path('mainframe')
template_home = app_home / 'templates/mainframe'
static_home = app_home / 'static/mainframe'

In [28]:
# def get_css_external(url):
#     parent = urljoin(url, './') 
#     response = requests.get(url)    
#     src_content = re.findall('@font-face\{.*?(src:.*?)\}', response.text)
#     if not src_content: return []
#     return [urljoin(parent, _) for _ in re.findall('url\("(.*?)"\)', src_content[0])]

In [29]:
def extract_resource(url):
    """Materialise the resource referenced by ``url`` under ``static_home``.

    ``url`` is first resolved relative to ``pages_home``. If that path exists
    it is copied; the destination is ``url`` with every ``'../../'`` removed,
    joined onto ``static_home``. Otherwise ``url`` is downloaded with
    ``requests`` and stored under a path derived from it by stripping the
    ``https://.../npm/`` prefix (plus any leading ``@``) and turning each
    remaining ``@`` into ``/``.

    Parameters
    ----------
    url : str
        Value of an ``href``/``src`` attribute: a path relative to the page
        directory or an absolute URL.

    Returns
    -------
    pathlib.Path
        Location of the file that was written below ``static_home``.

    Raises
    ------
    requests.HTTPError
        If the download answers with a 4xx/5xx status.
    requests.RequestException
        For other request failures (invalid URL, connection error, timeout).
    """
    path = pages_home.joinpath(url)
    if path.exists():
        new_path = static_home.joinpath(url.replace('../../', ''))
        new_path.parent.mkdir(exist_ok=True, parents=True)
        new_path.write_bytes(path.read_bytes())
    else:
        # Fetch before touching the filesystem so a failed download leaves
        # nothing behind. Without the status check an HTML error page would be
        # saved as if it were the asset; without a timeout a stalled
        # connection would block the notebook indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        new_path = static_home.joinpath(re.sub('https://.*/npm/@*', '', url).replace('@', '/'))
        new_path.parent.mkdir(exist_ok=True, parents=True)
        new_path.write_bytes(response.content)
    return new_path

In [31]:
def solve_resource(filename):
    """Localise the assets referenced by an HTML page and return the new markup.

    Every ``<link href>``, ``<script src>`` and ``<img src>`` that carries a
    non-empty URL is passed to ``extract_resource``; the attribute is then
    rewritten to the returned path expressed relative to ``app_home`` with a
    leading ``/``.

    Parameters
    ----------
    filename : str or path-like
        HTML file to process.

    Returns
    -------
    str
        The modified document as produced by ``soup.prettify()``.
    """
    # Read as UTF-8 explicitly rather than with the platform default encoding,
    # so the result does not depend on the machine's locale.
    # NOTE(review): assumes the page is UTF-8 encoded - confirm for other inputs.
    with open(filename, 'r', encoding='utf-8') as f:
        soup = BeautifulSoup(f.read(), 'html.parser')

    # Tag name -> attribute that holds the resource URL.
    attrname_dict = {'link': 'href', 'script': 'src', 'img': 'src'}
    for tag in soup.find_all(list(attrname_dict)):
        # find_all is restricted to the dict's keys, so the lookup cannot miss.
        attrname = attrname_dict[tag.name]
        url = tag.get(attrname)
        if not url:
            # e.g. inline <script> blocks without a src attribute
            continue
        new_path = extract_resource(url)
        tag[attrname] = Path('/').joinpath(new_path.relative_to(app_home)).as_posix()
    return soup.prettify()

In [32]:
# Localise the assets of the dashboard page and keep the rewritten markup.
index_page = pages_home / 'index.html'
original_html = solve_resource(index_page.as_posix())

https://cdn.jsdelivr.net/npm/@fontsource/source-sans-3@5.0.12/index.css
https://cdn.jsdelivr.net/npm/overlayscrollbars@2.3.0/styles/overlayscrollbars.min.css
https://cdn.jsdelivr.net/npm/bootstrap-icons@1.11.0/font/bootstrap-icons.min.css
../../dist/css/adminlte.css
https://cdn.jsdelivr.net/npm/apexcharts@3.37.1/dist/apexcharts.css
https://cdn.jsdelivr.net/npm/jsvectormap@1.5.3/dist/css/jsvectormap.min.css
../../dist/assets/img/user1-128x128.jpg
../../dist/assets/img/user8-128x128.jpg
../../dist/assets/img/user3-128x128.jpg
../../dist/assets/img/user2-160x160.jpg
../../dist/assets/img/user2-160x160.jpg
../../dist/assets/img/AdminLTELogo.png
../../dist/assets/img/user1-128x128.jpg
../../dist/assets/img/user3-128x128.jpg
../../dist/assets/img/user1-128x128.jpg
../../dist/assets/img/user3-128x128.jpg
../../dist/assets/img/user1-128x128.jpg
../../dist/assets/img/user7-128x128.jpg
../../dist/assets/img/user3-128x128.jpg
../../dist/assets/img/user5-128x128.jpg
../../dist/assets/img/user6-128

In [99]:
# Build the templates from the localised dashboard page: strip the demo
# content, keep only the needed assets, then split the layout into one
# template per top-level section plus an index.html that includes them.
soup = BeautifulSoup(original_html, 'html.parser')

# Clear menu: keep only the top-level sidebar entries at these positions.
sidebar_menu = soup.find(attrs={'class': 'sidebar-menu'})
kept_menu_positions = {0, 1, 6, 7}
for i, li in enumerate(sidebar_menu.find_all('li', recursive=False)):
    if i in kept_menu_positions:
        continue
    li.replace_with('')

# Clear content
soup.find(attrs={'class': 'app-content'}).replace_with('')

# Toggle switch
# soup.find(attrs={'data-lte-toggle': 'sidebar'}).find('i').replace_with('🌐')

# Remove useless script: only scripts whose src mentions adminlte/bootstrap
# survive; inline scripts (no src) are dropped as well.
for script in soup.find_all('script'):
    src = script.attrs.get('src')
    if src is not None and any(k in src for k in ['adminlte', 'bootstrap']):
        continue
    script.replace_with('')

# Remove useless css
for css in soup.find_all('link'):
    href = css.attrs.get('href')
    if href is not None and any(k in href for k in ['adminlte', 'bootstrap']):
        if 'bootstrap-icons' in href:
            print(href)
            # bootstrap-icons has to be downloaded manually:
            # https://github.com/twbs/icons/releases/download/v1.11.1/bootstrap-icons-1.11.1.zip
            css.attrs['href'] = '/static/mainframe/bootstrap-icons/bootstrap-icons.min.css'
            # The link now points at a different file, so drop the integrity
            # attribute; pop() tolerates a tag that never had one.
            css.attrs.pop('integrity', None)
        continue
    css.replace_with('')

# Clear all comments
for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
    comment.replace_with('')

# Delete meta: keep only the first two <meta> tags.
for meta in soup.find_all('meta')[2:]:
    meta.replace_with('')

# Replace title
soup.find('title').string='数据中台管理'


# Split pages
# The template directory must exist before the first write below; creating it
# only after the loop fails on a fresh checkout.
template_home.mkdir(exist_ok=True, parents=True)
qs = 'body div[class="app-wrapper"]'
for tag in [x for x in soup.select(qs)[0].children if str(x).strip()]:
    path = template_home.joinpath(f'{ tag.name }.html')
    # Written as UTF-8 explicitly so the output does not depend on the locale.
    path.write_text(tag.prettify(), encoding='utf-8')
    tag.replace_with(f'{{% include "{ app_home }/{ tag.name }.html" %}}')

path = template_home.joinpath('index.html')
path.write_text(soup.prettify(), encoding='utf-8')

/static/mainframe/bootstrap-icons/1.11.0/font/bootstrap-icons.min.css


935

In [142]:
# Replace every leaf sidebar entry in aside.html with an include of
# nav_item.html, passing the entry's text and link target as parameters.
path = template_home.joinpath('aside.html')
# Read and write as UTF-8 explicitly so the result does not depend on the locale.
aside_html = path.read_text(encoding='utf-8')
aside = BeautifulSoup(aside_html, 'html.parser')
qs = 'div[class="sidebar-wrapper"] nav[class="mt-2"] li[class="nav-item"]'
for li in aside.select(qs):
    # Entries that still contain nested nav items are containers; leave them.
    if li.select('li[class="nav-item"]'):
        continue
    link = li.find('a')
    # An entry without a link (or without an href) cannot be turned into an
    # include; leave it untouched instead of raising.
    if link is None or not link.has_attr('href'):
        continue
    item_name = li.text.strip()
    item_href = link['href']
    li.replace_with(f'''{{% include "{ app_home }/nav_item.html" with name="{ item_name }" href="{ item_href }" %}}''')
path.write_text(aside.prettify(), encoding='utf-8')

3159

'./examples/lockscreen.html'

In [130]:
# Scratch inspection of the attributes of `li`, the loop variable left bound by
# an earlier cell. NOTE(review): depends on leftover kernel state and fails on a
# fresh kernel if no loop has run - consider removing before sharing.
li.attrs

{'class': ['nav-item']}

In [148]:
# Scratch check: pull the type name out of the class repr,
# e.g. "<class 'dict'>" -> 'dict'.
value = dict()
str(type(value)).split("'", 2)[1]

'dict'