In [1]:
import re
import requests
from pathlib import Path
from bs4 import BeautifulSoup
from bs4.element import Comment
from urllib.parse import urljoin
import mimetypes

In [2]:
admin_lte_home = Path('AdminLTE-3.2.0')
pages_home = admin_lte_home.joinpath('dist/pages')
app_home = Path('mainframe')
template_home = app_home.joinpath('templates/mainframe')
static_home = app_home.joinpath('static/mainframe')

In [3]:
# def get_css_external(url):
#     parent = urljoin(url, './') 
#     response = requests.get(url)    
#     src_content = re.findall('@font-face\{.*?(src:.*?)\}', response.text)
#     if not src_content: return []
#     return [urljoin(parent, _) for _ in re.findall('url\("(.*?)"\)', src_content[0])]

In [9]:
def extract_resource(url):
    """ """
    path = admin_lte_home.joinpath(url)
    if path.exists(): 
        new_path = static_home.joinpath(url.replace('../../', ''))
        new_path.parent.mkdir(exist_ok=True, parents=True)
        new_path.write_bytes(path.read_bytes())        
    else:
        new_path = static_home.joinpath(re.sub('https://.*/npm/@*', '', url).replace('@', '/'))
        new_path.parent.mkdir(exist_ok=True, parents=True)
        new_path.write_bytes(requests.get(url).content)
        mime_type, _ = mimetypes.guess_type(url) 
        # if mime_type == 'text/css':
        #     for external_url in get_css_external(url):
        #         print(external_url)
        #         print(extract_resource(external_url))            
    return new_path

In [12]:
def solve_resource(filename):
    with open(filename, 'r') as f: 
        soup = BeautifulSoup(f.read(), 'html.parser')    
    attrname_dict = {'link': 'href', 'script': 'src', 'img': 'src'}
    for tag in soup.find_all(['script', 'link', 'img']):
        attrname = attrname_dict.get(tag.name, None)
        if not attrname: continue
        url = tag.get(attrname)
        if not url: continue
        print(url)
        new_path = extract_resource(url)
        tag[attrname] = Path('/').joinpath(new_path.relative_to(app_home)).as_posix()
    return soup.prettify()

In [20]:
original_html = solve_resource(
    admin_lte_home.joinpath('iframe.html').as_posix())

https://fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,400i,700&display=fallback
plugins/fontawesome-free/css/all.min.css
dist/css/adminlte.min.css
plugins/overlayScrollbars/css/OverlayScrollbars.min.css
dist/img/user1-128x128.jpg
dist/img/user8-128x128.jpg
dist/img/user3-128x128.jpg
dist/img/AdminLTELogo.png
dist/img/user2-160x160.jpg
plugins/jquery/jquery.min.js
plugins/jquery-ui/jquery-ui.min.js
plugins/bootstrap/js/bootstrap.bundle.min.js
plugins/overlayScrollbars/js/jquery.overlayScrollbars.min.js
dist/js/adminlte.js
dist/js/demo.js


In [22]:
soup = BeautifulSoup(original_html, 'html.parser')

# Clear menu
aside = soup.find('aside')
li_list = []
for i, li in enumerate(aside.find_all('li', recursive=False)):
    if i in [0, 1, 6, 7]:
        continue
    li.replace_with('')

# Clear content
# soup.find(attrs={'class': 'app-content'}).replace_with('')

# Toggle switch
# soup.find(attrs={'data-lte-toggle': 'sidebar'}).find('i').replace_with('🌐')

# Remove useless script
for script in soup.find_all('script'):
    src = script.attrs.get('src')
    if src is not None:        
        if any([k in src for k in ['adminlte', 'bootstrap']]):
            continue    
    script.replace_with('')

# Remove useless css
for css in soup.find_all('link'):
    href = css.attrs.get('href')
    if href is not None:
        if any([k in href for k in ['adminlte', 'bootstrap']]):
            if 'bootstrap-icons' in href:
                print(href)
                # bootstrap-icons 要手动下载
                # https://github.com/twbs/icons/releases/download/v1.11.1/bootstrap-icons-1.11.1.zip
                css.attrs['href'] = '/static/mainframe/bootstrap-icons/bootstrap-icons.min.css'
                del css.attrs['integrity']
            continue    
    css.replace_with('')  

# Clear all comments
for commnet in soup.find_all(string=lambda text: isinstance(text, Comment)):
    commnet.replace_with('')

# Delete meta
for meta in soup.find_all('meta')[2:]:
    meta.replace_with('')

# Replace title
soup.find('title').string='数据中台管理'


# Split pages
qs = 'body div[class="wrapper"]'
for tag in [x for x in soup.select(qs)[0].children if str(x).strip()]:
    path = template_home.joinpath(f'{ tag.name }.html')
    path.write_text(tag.prettify())
    tag.replace_with(f'{{% include "{ app_home }/{ tag.name }.html" %}}')

path = template_home.joinpath('index.html')
path.parent.mkdir(exist_ok=True, parents=True)
path.write_text(soup.prettify())

758

In [142]:
path = template_home.joinpath('aside.html')
aside_html = path.read_text()
aside = BeautifulSoup(aside_html, 'html.parser')
# qs = 'div[class="sidebar-wrapper"] nav[class="mt-2"]'
# children = [x for x in aside.select(qs)[0].children if str(x).strip()]
# for ch in children:
#     print(ch.name)
qs = 'div[class="sidebar-wrapper"] nav[class="mt-2"] li[class="nav-item"]'
for li in aside.select(qs):
    if len(li.select('li[class="nav-item"]')) == 0:
        li.replace_with(f'''{{% include "{ app_home }/nav_item.html" with name="{ li.text.strip() }" href="{ li.find('a').attrs['href']}" %}}''')
path.write_text(aside.prettify())

3159

In [15]:
soup = BeautifulSoup(original_html, 'html.parser')

In [16]:
soup

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="width=device-width, initial-scale=1" name="viewport"/>
<title>
   AdminLTE 3 | Dashboard
  </title>
<!-- Google Font: Source Sans Pro -->
<link href="/static/mainframe/https:/fonts.googleapis.com/css?family=Source+Sans+Pro:300,400,400i,700&amp;display=fallback" rel="stylesheet"/>
<!-- Font Awesome -->
<link href="/static/mainframe/plugins/fontawesome-free/css/all.min.css" rel="stylesheet"/>
<!-- Ionicons -->
<link href="/static/mainframe/https:/code.ionicframework.com/ionicons/2.0.1/css/ionicons.min.css" rel="stylesheet"/>
<!-- Tempusdominus Bootstrap 4 -->
<link href="/static/mainframe/plugins/tempusdominus-bootstrap-4/css/tempusdominus-bootstrap-4.min.css" rel="stylesheet"/>
<!-- iCheck -->
<link href="/static/mainframe/plugins/icheck-bootstrap/icheck-bootstrap.min.css" rel="stylesheet"/>
<!-- JQVMap -->
<link href="/static/mainframe/plugins/jqvmap/jqvmap.min.css" rel="stylesheet"/>
<!-- Theme style -->
<