# wuxia-pdf

wuxia-pdf is a tool that creates a PDF of any novel on [Wuxiaworld](http://wuxiaworld.com).

My reason for creating this was to easily read my novels in bulk on mobile, in areas where I don't have internet service.

In [1]:
from bs4 import BeautifulSoup, NavigableString
import urllib3

from IPython.display import IFrame

from reportlab.lib.colors import HexColor
from reportlab.lib.enums import TA_CENTER
from reportlab.lib.units import inch
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer

from PyPDF2 import PdfFileMerger, PdfFileReader
import os

# Shared urllib3 connection pool; load_chapter issues its GET requests through it.
http = urllib3.PoolManager()

def load_chapter(novel, num):
    """Download one chapter page and extract its title and body text.

    Args:
        novel: Novel slug as it appears in the site's URLs (e.g. ``"emperor"``).
        num: Chapter number to fetch.

    Returns:
        A dict with the keys ``"body"``, ``"novel"``, ``"number"`` and
        ``"title"``, or ``None`` when the page has no ``articleBody`` div or
        that div has no ``<hr>`` marking where the chapter text starts.
        ``"body"`` is markup: each paragraph's text is followed by
        ``<br/><br/>``. ``"title"`` is an empty string when no paragraph was
        found after the first ``<hr>``.
    """
    url = f"http://www.wuxiaworld.com/{novel}-index/{novel}-chapter-{num}"
    response = http.request("GET", url)
    soup = BeautifulSoup(response.data, "html5lib").find("body")

    article = soup.find("div", itemprop="articleBody")
    if article is None:
        return None

    # The chapter text is read from the siblings that follow the first <hr>;
    # without that marker there is nothing to anchor on.
    separator = article.find("hr")
    if separator is None:
        return None

    title = ""
    title_seen = False
    paragraphs = []

    for node in separator.next_siblings:
        # Skip bare text nodes and any element that contains a link
        # (presumably navigation links -- confirm against the live markup).
        if isinstance(node, NavigableString) or node.find("a"):
            continue
        # A second <hr> ends the chapter text.
        if node.name == "hr":
            break
        if not title_seen:
            # The first kept element is treated as the chapter title.
            title = node.text
            title_seen = True
        else:
            paragraphs.append(f"{node.text}<br/><br/>")

    return {
        "body": "".join(paragraphs),
        "novel": novel,
        "number": num,
        "title": title,
    }

# PDF Generation

Using the reportlab module, you can create readable, customizable chapters.

In [2]:
# Paragraph styles used by make_pdf: "default" for the chapter body and
# "title" for the chapter heading (derived from "default").
styles = {"default": getSampleStyleSheet()["Normal"]}
styles.update(
    title=ParagraphStyle(
        name="title",
        parent=styles["default"],
        fontName="Courier",
        fontSize=18,
        alignment=TA_CENTER,
        textColor=HexColor(0x3970D0),
    )
)

def make_pdf(chapter):
    """Render a single chapter to ``<novel>_<number>.pdf``.

    Args:
        chapter: Dict with ``"novel"``, ``"number"`` and ``"body"`` keys and,
            optionally, ``"title"``. A missing title is rendered as an empty
            heading instead of raising ``KeyError``.

    Returns:
        The generated PDF opened in binary read mode. The caller is
        responsible for closing it.
    """
    file_name = f"{chapter['novel']}_{chapter['number']}.pdf"
    doc = SimpleDocTemplate(file_name,
                            rightMargin=inch/2,
                            leftMargin=inch/2,
                            topMargin=inch/4,
                            bottomMargin=inch/2)

    # Heading, a gap, then the whole chapter body as one paragraph flowable.
    flow = [
        Paragraph(chapter.get("title", ""), styles["title"]),
        Spacer(0, inch / 3),
        Paragraph(chapter["body"], styles["default"]),
    ]

    doc.build(flow)

    return open(file_name, "rb")

# PDF Merging

PyPDF2 allows for the merging of multiple PDF files.

After each chapter PDF is created, it is appended to the merged document and the per-chapter file is promptly deleted.

In [3]:
# Module-level PdfFileMerger instance, created once when this cell runs.
merger = PdfFileMerger()

def group_pdf(novel, start=1, depth=1000):
    """Merge up to ``depth`` consecutive chapters of ``novel`` into one PDF.

    Chapters are fetched one at a time starting at ``start``; fetching stops
    early at the first chapter that ``load_chapter`` cannot find. Each
    chapter is rendered to its own PDF, appended to the merged document, and
    the per-chapter file is then deleted.

    Args:
        novel: Novel slug passed through to ``load_chapter``.
        start: First chapter number to include (must be at least 1).
        depth: Maximum number of chapters to include (must be at least 1).

    Returns:
        The merged file ``<novel>_<start>-<last>.pdf`` opened in binary read
        mode, where ``<last>`` is the number of the last chapter actually
        included. The caller is responsible for closing it.

    Raises:
        ValueError: If ``start`` or ``depth`` is less than 1.
    """
    if start < 1 or depth < 1:
        raise ValueError("start and depth must both be at least 1")

    # A fresh merger per call keeps chapters appended by an earlier call
    # from leaking into this call's output.
    combined = PdfFileMerger()

    # Last chapter number if every requested chapter exists.
    last = start + depth - 1

    for n in range(start, start + depth):
        chap = load_chapter(novel, n)
        if chap is None:
            # Chapter n is missing, so n - 1 was the last one merged.
            last = n - 1
            break

        pdf = make_pdf(chap)
        try:
            combined.append(PdfFileReader(pdf))
        finally:
            # Always release and delete the temporary chapter file, even if
            # appending failed.
            pdf.close()
            os.remove(pdf.name)

    out_file = f"{novel}_{start}-{last}.pdf"
    try:
        combined.write(out_file)
    finally:
        combined.close()

    return open(out_file, "rb")
    
# Merge up to ten chapters of "emperor", starting at chapter 1, then embed
# the resulting file in the notebook output as a 500x500 frame.
grouped = group_pdf(novel="emperor", start=1, depth=10)
IFrame(src=grouped.name, width=500, height=500)

In [4]:
# Release the file handle that group_pdf returned in the previous cell.
grouped.close()