Skip to content

Commit

Permalink
Merge pull request #786 from chrispyles/nbconvert-webpdf
Browse files Browse the repository at this point in the history
 Convert PDF via HTML exporter to nbconvert WebPDF exporter
  • Loading branch information
chrispyles committed Mar 8, 2024
2 parents 122a6f6 + ecc23bb commit 8f6d836
Show file tree
Hide file tree
Showing 14 changed files with 43 additions and 159 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

* Removed compatibility patches for nbconvert < 6 per [#777](https://github.com/ucbds-infra/otter-grader/issues/777)
* Updated Otter Export to throw an error if nbconvert<6.0.0 is found
* Converted Otter Export's PDF via HTML exporter to use nbconvert's WebPDF exporter per [#781](https://github.com/ucbds-infra/otter-grader/issues/781)
* Removed pdfkit from dependencies

**v5.5.0 (unreleased):**

Expand Down
1 change: 0 additions & 1 deletion docs/_static/grading-environment-r.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,5 @@ dependencies:
- dill
- numpy
- gspread
- pypdf
- otter-grader==5.4.1
- rpy2
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,9 @@ Installation
------------

Otter is a Python package that is compatible with Python 3.6+. The PDF export internals require
either LaTeX and Pandoc or wkhtmltopdf to be installed. Docker is also required to grade assignments
locally with containerization. Otter's Python package can be installed using pip. To install the
current stable version, install with
either LaTeX and Pandoc, or Playwright and Chromium, to be installed. Docker is also required to grade
assignments locally with containerization. Otter's Python package can be installed using pip. To
install the current stable version, install with

.. code-block:: console
Expand Down
3 changes: 1 addition & 2 deletions docs/pdfs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@ filtering for generating PDFs for manual grading. There are two options for expo


* **PDF via LaTeX:** this uses nbconvert, pandoc, and LaTeX to generate PDFs from TeX files
* **PDF via HTML:** this uses wkhtmltopdf and the Python packages pdfkit and pypdf to generate PDFs
from HTML files
* **PDF via HTML:** this uses nbconvert's WebPDF exporter to generate PDFs from HTML

Otter Export is used by Otter Assign to generate Gradescope PDF templates and solutions, in the
Gradescope autograder to generate the PDFs of notebooks, by ``otter.Notebook`` to generate PDFs and
Expand Down
31 changes: 10 additions & 21 deletions otter/assign/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from .output import write_output_directories
from .utils import run_tests, write_otter_config_file, run_generate_autograder

from ..export import export_notebook, WkhtmltopdfNotFoundError
from ..export import export_notebook
from ..plugins import PluginCollection
from ..utils import chdir, get_relpath, knit_rmd_file, loggers

Expand Down Expand Up @@ -90,26 +90,15 @@ def main(

if not assignment.is_rmd:
LOGGER.debug(f"Exporting {src} as notebook to {dst}")
try:
LOGGER.debug("Attempting PDF via HTML export")
export_notebook(
src,
dest=dst,
filtering=filtering,
pagebreaks=filtering,
exporter_type="html",
)
LOGGER.debug("PDF via HTML export successful")

except WkhtmltopdfNotFoundError:
LOGGER.debug("PDF via HTML export failed; attempting PDF via LaTeX export")
export_notebook(
src,
dest=dst,
filtering=filtering,
pagebreaks=filtering,
)
LOGGER.debug("PDF via LaTeX export successful")
LOGGER.debug("Attempting PDF via HTML export")
export_notebook(
src,
dest=dst,
filtering=filtering,
pagebreaks=filtering,
exporter_type="html",
)
LOGGER.debug("PDF via HTML export successful")

else:
LOGGER.debug(f"Knitting {src} to {dst}")
Expand Down
2 changes: 0 additions & 2 deletions otter/export/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

import os

from .utils import WkhtmltopdfNotFoundError


def export_notebook(nb_path, dest=None, exporter_type=None, **kwargs):
"""
Expand Down
10 changes: 1 addition & 9 deletions otter/export/exporters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

import shutil

from .via_html import PDFViaHTMLExporter
from .via_latex import PDFViaLatexExporter

from ..utils import WkhtmltopdfNotFoundError


def get_exporter(exporter_type=None):
"""
Expand All @@ -18,9 +17,6 @@ def get_exporter(exporter_type=None):
Returns:
``otter.export.exporters.base_exporter.BaseExporter``: the exporter class
Raises:
``WkhtmltopdfNotFoundError``: if PDF via HTML is indicated but wkhtmltopdf is not installed.
"""
# throw an error if the nbconvert version is < 6
import nbconvert
Expand All @@ -31,10 +27,6 @@ def get_exporter(exporter_type=None):
exporter_type = exporter_type.lower()

if exporter_type == 'html':
if shutil.which("wkhtmltopdf") is None:
raise WkhtmltopdfNotFoundError("PDF via HTML indicated but wkhtmltopdf not found")

from .via_html import PDFViaHTMLExporter
return PDFViaHTMLExporter

elif exporter_type == "latex":
Expand Down
46 changes: 0 additions & 46 deletions otter/export/exporters/utils.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,6 @@
"""Utilities for Otter Export exporters"""

import re
import copy
import nbformat

from ...utils import get_source


BEGIN_QUESTION_REGEX = r"<!--\s*BEGIN QUESTION\s*-->"
Expand Down Expand Up @@ -73,45 +69,3 @@ def sub_end_for_new_page(line):
``str``: the line with the end question match substituted for the newpage comment
"""
return re.sub(END_QUESTION_REGEX, NEW_PAGE_CELL_SOURCE, line)


def notebook_pdf_generator(nb):
    """
    A generator that takes in a notebook ``nb`` with HTML comments for filtering and splits this
    notebook up into each filtered block, yielding a complete notebook for each chunk. Used for
    implementing pagebreaks in PDFs via HTML.

    Cells are walked in order. When a cell's source contains ``NEW_PAGE_CELL_SOURCE``, the cell is
    split after the first line containing that marker into two new markdown cells: the first one
    closes the current chunk and the second one opens the next chunk. Every other cell is added to
    the current chunk unchanged. The trailing chunk is always emitted, even if it has no cells.

    Each yielded notebook is its own shallow copy of ``nb`` (same metadata, different ``cells``
    list), so the results stay distinct if the generator is materialized, e.g. with ``list()``.

    Args:
        nb (``nbformat.NotebookNode``): the notebook to be exported

    Yields:
        ``nbformat.NotebookNode``: a complete notebook containing a single filtered block
    """
    chunks, current_chunk = [], []
    for cell in nb.cells:
        # NOTE(review): source is joined with "\n" and sliced below, so get_source is presumed to
        # return a list of lines -- confirm against its definition
        source = get_source(cell)

        if NEW_PAGE_CELL_SOURCE not in "\n".join(source):
            current_chunk.append(cell)
            continue

        # index of the first line holding the marker; fall back to the last line if no single
        # line contains it
        split_at = next(
            (i for i, line in enumerate(source) if NEW_PAGE_CELL_SOURCE in line),
            len(source) - 1,
        )

        head, tail = nbformat.v4.new_markdown_cell(), nbformat.v4.new_markdown_cell()
        head.source = "\n".join(source[:split_at + 1])
        tail.source = "\n".join(source[split_at + 1:])

        # the marker line ends the current chunk; whatever follows it starts the next one
        current_chunk.append(head)
        chunks.append(current_chunk)
        current_chunk = [tail]

    chunks.append(current_chunk)

    for chunk_cells in chunks:
        # copy per chunk so that earlier yielded notebooks are not mutated by later iterations
        chunk_nb = copy.copy(nb)
        chunk_nb.cells = chunk_cells
        yield chunk_nb
47 changes: 6 additions & 41 deletions otter/export/exporters/via_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,13 @@

import nbconvert
import os
import shutil

from io import BytesIO

from .base_exporter import BaseExporter, TEMPLATE_DIR
from .utils import notebook_pdf_generator


class PDFViaHTMLExporter(BaseExporter):
"""
Exports notebooks to PDF files using HTML as an intermediary
Converts IPython notebooks to PDFs by first converting them into temporary HTML files that are then
converted to PDFs using wkhtmltopdf and its Python API pdfkit which are then stitched together (if
pagebreaks are enabled) using pypdf.
An exporter that uses nbconvert's WebPDF exporter to convert notebooks to PDFs via HTML.
Attributes:
default_options (``dict``): the default options for this exporter
Expand All @@ -30,12 +22,6 @@ class PDFViaHTMLExporter(BaseExporter):

@classmethod
def convert_notebook(cls, nb_path, dest, **kwargs):
if shutil.which("wkhtmltopdf") is None:
raise RuntimeError("Cannot export via HTML without wkhtmltopdf")

import pdfkit
from pypdf import PdfMerger

options = cls.default_options.copy()
options.update(kwargs)

Expand All @@ -45,32 +31,11 @@ def convert_notebook(cls, nb_path, dest, **kwargs):
orig_template_name = nbconvert.TemplateExporter.template_name
nbconvert.TemplateExporter.template_name = options["template"]

exporter = nbconvert.HTMLExporter()

if options["save_html"]:
html, _ = nbconvert.export(exporter, nb)
html_path = os.path.splitext(dest)[0] + ".html"
with open(html_path, "wb+") as f:
f.write(html.encode("utf-8"))

merger = PdfMerger()
for subnb in notebook_pdf_generator(nb):
html, _ = nbconvert.export(exporter, subnb)

pdfkit_options = {
'enable-local-file-access': None,
'quiet': '',
'print-media-type': '',
'javascript-delay': 2000
}
pdf_contents = pdfkit.from_string(html, False, options=pdfkit_options)

output = BytesIO()
output.write(pdf_contents)
output.seek(0)

merger.append(output, import_outline=False)
exporter = nbconvert.WebPDFExporter()

merger.write(dest)
pdf, _ = nbconvert.export(exporter, nb)
pdf_path = os.path.splitext(dest)[0] + ".pdf"
with open(pdf_path, "wb+") as f:
f.write(pdf)

nbconvert.TemplateExporter.template_name = orig_template_name
9 changes: 1 addition & 8 deletions otter/export/exporters/via_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,17 +8,10 @@

from .base_exporter import BaseExporter, ExportFailedException, TEMPLATE_DIR

from ...utils import print_full_width


class PDFViaLatexExporter(BaseExporter):
"""
Exports notebooks to PDF files using LaTeX as an intermediary
Converts IPython notebooks to PDFs by first converting them into temporary TeX files that are then
converted to PDFs using nbconvert and pandoc. Pagebreaks, if enabled, are enforced with a custom
LaTeX template that clears the document to the next odd numbered page, resulting in responses that
are all two pages long.
An exporter that uses nbconvert's PDF exporter to convert notebooks to PDFs via LaTeX.
Attributes:
default_options (``dict``): the default options for this exporter
Expand Down
7 changes: 0 additions & 7 deletions otter/export/utils.py

This file was deleted.

32 changes: 16 additions & 16 deletions otter/generate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,17 +72,17 @@ def to_dict(self):

pip_deps = self.requirements if self.overwrite_requirements else [
"datascience",
"jupyter_client",
"ipykernel",
"matplotlib",
"pandas",
"ipywidgets",
"scipy",
"seaborn",
"scikit-learn",
"jinja2",
"nbconvert",
"nbformat",
"jupyter_client",
"ipykernel",
"matplotlib",
"pandas",
"ipywidgets",
"scipy",
"seaborn",
"scikit-learn",
"jinja2",
"nbconvert",
"nbformat",
"dill",
"numpy",
"gspread",
Expand Down Expand Up @@ -125,9 +125,9 @@ def to_str(self):
}


def main(*, tests_dir="./tests", output_path="autograder.zip", config=None, no_config=False,
lang=None, requirements=None, no_requirements=False, overwrite_requirements=False,
environment=None, no_environment=False, username=None, password=None, token=None, files=[],
def main(*, tests_dir="./tests", output_path="autograder.zip", config=None, no_config=False,
lang=None, requirements=None, no_requirements=False, overwrite_requirements=False,
environment=None, no_environment=False, username=None, password=None, token=None, files=[],
assignment=None, plugin_collection=None, python_version=None, channel_priority_strict=True):
"""
Run Otter Generate.
Expand All @@ -143,7 +143,7 @@ def main(*, tests_dir="./tests", output_path="autograder.zip", config=None, no_c
overwrite_requirements (``bool``): whether to overwrite the default requirements instead of
adding to them
environment (``str``): path to a conda environment file for this assignment
no_environment (``bool``): whether ``./environment.yml`` should be automatically checked if
no_environment (``bool``): whether ``./environment.yml`` should be automatically checked if
``environment`` is unspecified
username (``str``): a username for Gradescope for generating a token
password (``str``): a password for Gradescope for generating a token
Expand Down Expand Up @@ -236,7 +236,7 @@ def main(*, tests_dir="./tests", output_path="autograder.zip", config=None, no_c

# open requirements if it exists
extra_requirements, r_requirements = [], None
with load_default_file(requirements, lang_config["requirements_filename"],
with load_default_file(requirements, lang_config["requirements_filename"],
default_disabled=no_requirements,) as reqs:
if reqs is not None:
if ag_config.lang == "python":
Expand Down
4 changes: 2 additions & 2 deletions otter/run/run_autograder/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from contextlib import contextmanager
from io import StringIO
from typing import Optional
from typing import Iterable, Optional


_OUTPUT: Optional[StringIO] = None
Expand Down Expand Up @@ -30,7 +30,7 @@ def write_blank_page_to_stare_at_before_you(path: str):


@contextmanager
def capture_run_output() -> StringIO:
def capture_run_output() -> Iterable[StringIO]:
"""
A context manager for capturing anything that Otter Run would normally print to stdout. Yields
an ``io.StringIO`` object that the output will be written to.
Expand Down
2 changes: 1 addition & 1 deletion requirements-export.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
nbconvert>=6.0.0
pdfkit
nbconvert[webpdf]
pypdf

0 comments on commit 8f6d836

Please sign in to comment.