In [1]:
import sys, os
from os.path import relpath
import shutil, errno
import json

sys.path.append('libs')
import markdown

In [96]:
def read_markdown_file_as_html(markdown_filename):
    """Read a Markdown file and return its content rendered as HTML.

    markdown_filename -- path of the Markdown file to read.

    Returns the value produced by ``markdown.markdown`` for the file's text.
    """
    # The context manager closes the handle even when read() raises;
    # the previous version left the file open.
    with open(markdown_filename, 'r') as f:
        markdown_content = f.read()
    return markdown.markdown(markdown_content)


def walk_dir(root_dir):
    """Describe a folder tree as nested dictionaries.

    root_dir -- folder to scan; may be relative and may end with a separator.

    Returns a dict with these keys:
      'name'     -- last path component of root_dir (text after a final '.'
                    is dropped by os.path.splitext)
      'stub'     -- HTML rendered from a '.md' file found directly in the
                    folder, or None when there is none
      'docs'     -- sorted names of the '.pdf' files directly in the folder
      'children' -- one dict of this same shape for every sub-folder that
                    contains a PDF at any depth (see dir_contains_pdf)
      'abs_path' -- absolute path of the folder

    >>> walk_dir("./test_docs/folder containing pdfs/")['docs']
    ['dummy_pdf 2.pdf', 'dummy_pdf 3.pdf', 'dummy_pdf 4.pdf', 'dummy_pdf.pdf']
    >>> len(walk_dir("./test_docs/folder containing pdfs/")['children'])
    0
    >>> walk_dir("./test_docs/folder containing markdown and pdfs/")['stub'] is None
    False
    >>> walk_dir("./test_docs/folder containing markdown and pdfs/")['docs']
    ['dummy_pdf 2.pdf', 'dummy_pdf 3.pdf', 'dummy_pdf 4.pdf', 'dummy_pdf.pdf']
    """
    abs_root = os.path.abspath(root_dir)
    # normpath drops a trailing separator; without it basename() returns ''
    # for inputs such as "folder/" and the entry would have an empty name.
    folder_name = os.path.splitext(os.path.basename(os.path.normpath(root_dir)))[0]
    store = {'name' : folder_name, 'stub': None, 'docs' : [], 'children':[], 'abs_path':abs_root}
    # os.listdir() returns entries in arbitrary order; sort so that the
    # result is deterministic across platforms and file systems.
    for item in sorted(os.listdir(abs_root)):
        path = os.path.join(abs_root, item)
        if os.path.isfile(path):
            if item.endswith(".pdf"):
                store['docs'].append(item)
            elif item.endswith(".md"):
                # If several '.md' files exist, the last one in sorted order wins.
                store['stub'] = read_markdown_file_as_html(path)
        elif os.path.isdir(path) and dir_contains_pdf(path):
            store['children'].append(walk_dir(path))
    return store

def dir_contains_pdf(root):
    """Report whether a '.pdf' file exists anywhere beneath ``root``.

    Files directly inside ``root`` are checked by name suffix; sub-folders
    are searched recursively. Returns True on the first match, else False.

    >>> dir_contains_pdf("./test_docs/folder containing pdfs/")
    True
    >>> dir_contains_pdf("./test_docs/folder containing nothing/")
    False
    >>> dir_contains_pdf("./test_docs/folder containing folders, markdown, and pdf/")
    True
    >>> dir_contains_pdf("./test_docs/folder containing markdown and pdfs/")
    True
    """
    base = os.path.abspath(root)

    def _leads_to_pdf(entry):
        # A regular file matches by suffix; a folder matches when its subtree does.
        full_path = os.path.join(base, entry)
        if os.path.isfile(full_path):
            return entry.endswith(".pdf")
        return os.path.isdir(full_path) and dir_contains_pdf(full_path)

    # any() stops at the first matching entry, like the early return it replaces.
    return any(_leads_to_pdf(entry) for entry in os.listdir(base))


In [97]:
in_dir = "./test_docs/test_folder_1"

# The output folder takes the name of the last component of the input path
# and is placed under the current directory; resources go one level below it.
in_dir_label = os.path.basename(in_dir)
out_dir = os.path.join(os.path.dirname('./'), in_dir_label)
out_resources_dir = os.path.join(out_dir, 'resources')
for shown in (in_dir_label, out_resources_dir):
    print('"{0}"'.format(shown))

"test_folder_1"
"./test_folder_1/resources"


In [121]:
in_dir = "./test_docs/test_folder_1"

# Scan from the absolute form of the input folder, then show the resulting
# tree as indented JSON.
source_items = walk_dir(root_dir=os.path.abspath(in_dir))

print(json.dumps(source_items, indent=2))

{
  "abs_path": "/Users/falconer_k/git/vta/employee-resource-guide/test_docs/test_folder_1",
  "docs": [
    "dummy PDF 1.pdf",
    "dummy PDF 2.pdf",
    "dummy PDF 3.pdf",
    "dummy PDF 4.pdf"
  ],
  "children": [
    {
      "abs_path": "/Users/falconer_k/git/vta/employee-resource-guide/test_docs/test_folder_1/sub_folder_1",
      "docs": [
        "dummy PDF 1.pdf",
        "dummy PDF 2.pdf",
        "dummy PDF 3.pdf",
        "dummy PDF 4.pdf"
      ],
      "children": [],
      "name": "sub_folder_1",
      "stub": null
    },
    {
      "abs_path": "/Users/falconer_k/git/vta/employee-resource-guide/test_docs/test_folder_1/sub_folder_2",
      "docs": [
        "dummy PDF 1.pdf",
        "dummy PDF 2.pdf",
        "dummy PDF 3.pdf",
        "dummy PDF 4.pdf"
      ],
      "children": [],
      "name": "sub_folder_2",
      "stub": null
    }
  ],
  "name": "test_folder_1",
  "stub": "<h1>Test Folder 1</h1>\n<p>This folder contains PDFs in the root and one additional level of 

In [118]:
def make_relative(item, work_dir):
    """Swap each node's 'abs_path' for a 'rel_path' relative to ``work_dir``.

    The tree is modified in place: 'abs_path' is removed from ``item`` and
    from every node reachable through 'children'. A node whose 'abs_path'
    is missing or None receives no 'rel_path'.

    Returns the same ``item`` object that was passed in.
    """
    absolute = item.pop('abs_path', None)
    if absolute is not None:
        item['rel_path'] = os.path.relpath(absolute, work_dir)
    # Children are rewritten in place, so the recursive return value is not needed.
    for node in item['children']:
        make_relative(node, work_dir)
    return item




In [119]:
def populate_template(template, page_title, toc_data, resources_path):
    """Fill the placeholders of an HTML template file and return the result.

    template       -- path of the template file to read
    page_title     -- text substituted for "{page-title}"
    toc_data       -- text substituted for "{toc-data}"
    resources_path -- substituted for "{resources-path}", with the platform
                      path separator appended
    "{head-comment}" is replaced with a fixed do-not-edit notice.

    Substitutions are applied in the order listed above, so placeholder text
    occurring inside an earlier substituted value is itself replaced by a
    later step.
    """
    # The context manager closes the template handle; the previous version
    # left it open.
    with open(template, 'r') as f:
        template_content = f.read()
    return template_content \
        .replace("{page-title}", page_title) \
        .replace("{toc-data}", toc_data) \
        .replace("{resources-path}", resources_path+os.path.sep) \
        .replace("{head-comment}", "\n\t\tPLEASE DO NOT EDIT THIS FILE\n\t\tTHIS FILE IS DYNAMICALLY GENERATED.\n\t\tANY CHANGES MADE MAY BE OVERWRITTEN.\n")

In [120]:
RESOURCES_DIR="resources"

def build_page(item, out_dir):
    """Render one item as an HTML page inside ``out_dir``.

    item    -- dict with at least a 'name' key; the whole dict is embedded
               in the page as indented JSON
    out_dir -- folder that receives "<name>.html"

    Returns the path of the file that was written.
    """
    # A missing name becomes the literal page name 'None'.
    p_name = 'None' if item['name'] is None else str(item['name'])
    page_path = os.path.join(out_dir, p_name + '.html')

    # Render before opening the output: opening with "w" truncates the file,
    # so a template failure would otherwise leave an empty page behind.
    resources_path = os.path.relpath(RESOURCES_DIR, out_dir)
    page_html = populate_template("template.html", p_name, json.dumps(item, indent=2), resources_path)

    with open(page_path, "w") as f:
        f.write(page_html)
    return page_path


# make_relative rewrites the tree in place, so rel_items and source_items
# refer to the same object after this call.
rel_items = make_relative(source_items, work_dir='./')
build_page(rel_items, out_dir='./')

'./test_folder_1.html'