unfoldingWord-dev · richmahn · Oct 31, 2017 · Oct 14, 2017 · Oct 14, 2017 · Oct 14, 2017
diff --git a/functions/convert_md2html/module.json b/functions/convert_md2html/module.json
@@ -2,7 +2,7 @@
     "name": "md2html",
     "version": "2",
     "type": "converter",
-    "resource_types": ["obs", "ta"],
+    "resource_types": ["obs", "ta", "tn"],
     "input_format": ["md"],
     "output_format": ["html"],
     "options": [],

diff --git a/libraries/client/client_webhook.py b/libraries/client/client_webhook.py
@@ -182,7 +182,7 @@ def process_webhook(self):
                     self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
             else:
                 # -----------------------------
-                # multiple Bible book project
+                # multiple book project
                 # -----------------------------
                 books = preprocessor.get_book_list()
                 App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))

diff --git a/libraries/client/preprocessors.py b/libraries/client/preprocessors.py
@@ -3,6 +3,7 @@
 import re
 from glob import glob
 from shutil import copy
+from libraries.app.app import App
 from libraries.door43_tools.bible_books import BOOK_NUMBERS
 from libraries.general_tools.file_utils import write_file, read_file
 from libraries.resource_container.ResourceContainer import RC
@@ -11,12 +12,19 @@
 
 def do_preprocess(rc, repo_dir, output_dir):
+    """
+    Pick the preprocessor class matching rc.resource.identifier, run it, and return both results.
+
+    Dispatch order: 'obs', then any identifier in BIBLE_RESOURCE_TYPES, then 'ta', then 'tn';
+    every other identifier falls back to the generic Preprocessor. The chosen class is logged at debug level.
+
+    :param rc: resource container; only rc.resource.identifier is inspected here
+    :param repo_dir: passed unchanged to the preprocessor constructor
+    :param output_dir: passed unchanged to the preprocessor constructor
+    :return: tuple of (value returned by preprocessor.run(), the preprocessor instance)
+    """
     if rc.resource.identifier == 'obs':
+        App.logger.debug("do_preprocess: using ObsPreprocessor")
         preprocessor = ObsPreprocessor(rc, repo_dir, output_dir)
     elif rc.resource.identifier in BIBLE_RESOURCE_TYPES:
+        App.logger.debug("do_preprocess: using BiblePreprocessor")
         preprocessor = BiblePreprocessor(rc, repo_dir, output_dir)
     elif rc.resource.identifier == 'ta':
+        App.logger.debug("do_preprocess: using TaPreprocessor")
         preprocessor = TaPreprocessor(rc, repo_dir, output_dir)
+    elif rc.resource.identifier == 'tn':
+        App.logger.debug("do_preprocess: using TnPreprocessor")
+        preprocessor = TnPreprocessor(rc, repo_dir, output_dir)
     else:
+        App.logger.debug("do_preprocess: using Preprocessor")
         preprocessor = Preprocessor(rc, repo_dir, output_dir)
     return preprocessor.run(), preprocessor
 
@@ -63,10 +71,13 @@ def run(self):
                 else:
                     # Case #3: The project path is multiple chapters, so we piece them together
                     chapters = self.rc.chapters(project.identifier)
+                    App.logger.debug("Merging chapters in '{0}'".format(project.identifier))
                     if len(chapters):
                         text = ''
                         for chapter in chapters:
+                            text = self.mark_chapter(project.identifier, chapter, text)
                             for chunk in self.rc.chunks(project.identifier, chapter):
+                                text = self.mark_chunk(project.identifier, chapter, chunk, text)
                                 text += read_file(os.path.join(project_path, chapter, chunk))+"\n\n"
                         if project.identifier.lower() in BOOK_NUMBERS:
                             filename = '{0}-{1}.{2}'.format(BOOK_NUMBERS[project.identifier.lower()],
@@ -77,6 +88,12 @@ def run(self):
                         write_file(os.path.join(self.output_dir, filename), text)
         return True
 
+    def mark_chapter(self, ident, chapter, text):
+        """
+        Hook called by the chapter-merging loop in run() once per chapter, before that chapter's chunks.
+
+        :param ident: identifier of the project being merged
+        :param chapter: the chapter about to be merged
+        :param text: the text accumulated so far
+        :return: the text to keep accumulating into (the caller replaces its text with this value)
+        """
+        return text  # default does nothing to text
+
+    def mark_chunk(self, ident, chapter, chunk, text):
+        """
+        Hook called by the chapter-merging loop in run() once per chunk, before the chunk file is appended.
+
+        :param ident: identifier of the project being merged
+        :param chapter: the chapter the chunk belongs to
+        :param chunk: the chunk about to be read and appended
+        :param text: the text accumulated so far
+        :return: the text to keep accumulating into (the caller replaces its text with this value)
+        """
+        return text  # default does nothing to text
+
     def is_multiple_jobs(self):
         return False
 
@@ -415,3 +432,238 @@ def fix_links(self, content):
         content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                          content, flags=re.IGNORECASE)
         return content
+
+
+class TnPreprocessor(Preprocessor):
+    """
+    Preprocessor used by do_preprocess() for resources whose identifier is 'tn'.
+
+    run() lets the base class produce the per-book markdown files, then rewrites the placeholder
+    headings emitted by mark_chapter()/mark_chunk(), fixes links, and writes 00-toc.md and index.json.
+    """
+
+    # Known sections in output order: 'book' is the base name of the markdown file, 'title' its display name.
+    sections = [
+        {'book': "00-toc", 'title': 'Table of Contents'},
+        {'book': "01-GEN", 'title': 'Genesis'},
+        {'book': "02-EXO", 'title': 'Exodus'},
+        {'book': "03-LEV", 'title': 'Leviticus'},
+        {'book': "04-NUM", 'title': 'Numbers'},
+        {'book': "05-DEU", 'title': 'Deuteronomy'},
+        {'book': "06-JOS", 'title': 'Joshua'},
+        {'book': "07-JDG", 'title': 'Judges'},
+        {'book': "08-RUT", 'title': 'Ruth'},
+        {'book': "09-1SA", 'title': '1 Samuel'},
+        {'book': "10-2SA", 'title': '2 Samuel'},
+        {'book': "11-1KI", 'title': '1 Kings'},
+        {'book': "12-2KI", 'title': '2 Kings'},
+        {'book': "13-1CH", 'title': '1 Chronicles'},
+        {'book': "14-2CH", 'title': '2 Chronicles'},
+        {'book': "15-EZR", 'title': 'Ezra'},
+        {'book': "16-NEH", 'title': 'Nehemiah'},
+        {'book': "17-EST", 'title': 'Esther'},
+        {'book': "18-JOB", 'title': 'Job'},
+        {'book': "19-PSA", 'title': 'Psalms'},
+        {'book': "20-PRO", 'title': 'Proverbs'},
+        {'book': "21-ECC", 'title': 'Ecclesiastes'},
+        {'book': "22-SNG", 'title': 'Song of Solomon'},
+        {'book': "23-ISA", 'title': 'Isaiah'},
+        {'book': "24-JER", 'title': 'Jeremiah'},
+        {'book': "25-LAM", 'title': 'Lamentations'},
+        {'book': "26-EZK", 'title': 'Ezekiel'},
+        {'book': "27-DAN", 'title': 'Daniel'},
+        {'book': "28-HOS", 'title': 'Hosea'},
+        {'book': "29-JOL", 'title': 'Joel'},
+        {'book': "30-AMO", 'title': 'Amos'},
+        {'book': "31-OBA", 'title': 'Obadiah'},
+        {'book': "32-JON", 'title': 'Jonah'},
+        {'book': "33-MIC", 'title': 'Micah'},
+        {'book': "34-NAM", 'title': 'Nahum'},
+        {'book': "35-HAB", 'title': 'Habakkuk'},
+        {'book': "36-ZEP", 'title': 'Zephaniah'},
+        {'book': "37-HAG", 'title': 'Haggai'},
+        {'book': "38-ZEC", 'title': 'Zechariah'},
+        {'book': "39-MAL", 'title': 'Malachi'},
+        {'book': "41-MAT", 'title': 'Matthew'},
+        {'book': "42-MRK", 'title': 'Mark'},
+        {'book': "43-LUK", 'title': 'Luke'},
+        {'book': "44-JHN", 'title': 'John'},
+        {'book': "45-ACT", 'title': 'Acts'},
+        {'book': "46-ROM", 'title': 'Romans'},
+        {'book': "47-1CO", 'title': '1 Corinthians'},
+        {'book': "48-2CO", 'title': '2 Corinthians'},
+        {'book': "49-GAL", 'title': 'Galatians'},
+        {'book': "50-EPH", 'title': 'Ephesians'},
+        {'book': "51-PHP", 'title': 'Philippians'},
+        {'book': "52-COL", 'title': 'Colossians'},
+        {'book': "53-1TH", 'title': '1 Thessalonians'},
+        {'book': "54-2TH", 'title': '2 Thessalonians'},
+        {'book': "55-1TI", 'title': '1 Timothy'},
+        {'book': "56-2TI", 'title': '2 Timothy'},
+        {'book': "57-TIT", 'title': 'Titus'},
+        {'book': "58-PHM", 'title': 'Philemon'},
+        {'book': "59-HEB", 'title': 'Hebrews'},
+        {'book': "60-JAS", 'title': 'James'},
+        {'book': "61-1PE", 'title': '1 Peter'},
+        {'book': "62-2PE", 'title': '2 Peter'},
+        {'book': "63-1JN", 'title': '1 John'},
+        {'book': "64-2JN", 'title': '2 John'},
+        {'book': "65-3JN", 'title': '3 John'},
+        {'book': "66-JUD", 'title': 'Jude'},
+        {'book': "67-REV", 'title': 'Revelation'},
+    ]
+
+    def __init__(self, *args, **kwargs):
+        """Forward all arguments to the base constructor and initialise the tN-specific state."""
+        super(TnPreprocessor, self).__init__(*args, **kwargs)
+        self.section_container_id = 1
+        self.toc = ''
+        self.index_json = None
+        # Placeholder heading prefix written by mark_chapter()/mark_chunk(); compile_section() replaces it.
+        self.section_header_marker = '###############'
+        self.books = []  # file names appended by run(), e.g. '01-GEN.md'
+
+    def is_multiple_jobs(self):
+        """Return True when run() has collected more than one book file."""
+        return len(self.books) > 1
+
+    def get_book_list(self):
+        """Sort self.books in place and return that same list."""
+        self.books.sort()
+        return self.books
+
+    def mark_chapter(self, ident, chapter, text):
+        """Return text with a placeholder chapter heading (marker, space, chapter, blank line) appended."""
+        # deliberately invalid header for the section - compile_section() corrects the heading level later
+        return text + '{0} {1}\n\n'.format(self.section_header_marker, chapter)
+
+    def mark_chunk(self, ident, chapter, chunk, text):
+        """Return text with a placeholder chunk heading (marker, '#', ' chapter:chunk', blank line) appended."""
+        chunk_marker = os.path.splitext(chunk)[0]  # chunk name without its file extension
+        # deliberately invalid header for the section - compile_section() corrects the heading level later
+        return text + '{0}# {1}:{2}\n\n'.format(self.section_header_marker, chapter, chunk_marker)
+
+    def compile_section(self, title, link, content):
+        """
+        Build the final markdown for one book from its merged content.
+
+        A level 1 heading holding an anchor and the title is put first. Placeholder lines written by
+        mark_chapter() become level 2 headings and those written by mark_chunk() become level 3 headings,
+        both prefixed with the title. Every other heading line is pushed three levels deeper (a closing
+        run of '#' is extended to match). A horizontal rule is appended at the end.
+
+        :param title: book title used for the top heading and as prefix of chapter/chunk headings
+        :param link: value of the id attribute of the anchor in the top heading
+        :param content: merged markdown of the book; carriage returns are removed
+        :return: the compiled markdown
+        """
+        level = 3
+        level_increase = '#' * level
+        marker = self.section_header_marker
+        lines = content.replace('\r', '').split('\n')
+        for i, line in enumerate(lines):
+            if line.startswith(marker):
+                text = line[len(marker):]
+                # startswith() instead of text[0]: a bare marker line must not raise IndexError;
+                # it is treated as a chapter marker with no label.
+                if text.startswith('#'):  # chunk marker
+                    lines[i] = level_increase + ' ' + title + text[1:]  # fix header level and add title
+                else:  # chapter marker
+                    lines[i] = '#' * (level - 1) + ' ' + title + text  # fix header level and add title
+            elif line.startswith('#'):
+                stripped = line.rstrip()
+                if stripped.endswith('#'):  # closed heading: deepen both ends
+                    lines[i] = level_increase + stripped + level_increase
+                else:
+                    lines[i] = level_increase + line
+        header = '{0} <a id="{1}"/>{2}\n\n'.format('#' * (level - 2), link, title)  # book title
+        return header + '\n'.join(lines) + '\n\n---\n\n'  # horizontal rule
+
+    def run(self):
+        """
+        Run the base preprocessor, then post-process every known book it produced.
+
+        For each entry of `sections` (in that order) that matches a project of the resource container and
+        whose '<book>.md' exists in the output directory, the file is compiled with compile_section(),
+        passed through fix_links(), rewritten if changed, added to the table of contents and to
+        index_json['titles'], and its file name is appended to self.books. Afterwards 00-toc.md and
+        index.json are written and toc.yaml/config.yaml are copied from the last project's directory
+        when they exist.
+
+        :return: True
+        """
+        super(TnPreprocessor, self).run()
+        self.toc = None
+        self.index_json = {
+            'titles': {},
+            'chapters': {},
+            'book_codes': {}
+        }
+        found_books = set()
+        # Remember the last project explicitly; reading the loop variable after the loop raised
+        # NameError when the resource container had no projects.
+        last_project = None
+        for project in self.rc.projects:
+            last_project = project
+            section = self.get_section_for_file(project.identifier)
+            if section:
+                found_books.add(section['book'])
+                if not self.toc:
+                    self.toc = '# Table of Contents:\n\n'
+            else:
+                App.logger.debug('TnPreprocessor: extra project found: {0}'.format(project.identifier))
+
+        for section in TnPreprocessor.sections:  # index by book order
+            book = section['book']
+            if book not in found_books:
+                App.logger.debug('TnPreprocessor: missing book: {0}'.format(book))
+                continue
+            book_path = os.path.join(self.output_dir, book + '.md')
+            if not os.path.exists(book_path):
+                App.logger.debug('TnPreprocessor: book missing: {0}'.format(book))
+                continue
+            title = section['title']
+            initial_markdown = read_file(book_path)
+            markdown = self.compile_section(title, self.get_link_for_section(section), initial_markdown)
+            markdown = self.fix_links(markdown, book)
+            if initial_markdown != markdown:
+                write_file(book_path, markdown)
+            self.toc += '* [{1}](./{0}.html)\n'.format(book, title)
+            self.index_json['titles'][book + '.html'] = title
+            self.books.append(book + '.md')
+
+        # NOTE(review): self.toc is still None here when no project matched a section - confirm that
+        # write_file() is meant to receive None in that case.
+        self.toc = self.fix_links(self.toc, '-')
+        write_file(os.path.join(self.output_dir, '00-toc.md'), self.toc)
+        self.index_json['titles']['00-toc.html'] = 'Table of Contents'
+        write_file(os.path.join(self.output_dir, 'index.json'), self.index_json)
+
+        # Copy the toc and config.yaml file to the output dir so they can be used to
+        # generate the ToC on live.door43.org
+        if last_project is not None:
+            for yaml_name in ('toc.yaml', 'config.yaml'):
+                yaml_file = os.path.join(self.source_dir, last_project.path, yaml_name)
+                if os.path.isfile(yaml_file):
+                    copy(yaml_file, os.path.join(self.output_dir, yaml_name))
+        return True
+
+    def fix_links(self, content, section_link):
+        """
+        Rewrite the links found in a piece of markdown.
+
+        :param content: markdown to process; falsy values (None, '') are returned unchanged
+        :param section_link: section name whose '../<section_link>/<name>.md' links become '#<name>' anchors
+        :return: the markdown with rc:// links, relative section links and bare URLs rewritten
+        """
+        if not content:
+            return content
+
+        # convert RC links,
+        # e.g. rc://en/tn/help/1sa/16/02 => https://git.door43.org/<repo_name>/en_tn/src/master/1sa/16/02.md
+        content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
+                         r'https://git.door43.org/{0}/\1_\2/src/master/\4.md'.format(self.rc.repo_name), content,
+                         flags=re.IGNORECASE)
+        # fix links to other sections within the same manual (only one ../ and a section name that matches section_link)
+        # e.g. [covenant](../kt/covenant.md) => [covenant](#covenant)
+        # The section name is escaped and the dot of '.md' is matched literally.
+        pattern = r'\]\(\.\./{0}/([^/]+)\.md\)'.format(re.escape(section_link))
+        content = re.sub(pattern, r'](#\1)', content)
+        # fix links to other sections within the same manual (only one ../ and a section name)
+        # e.g. [commit](../other/commit.md) => [commit](other.html#commit)
+        # NOTE(review): the target is '<link>.html' (e.g. 'gen.html') while run() lists the books as
+        # '<book>.html' (e.g. '01-GEN.html') - confirm which file name the links should use.
+        for section in TnPreprocessor.sections:
+            link = self.get_link_for_section(section)
+            pattern = r'\]\(\.\./{0}/([^/]+)\.md\)'.format(re.escape(link))
+            content = re.sub(pattern, r']({0}.html#\1)'.format(link), content)
+        # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
+        # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
+        content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)
+        # convert URLs to links if not already
+        content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
+                         content, flags=re.IGNORECASE)
+        # URLs with just www at the start, no http
+        content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
+                         content, flags=re.IGNORECASE)
+        return content
+
+    def get_section_for_file(self, id):
+        """
+        Find the entry of `sections` that belongs to an identifier.
+
+        :param id: identifier to look up; compared case-insensitively
+        :return: the first section whose 'book' (lower-cased) or link equals the identifier, else None
+        """
+        id = id.lower()
+        for section in TnPreprocessor.sections:
+            if (id == section['book'].lower()) or (id == self.get_link_for_section(section)):
+                return section
+        return None
+
+    def get_link_for_section(self, section):
+        """
+        Return the link name of a section.
+
+        :param section: an entry of `sections`
+        :return: the lower-cased second '-'-separated part of section['book'] (e.g. '01-GEN' => 'gen'),
+                 or section['book'] unchanged when it contains no '-'
+        """
+        parts = section['book'].split('-')
+        return parts[1].lower() if len(parts) > 1 else section['book']
diff --git a/libraries/converters/converter.py b/libraries/converters/converter.py
@@ -1,7 +1,9 @@
 from __future__ import print_function, unicode_literals
+import json
 import os
 import tempfile
 import traceback
+import urlparse
 import requests
 from libraries.general_tools.url_utils import download_file
 from libraries.general_tools.file_utils import unzip, add_contents_to_zip, remove_tree, remove
@@ -132,7 +134,7 @@ def do_callback(self, url, payload):
         if url.startswith('http'):
             headers = {"content-type": "application/json"}
             App.logger.debug('Making callback to {0} with payload:'.format(url))
-            App.logger.debug(payload)
+            App.logger.debug(json.dumps(payload)[:256])
             response = requests.post(url, json=payload, headers=headers)
             self.callback_status = response.status_code
             if (self.callback_status >= 200) and (self.callback_status < 299):
@@ -141,3 +143,17 @@ def do_callback(self, url, payload):
                 App.logger.error('Error calling callback code {0}: {1}'.format(self.callback_status, response.reason))
         else:
             App.logger.error('Invalid callback url: {0}'.format(url))
+
+    def check_for_exclusive_convert(self):
+        """
+        Extract the 'convert_only' query parameter from self.source.
+
+        When the parameter is present, self.source is replaced by the same URL reduced to scheme,
+        host and path (params, query and fragment are dropped). Only the first 'convert_only'
+        parameter is used.
+
+        :return: the comma-separated values of 'convert_only' as a list, or [] when self.source is
+                 empty or carries no such parameter
+        """
+        if not self.source:
+            return []
+        url_parts = urlparse.urlparse(self.source)
+        for key, value in urlparse.parse_qsl(url_parts.query):
+            if key == 'convert_only':
+                bare_url = (url_parts.scheme, url_parts.netloc, url_parts.path, '', '', '')
+                self.source = urlparse.urlunparse(bare_url)
+                return value.split(',')
+        return []
diff --git a/libraries/converters/md2html_converter.py b/libraries/converters/md2html_converter.py
@@ -25,6 +25,7 @@ def convert_obs(self):
 
         # find the first directory that has md files.
         files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
+        convert_only_list = self.check_for_exclusive_convert()
 
         current_dir = os.path.dirname(os.path.realpath(__file__))
         with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -34,6 +35,10 @@ def convert_obs(self):
 
         for filename in files:
             if filename.endswith('.md'):
+                base_name = os.path.basename(filename)
+                if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
+                    continue
+
                 # Convert files that are markdown files
                 with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                     md = md_file.read()
@@ -73,7 +78,7 @@ def convert_markdown(self):
                 # Convert files that are markdown files
                 with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
                     md = md_file.read()
-                if self.resource == 'ta':
+                if self.resource in ['ta']:
                     html = markdown2.markdown(md, extras=['markdown-in-html', 'tables'])
                 else:
                     html = markdown.markdown(md)