This repository has been archived by the owner on May 22, 2024. It is now read-only.

Feature 7/add tn to md2html conversion #190

Merged
Changes from all commits
63 commits
5a0dc31
Merged changes from tQ
PhotoNomad0 Oct 14, 2017
c5c021c
TnLinter - added link checking
PhotoNomad0 Oct 14, 2017
27750e6
TestTnLinter - tweaked tests
PhotoNomad0 Oct 14, 2017
32598e1
TnPreprocessor - added warning on missing files
PhotoNomad0 Oct 15, 2017
03071d0
TnPreprocessor - added warning on missing files
PhotoNomad0 Oct 15, 2017
da477ec
TnLinter - added warning on missing files
PhotoNomad0 Oct 15, 2017
b612539
TnLinter - added warning on missing files
PhotoNomad0 Oct 15, 2017
eeb160a
TnLinter - added warning on missing files
PhotoNomad0 Oct 15, 2017
b7a8f73
TnLinter - added warning on missing files
PhotoNomad0 Oct 15, 2017
24a0295
TnPreprocessor - added toc
PhotoNomad0 Oct 15, 2017
4b18e18
TnPreprocessor - added toc
PhotoNomad0 Oct 16, 2017
12a4999
TnPreprocessor - removed passing repo name
PhotoNomad0 Oct 17, 2017
1fc4a94
TnPreprocessor - comment cleanup
PhotoNomad0 Oct 17, 2017
64d597c
TnPreprocessor - added support for multiple jobs
PhotoNomad0 Oct 17, 2017
e6d6e5a
TnLinter - removed warning for missing index
PhotoNomad0 Oct 17, 2017
eb15304
ProjectDeployerTests - fixed tn unit test for part
PhotoNomad0 Oct 17, 2017
64b2ada
moved shared code for multi-part conversion.
PhotoNomad0 Oct 18, 2017
7b6a63a
MarkdownLinter & TnLinter & TestTnLinter - added single file linting …
PhotoNomad0 Oct 18, 2017
7362c2d
ProjectDeployer - Added flag to prevent parallel deploy multi-part m…
PhotoNomad0 Oct 18, 2017
0a04233
Merge branch 'develop' of github.com:unfoldingWord-dev/tx-manager int…
PhotoNomad0 Oct 19, 2017
dcc9b21
Usfm2HtmlConverter- fixed unit test after merge.
PhotoNomad0 Oct 19, 2017
8528938
Merge branch 'develop' of github.com:unfoldingWord-dev/tx-manager int…
PhotoNomad0 Oct 19, 2017
0b2078e
- added some checking so that we don't get more than one multipart me…
PhotoNomad0 Oct 19, 2017
b146ab1
TestTnLinter - adding test cases
PhotoNomad0 Oct 19, 2017
375bc5f
Md2HtmlConverter - fixed single part conversion for md
PhotoNomad0 Oct 19, 2017
5f6fb86
ProjectDeployer - fixed single part conversion for md
PhotoNomad0 Oct 19, 2017
8e70cbc
Linter - added size limit to callback messages to prevent message ove…
PhotoNomad0 Oct 20, 2017
49b274b
Linter - added size limit to callback messages to prevent message ove…
PhotoNomad0 Oct 20, 2017
0ad15cc
TnPreprocessor - fix to split books.
PhotoNomad0 Oct 20, 2017
8a46b1d
TnLinter - fix to split books.
PhotoNomad0 Oct 20, 2017
3afb749
Added debugging data.
PhotoNomad0 Oct 20, 2017
1619169
TnLinter - fix to split books.
PhotoNomad0 Oct 20, 2017
b296a7a
TnLinter & MarkdownLinter - fix to split books.
PhotoNomad0 Oct 21, 2017
b62332c
TnLinter & MarkdownLinter - fix to split books.
PhotoNomad0 Oct 21, 2017
9cd7737
TnLinter & MarkdownLinter - fix to split books.
PhotoNomad0 Oct 21, 2017
971a854
TnLinter & MarkdownLinter - fix to split books.
PhotoNomad0 Oct 21, 2017
98853ab
logging.
PhotoNomad0 Oct 21, 2017
97188d4
logging.
PhotoNomad0 Oct 21, 2017
0f24919
deploy fix.
PhotoNomad0 Oct 22, 2017
becae7d
deploy fix.
PhotoNomad0 Oct 22, 2017
1958561
multi-part merge fix.
PhotoNomad0 Oct 22, 2017
2f2cd9f
multi-part merge fix.
PhotoNomad0 Oct 22, 2017
e7e8bc9
linter fix.
PhotoNomad0 Oct 22, 2017
7ec49be
Merge branch 'develop' of github.com:unfoldingWord-dev/tx-manager int…
PhotoNomad0 Oct 22, 2017
0995f24
TnPreprocessor & TestTnPreprocessor - update to generate output simil…
PhotoNomad0 Oct 24, 2017
cf37af8
TnPreprocessor - fix for empty list
PhotoNomad0 Oct 24, 2017
4dc8ed9
TnPreprocessor - fixed book list
PhotoNomad0 Oct 24, 2017
a8fa049
logging fix
PhotoNomad0 Oct 24, 2017
2042d87
revert logging fix
PhotoNomad0 Oct 24, 2017
6bd4622
TnLinter - fix for TnPreprocessor changes
PhotoNomad0 Oct 24, 2017
8b0bc4c
TnPreprocessor - tweak for heading levels
PhotoNomad0 Oct 24, 2017
c6a9f32
TnPreprocessor - tweak for heading levels
PhotoNomad0 Oct 24, 2017
c5f6771
TnPreprocessor - fix links
PhotoNomad0 Oct 24, 2017
4d032aa
TestMd2HtmlConverter & TestMarkdownLinter - fix unit tests
PhotoNomad0 Oct 24, 2017
cae7da7
TestTnLinter - improving coverage
PhotoNomad0 Oct 24, 2017
5be30c5
TnLinter - improving coverage
PhotoNomad0 Oct 25, 2017
7c901e0
MarkdownLinter - fix relative paths on source files
PhotoNomad0 Oct 25, 2017
561b908
cleanup
PhotoNomad0 Oct 26, 2017
e34b82b
Removed leftover references to 00-toc files
PhotoNomad0 Oct 31, 2017
a8c346b
TestTnPreprocessor - added shorter test with just two books.
PhotoNomad0 Oct 31, 2017
f554e2a
Cleanup
PhotoNomad0 Oct 31, 2017
39d1a17
Copied code from TqTemplater to TnTemplater to build navigation
PhotoNomad0 Oct 31, 2017
6513288
ProjectDeployerTests - updated tn test with index.json
PhotoNomad0 Oct 31, 2017
2 changes: 1 addition & 1 deletion functions/convert_md2html/module.json
@@ -2,7 +2,7 @@
"name": "md2html",
"version": "2",
"type": "converter",
"resource_types": ["obs", "ta", "tq", "tw"],
"resource_types": ["obs", "ta", "tq", "tw", "tn"],
"input_format": ["md"],
"output_format": ["html"],
"options": [],
2 changes: 1 addition & 1 deletion libraries/client/client_webhook.py
@@ -188,7 +188,7 @@ def process_webhook(self):
self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
else:
# -----------------------------
# multiple Bible book project
# multiple book project
# -----------------------------
books = preprocessor.get_book_list()
App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
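The "multiple book project" branch above splits one webhook request into separate per-book conversion jobs. As a rough illustration of that idea only — the `part` tag and `convert_only` encoding below are assumptions for the sketch, not the exact tx-manager formats:

```python
def split_into_part_jobs(base_job, books):
    """Clone a base job once per book so each part converts a single file.

    Hypothetical shapes: base_job is a plain dict with a 'source' URL;
    the '<index>/<count>' part tag and the convert_only query parameter
    are modeled loosely on the rest of this PR.
    """
    part_jobs = []
    count = len(books)
    for i, book in enumerate(books):
        job = dict(base_job)  # shallow copy per part
        job['part'] = '{0}/{1}'.format(i, count)
        # Restrict the converter to this book's file via the source URL query.
        job['source'] = '{0}?convert_only={1}'.format(base_job['source'], book)
        part_jobs.append(job)
    return part_jobs
```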
112 changes: 111 additions & 1 deletion libraries/client/preprocessors.py
@@ -26,8 +26,11 @@ def do_preprocess(rc, repo_dir, output_dir):
elif rc.resource.identifier == 'tw':
App.logger.debug("do_preprocess: using TwPreprocessor")
preprocessor = TwPreprocessor(rc, repo_dir, output_dir)
elif rc.resource.identifier == 'tn':
App.logger.debug("do_preprocess: using TnPreprocessor")
preprocessor = TnPreprocessor(rc, repo_dir, output_dir)
else:
App.logger.debug("do_preprocess: using Preprocessor")
App.logger.debug("do_preprocess: using Preprocessor for resource: {0}".format(rc.resource.identifier))
preprocessor = Preprocessor(rc, repo_dir, output_dir)
return preprocessor.run(), preprocessor

@@ -572,3 +575,110 @@ def fix_links(self, content, section):
content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
content, flags=re.IGNORECASE)
return content


class TnPreprocessor(Preprocessor):
index_json = {
'titles': {},
'chapters': {},
'book_codes': {}
}

def __init__(self, *args, **kwargs):
super(TnPreprocessor, self).__init__(*args, **kwargs)
self.books = []

def is_multiple_jobs(self):
return True

def get_book_list(self):
return self.books

def run(self):
index_json = {
'titles': {},
'chapters': {},
'book_codes': {}
}
headers_re = re.compile('^(#+) +(.+?) *#*$', flags=re.MULTILINE)
for idx, project in enumerate(self.rc.projects):
App.logger.debug('TnPreprocessor: processing project: {0}'.format(project.identifier))
if project.identifier in BOOK_NAMES:
markdown = ''
book = project.identifier.lower()
html_file = '{0}-{1}.html'.format(BOOK_NUMBERS[book], book.upper())
index_json['book_codes'][html_file] = book
name = BOOK_NAMES[book]
index_json['titles'][html_file] = name
chapter_dirs = sorted(glob(os.path.join(self.source_dir, project.path, '*')))
markdown += '# <a id="tn-{0}"/> {1}\n\n'.format(book, name)
index_json['chapters'][html_file] = []
for move_str in ['front', 'intro']:
self.move_to_front(chapter_dirs, move_str)
for chapter_dir in chapter_dirs:
chapter = os.path.basename(chapter_dir)
link = 'tn-chapter-{0}-{1}'.format(book, chapter.zfill(3))
index_json['chapters'][html_file].append(link)
markdown += '## <a id="{0}"/> {1} {2}\n\n'.format(link, name, chapter.lstrip('0'))
chunk_files = sorted(glob(os.path.join(chapter_dir, '*.md')))
for move_str in ['front', 'intro']:
self.move_to_front(chunk_files, move_str)
for chunk_idx, chunk_file in enumerate(chunk_files):
start_verse = os.path.splitext(os.path.basename(chunk_file))[0].lstrip('0')
if chunk_idx < len(chunk_files)-1:
base_file_name = os.path.splitext(os.path.basename(chunk_files[chunk_idx + 1]))[0]
if base_file_name.isdigit():
end_verse = str(int(base_file_name) - 1)
else:
end_verse = start_verse
else:
chapter_str = chapter.lstrip('0')
chapter_verses = BOOK_CHAPTER_VERSES[book]
end_verse = chapter_verses[chapter_str] if chapter_str in chapter_verses else start_verse

start_verse_str = str(start_verse).zfill(3) if start_verse.isdigit() else start_verse
link = 'tn-chunk-{0}-{1}-{2}'.format(book, str(chapter).zfill(3), start_verse_str)
markdown += '### <a id="{0}"/>{1} {2}:{3}{4}\n\n'. \
format(link, name, chapter.lstrip('0'), start_verse,
'-'+end_verse if start_verse != end_verse else '')
text = read_file(chunk_file) + '\n\n'
text = headers_re.sub(r'\1## \2', text) # This will bump any header down 2 levels
markdown += text
markdown = self.fix_links(markdown)
book_file_name = '{0}-{1}.md'.format(BOOK_NUMBERS[book], book.upper())
self.books.append(book_file_name)
file_path = os.path.join(self.output_dir, book_file_name)
write_file(file_path, markdown)
else:
App.logger.debug('TnPreprocessor: extra project found: {0}'.format(project.identifier))
# Write out index.json
output_file = os.path.join(self.output_dir, 'index.json')
write_file(output_file, index_json)
return True

def move_to_front(self, files, move_str):
if files:
last_file = files[-1]
if move_str in last_file: # move intro to front
files.pop()
files.insert(0, last_file)

def fix_links(self, content):
# convert tA RC links, e.g. rc://en/ta/man/translate/figs-euphemism => https://git.door43.org/Door43/en_ta/translate/figs-euphemism/01.md
content = re.sub(r'rc://([^/]+)/ta/([^/]+)/([^\s)\]\n$]+)',
r'https://git.door43.org/Door43/\1_ta/src/master/\3/01.md', content,
flags=re.IGNORECASE)
# convert other RC links, e.g. rc://en/tn/help/1sa/16/02 => https://git.door43.org/Door43/en_tn/1sa/16/02.md
content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
r'https://git.door43.org/Door43/\1_\2/src/master/\4.md', content,
flags=re.IGNORECASE)
# fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
# e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)
# convert URLs to links if not already
content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
content, flags=re.IGNORECASE)
# URLS wth just www at the start, no http
content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
content, flags=re.IGNORECASE)
return content
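The two RC-link substitutions added in `TnPreprocessor.fix_links` can be exercised on their own. This extract keeps the regexes verbatim from the diff; only the surrounding method is trimmed to a plain function:

```python
import re

def convert_rc_links(content):
    # tA links get special handling: the manual segment (e.g. "man") is dropped
    # and a trailing /01.md page is appended.
    # rc://en/ta/man/translate/figs-euphemism
    #   => https://git.door43.org/Door43/en_ta/src/master/translate/figs-euphemism/01.md
    content = re.sub(r'rc://([^/]+)/ta/([^/]+)/([^\s)\]\n$]+)',
                     r'https://git.door43.org/Door43/\1_ta/src/master/\3/01.md',
                     content, flags=re.IGNORECASE)
    # All other RC links map straight onto the repo path with a .md extension.
    # rc://en/tn/help/1sa/16/02
    #   => https://git.door43.org/Door43/en_tn/src/master/1sa/16/02.md
    content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
                     r'https://git.door43.org/Door43/\1_\2/src/master/\4.md',
                     content, flags=re.IGNORECASE)
    return content
```

Note the order matters: the tA rule must run first, or the generic rule would rewrite tA links without dropping the manual segment.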
16 changes: 16 additions & 0 deletions libraries/converters/converter.py
@@ -3,6 +3,7 @@
import os
import tempfile
import traceback
import urlparse
import requests
from libraries.general_tools.url_utils import download_file
from libraries.general_tools.file_utils import unzip, add_contents_to_zip, remove_tree, remove
@@ -142,3 +143,18 @@ def do_callback(self, url, payload):
App.logger.error('Error calling callback code {0}: {1}'.format(self.callback_status, response.reason))
else:
App.logger.error('Invalid callback url: {0}'.format(url))

def check_for_exclusive_convert(self):
convert_only = []
if self.source and len(self.source) > 0:
parsed = urlparse.urlparse(self.source)
params = urlparse.parse_qsl(parsed.query)
if params and len(params) > 0:
for i in range(0, len(params)):
item = params[i]
if item[0] == 'convert_only':
convert_only = item[1].split(',')
App.logger.debug('Converting only: {0}'.format(convert_only))
self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
break
return convert_only
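`check_for_exclusive_convert` above parses an optional `convert_only` query parameter off the job's source URL and strips the query before download. A standalone sketch of the same parsing — the original targets Python 2's `urlparse` module, so this uses the Python 3 `urllib.parse` equivalents, and returns the cleaned URL instead of mutating `self.source`:

```python
from urllib.parse import urlparse, parse_qsl, urlunparse

def check_for_exclusive_convert(source):
    """Return (convert_only_list, cleaned_source)."""
    convert_only = []
    if source:
        parsed = urlparse(source)
        for key, value in parse_qsl(parsed.query):
            if key == 'convert_only':
                convert_only = value.split(',')
                # Strip the query string so downstream download code
                # sees a plain URL.
                source = urlunparse((parsed.scheme, parsed.netloc,
                                     parsed.path, '', '', ''))
                break
    return convert_only, source
```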
5 changes: 5 additions & 0 deletions libraries/converters/md2html_converter.py
@@ -61,6 +61,7 @@ def convert_markdown(self):

# find the first directory that has md files.
files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
convert_only_list = self.check_for_exclusive_convert()

current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -70,6 +71,10 @@

for filename in files:
if filename.endswith('.md'):
base_name = os.path.basename(filename)
if convert_only_list and (base_name not in convert_only_list): # see if this is a file we are to convert
continue

# Convert files that are markdown files
with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
md = md_file.read()
20 changes: 3 additions & 17 deletions libraries/converters/usfm2html_converter.py
@@ -26,20 +26,7 @@ def convert_bible(self):

# find the first directory that has usfm files.
files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)

exclusive_convert = False
convert_only = []
if self.source and len(self.source) > 0:
parsed = urlparse.urlparse(self.source)
params = urlparse.parse_qsl(parsed.query)
if params and len(params) > 0:
for i in range(0, len(params)):
item = params[i]
if item[0] == 'convert_only':
convert_only = item[1].split(',')
exclusive_convert = True
self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
break
convert_only_list = self.check_for_exclusive_convert()

current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -48,9 +35,8 @@
for filename in files:
if filename.endswith('.usfm'):
base_name = os.path.basename(filename)
if exclusive_convert:
if base_name not in convert_only: # see if this is a file we are to convert
continue
if convert_only_list and (base_name not in convert_only_list): # see if this is a file we are to convert
continue

msg = 'Converting Bible USFM file: {0}'.format(base_name)
self.log.info(msg)
71 changes: 42 additions & 29 deletions libraries/door43_tools/project_deployer.py
@@ -46,7 +46,7 @@ def deploy_revision_to_door43(self, build_log_key):

if not build_log or 'commit_id' not in build_log or 'repo_owner' not in build_log \
or 'repo_name' not in build_log:
App.logger.debug("Invalid build log at {0}: {1}".format(build_log_key, build_log))
App.logger.debug("Exiting, Invalid build log at {0}: {1}".format(build_log_key, build_log))
return False

start = time.time()
@@ -60,20 +60,33 @@
s3_repo_key = 'u/{0}/{1}'.format(user, repo_name)
download_key = s3_commit_key

partial = False
multi_merge = False
do_part_template_only = False
do_multipart_merge = False
if 'multiple' in build_log:
multi_merge = build_log['multiple']
App.logger.debug("found multi-part merge")
do_multipart_merge = build_log['multiple']
App.logger.debug("Found multi-part merge: {0}".format(download_key))

prefix = download_key + '/'
undeployed = self.get_undeployed_parts(prefix)
if len(undeployed) > 0:
App.logger.debug("Exiting, Parts not yet deployed: {0}".format(undeployed))
return False

key_deployed_ = download_key + '/final_deployed'
if App.cdn_s3_handler().key_exists(key_deployed_):
App.logger.debug("Exiting, Already merged parts: {0}".format(download_key))
return False
self.write_data_to_file(self.temp_dir, key_deployed_, 'final_deployed', ' ') # flag that deploy has begun
App.logger.debug("Continuing with merge: {0}".format(download_key))

elif 'part' in build_log:
part = build_log['part']
download_key += '/' + part
partial = True
App.logger.debug("found partial: " + part)
do_part_template_only = True
App.logger.debug("Found partial: {0}".format(download_key))

if not App.cdn_s3_handler().key_exists(download_key + '/finished'):
App.logger.debug("Not ready to process partial")
App.logger.debug("Exiting, Not ready to process partial")
return False

source_dir = tempfile.mkdtemp(prefix='source_', dir=self.temp_dir)
@@ -86,21 +99,26 @@
App.logger.debug("Downloading {0} to {1}...".format(template_key, template_file))
App.door43_s3_handler().download_file(template_key, template_file)

if not multi_merge:
source_dir, success = self.deploy_single_conversion(build_log, download_key, output_dir, repo_name,
if not do_multipart_merge:
source_dir, success = self.template_converted_files(build_log, download_key, output_dir, repo_name,
resource_type, s3_commit_key, source_dir, start,
template_file)
if not success:
return False
else:
# merge multi-part project
source_dir, success = self.deploy_multipart_master(s3_commit_key, resource_type, download_key, output_dir,
source_dir, start, template_file)
source_dir, success = self.multipart_master_merge(s3_commit_key, resource_type, download_key, output_dir,
source_dir, start, template_file)
if not success:
return False

# Copy first HTML file to index.html if index.html doesn't exist
if not partial or multi_merge:
#######################
#
# Now do the deploy
#
#######################

if not do_part_template_only or do_multipart_merge:
# Copy first HTML file to index.html if index.html doesn't exist
html_files = sorted(glob(os.path.join(output_dir, '*.html')))
index_file = os.path.join(output_dir, 'index.html')
if len(html_files) > 0 and not os.path.isfile(index_file):
@@ -112,7 +130,7 @@
if not os.path.exists(output_file) and not os.path.isdir(filename):
copyfile(filename, output_file)

if partial: # move files to common area
if do_part_template_only: # move files to common area
basename = os.path.basename(filename)
if basename not in ['finished', 'build_log.json', 'index.html', 'merged.json', 'lint_log.json']:
App.logger.debug("Moving {0} to common area".format(basename))
@@ -133,8 +151,8 @@
App.logger.debug("Uploading {0} to {1}".format(path, key))
App.door43_s3_handler().upload_file(path, key, cache_time=0)

if not partial:
# Now we place json files and make an index.html file for the whole repo
if not do_part_template_only:
# Now we place json files and redirect index.html for the whole repo to this index.html file
try:
App.door43_s3_handler().copy(from_key='{0}/project.json'.format(s3_repo_key), from_bucket=App.cdn_bucket)
App.door43_s3_handler().copy(from_key='{0}/manifest.json'.format(s3_commit_key),
@@ -145,28 +163,23 @@
except:
pass

else: # if processing part
else: # if processing part of multi-part merge
self.write_data_to_file(output_dir, download_key, 'deployed', ' ') # flag that deploy has finished
if App.cdn_s3_handler().key_exists(s3_commit_key + '/final_build_log.json'):
App.logger.debug("final build detected")
App.logger.debug("conversions all finished, trigger final merge")
App.cdn_s3_handler().copy(from_key=s3_commit_key + '/final_build_log.json',
to_key=s3_commit_key + '/build_log.json')

self.write_data_to_file(output_dir, download_key, 'deployed', ' ') # flag that deploy has finished

elapsed_seconds = int(time.time() - start)
App.logger.debug("deploy type partial={0}, multi_merge={1}".format(partial, multi_merge))
App.logger.debug("deploy type partial={0}, multi_merge={1}".format(do_part_template_only, do_multipart_merge))
App.logger.debug("deploy completed in {0} seconds".format(elapsed_seconds))
self.close()
return True

def deploy_multipart_master(self, s3_commit_key, resource_type, download_key, output_dir, source_dir, start,
template_file):
def multipart_master_merge(self, s3_commit_key, resource_type, download_key, output_dir, source_dir, start,
template_file):
prefix = download_key + '/'
undeployed = self.get_undeployed_parts(prefix)
if len(undeployed) > 0:
App.logger.debug("Parts not deployed: {0}".format(undeployed))

App.door43_s3_handler().download_dir(prefix, source_dir) # get previous templated files
source_dir = os.path.join(source_dir, download_key)
files = sorted(glob(os.path.join(source_dir, '*.*')))
@@ -209,7 +222,7 @@ def get_undeployed_parts(self, prefix):
unfinished.append(part_num)
return unfinished

def deploy_single_conversion(self, build_log, download_key, output_dir, repo_name, resource_type, s3_commit_key,
def template_converted_files(self, build_log, download_key, output_dir, repo_name, resource_type, s3_commit_key,
source_dir, start, template_file):
App.cdn_s3_handler().download_dir(download_key + '/', source_dir)
source_dir = os.path.join(source_dir, download_key.replace('/', os.path.sep))
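The deployer changes above gate the final multi-part merge on two marker keys: each part writes a `deployed` flag when its templating finishes, and the merge itself writes `final_deployed` so that only one invocation performs it. A minimal in-memory model of that gating — the key names are taken from the diff, but the class itself is illustrative, standing in for the real S3 handlers:

```python
class MergeGate:
    """In-memory model of the multi-part deploy gating in project_deployer."""

    def __init__(self, parts):
        self.parts = set(parts)   # part identifiers, e.g. '0', '1', ...
        self.keys = set()         # stands in for keys present in the S3 bucket

    def mark_part_deployed(self, part):
        # Each part flags completion, mirroring the 'deployed' marker file.
        self.keys.add('{0}/deployed'.format(part))

    def try_final_merge(self):
        # Exit if any part has not yet deployed.
        undeployed = [p for p in self.parts
                      if '{0}/deployed'.format(p) not in self.keys]
        if undeployed:
            return False
        # Exit if another invocation already began the merge.
        if 'final_deployed' in self.keys:
            return False
        self.keys.add('final_deployed')  # flag that the merge has begun
        return True
```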