
Commit f589303

Merge 0a04233 into d60c033
PhotoNomad0 committed Oct 19, 2017
2 parents d60c033 + 0a04233 commit f589303
Showing 20 changed files with 836 additions and 27 deletions.
2 changes: 1 addition & 1 deletion functions/convert_md2html/module.json
@@ -2,7 +2,7 @@
"name": "md2html",
"version": "2",
"type": "converter",
"resource_types": ["obs", "ta", "tq", "tw"],
"resource_types": ["obs", "ta", "tq", "tw", "tn"],
"input_format": ["md"],
"output_format": ["html"],
"options": [],
2 changes: 1 addition & 1 deletion libraries/client/client_webhook.py
@@ -188,7 +188,7 @@ def process_webhook(self):
self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
else:
# -----------------------------
- # multiple Bible book project
+ # multiple book project
# -----------------------------
books = preprocessor.get_book_list()
App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
473 changes: 473 additions & 0 deletions libraries/client/preprocessors.py

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions libraries/converters/converter.py
@@ -3,6 +3,7 @@
import os
import tempfile
import traceback
import urlparse
import requests
from libraries.general_tools.url_utils import download_file
from libraries.general_tools.file_utils import unzip, add_contents_to_zip, remove_tree, remove
@@ -142,3 +143,17 @@ def do_callback(self, url, payload):
App.logger.error('Error calling callback code {0}: {1}'.format(self.callback_status, response.reason))
else:
App.logger.error('Invalid callback url: {0}'.format(url))

def check_for_exclusive_convert(self):
convert_only = []
if self.source and len(self.source) > 0:
parsed = urlparse.urlparse(self.source)
params = urlparse.parse_qsl(parsed.query)
if params and len(params) > 0:
for i in range(0, len(params)):
item = params[i]
if item[0] == 'convert_only':
convert_only = item[1].split(',')
self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
break
return convert_only
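
For illustration, a minimal standalone sketch of what check_for_exclusive_convert does with a source URL (not part of the commit; the example URL and file names are assumptions, and the real method works on self.source):

try:  # Python 3
    from urllib.parse import urlparse, parse_qsl, urlunparse
except ImportError:  # Python 2, which this code base targets
    from urlparse import urlparse, parse_qsl, urlunparse

# Assumed example: a pre-convert zip URL carrying a convert_only query parameter.
source = 'https://cdn.example.org/preconvert/abc123.zip?convert_only=01-GEN.usfm,67-REV.usfm'

convert_only = []
parsed = urlparse(source)
for name, value in parse_qsl(parsed.query):
    if name == 'convert_only':
        convert_only = value.split(',')
        # strip the query string so the zip can still be downloaded normally
        source = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
        break

print(convert_only)  # ['01-GEN.usfm', '67-REV.usfm']
print(source)        # https://cdn.example.org/preconvert/abc123.zip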
5 changes: 5 additions & 0 deletions libraries/converters/md2html_converter.py
@@ -25,6 +25,7 @@ def convert_obs(self):

# find the first directory that has md files.
files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
convert_only_list = self.check_for_exclusive_convert()

current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -34,6 +35,10 @@

for filename in files:
if filename.endswith('.md'):
base_name = os.path.basename(filename)
if convert_only_list and (base_name not in convert_only_list): # see if this is a file we are to convert
continue

# Convert files that are markdown files
with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
md = md_file.read()
20 changes: 3 additions & 17 deletions libraries/converters/usfm2html_converter.py
@@ -26,20 +26,7 @@ def convert_bible(self):

# find the first directory that has usfm files.
files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)

- exclusive_convert = False
- convert_only = []
- if self.source and len(self.source) > 0:
-     parsed = urlparse.urlparse(self.source)
-     params = urlparse.parse_qsl(parsed.query)
-     if params and len(params) > 0:
-         for i in range(0, len(params)):
-             item = params[i]
-             if item[0] == 'convert_only':
-                 convert_only = item[1].split(',')
-                 exclusive_convert = True
-                 self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
-                 break
+ convert_only_list = self.check_for_exclusive_convert()

current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -48,9 +35,8 @@
for filename in files:
if filename.endswith('.usfm'):
base_name = os.path.basename(filename)
- if exclusive_convert:
-     if base_name not in convert_only:  # see if this is a file we are to convert
-         continue
+ if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
+     continue

msg = 'Converting Bible USFM file: {0}'.format(base_name)
self.log.info(msg)
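For illustration, a minimal sketch of the shared skip logic both converters now use: only files named in convert_only_list are converted (not part of the commit; the file names below are assumptions):

import os

convert_only_list = ['01-GEN.usfm', '67-REV.usfm']   # as parsed from ?convert_only=...
files = ['/tmp/repo/01-GEN.usfm', '/tmp/repo/02-EXO.usfm', '/tmp/repo/67-REV.usfm', '/tmp/repo/manifest.yaml']

for filename in files:
    if not filename.endswith('.usfm'):
        continue
    base_name = os.path.basename(filename)
    if convert_only_list and (base_name not in convert_only_list):
        continue  # not one of the requested books
    print('Converting Bible USFM file: {0}'.format(base_name))  # prints 01-GEN.usfm and 67-REV.usfm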
5 changes: 5 additions & 0 deletions libraries/door43_tools/project_deployer.py
@@ -65,6 +65,11 @@ def deploy_revision_to_door43(self, build_log_key):
if 'multiple' in build_log:
multi_merge = build_log['multiple']
App.logger.debug("found multi-part merge")
key_deployed_ = download_key + '/deployed'
if App.cdn_s3_handler().key_exists(key_deployed_):
App.logger.debug("Already merged parts")
return False
self.write_data_to_file(self.temp_dir, key_deployed_, 'deployed', ' ') # flag that deploy has begun

elif 'part' in build_log:
part = build_log['part']
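For illustration, a sketch of the new multi-part merge guard: the first deploy of a merged build writes a 'deployed' flag key and proceeds, and any later deploy of the same build sees the flag and returns early (not part of the commit; a plain set stands in for the CDN bucket):

deployed_keys = set()  # stand-in for keys in the CDN S3 bucket

def try_begin_merge(download_key):
    key_deployed = download_key + '/deployed'
    if key_deployed in deployed_keys:    # App.cdn_s3_handler().key_exists(...) in the real code
        return False                     # parts were already merged; skip this deploy
    deployed_keys.add(key_deployed)      # write_data_to_file(...) flags that deploy has begun
    return True

print(try_begin_merge('u/owner/repo/commit'))  # True  - first deploy proceeds
print(try_begin_merge('u/owner/repo/commit'))  # False - duplicate deploy is skipped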
10 changes: 10 additions & 0 deletions libraries/door43_tools/templaters.py
@@ -27,6 +27,8 @@ def init_template(resource_type, source_dir, output_dir, template_file):
templater = TqTemplater(resource_type, source_dir, output_dir, template_file)
elif resource_type == 'tw':
templater = TwTemplater(resource_type, source_dir, output_dir, template_file)
elif resource_type == 'tn':
templater = TnTemplater(resource_type, source_dir, output_dir, template_file)
else:
templater = Templater(resource_type, source_dir, output_dir, template_file)
return templater
@@ -257,6 +259,14 @@ def __init__(self, *args, **kwargs):
self.titles = index['titles']


class TnTemplater(Templater):
def __init__(self, *args, **kwargs):
super(TnTemplater, self).__init__(*args, **kwargs)
index = file_utils.load_json_object(os.path.join(self.source_dir, 'index.json'))
if index:
self.titles = index['titles']


class BibleTemplater(Templater):
def __init__(self, *args, **kwargs):
super(BibleTemplater, self).__init__(*args, **kwargs)
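For illustration, a simplified sketch of the dispatch this commit extends (stand-in classes, not the real ones): resource_type 'tn' now selects a TnTemplater, which pulls page titles from the index.json written by the TN preprocessor:

import json
import os

class Templater(object):
    def __init__(self, resource_type, source_dir, output_dir, template_file):
        # simplified: the real base class does much more
        self.source_dir = source_dir
        self.titles = {}

class TnTemplater(Templater):
    def __init__(self, *args, **kwargs):
        super(TnTemplater, self).__init__(*args, **kwargs)
        index_path = os.path.join(self.source_dir, 'index.json')
        if os.path.isfile(index_path):
            with open(index_path) as index_file:
                self.titles = json.load(index_file).get('titles', {})

def init_template(resource_type, source_dir, output_dir, template_file):
    known = {'tn': TnTemplater}
    return known.get(resource_type, Templater)(resource_type, source_dir, output_dir, template_file)

print(type(init_template('tn', '/tmp/tn_source', '/tmp/out', None)).__name__)  # TnTemplater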
1 change: 1 addition & 0 deletions libraries/linters/linter.py
@@ -32,6 +32,7 @@ def __init__(self, source_url=None, source_file=None, source_dir=None, commit_data=None):
self.source_zip_file = source_file
self.source_dir = source_dir
self.commit_data = commit_data
self.convert_only = None

self.log = LintLogger()

13 changes: 11 additions & 2 deletions libraries/linters/markdown_linter.py
@@ -33,8 +33,17 @@ def lint(self):
return True

def get_strings(self):
- files = sorted(get_files(directory=self.source_dir, relative_paths=True, exclude=self.EXCLUDED_FILES,
-                          extensions=['.md']))
+ if self.convert_only:
+     files = []
+     for dir in self.convert_only:
+         dir_path = os.path.join(self.source_dir, dir)
+         sub_files = sorted(get_files(directory=dir_path, relative_paths=True, exclude=self.EXCLUDED_FILES,
+                                      extensions=['.md']))
+         for f in sub_files:
+             files.append(os.path.join(dir, f))
+ else:
+     files = sorted(get_files(directory=self.source_dir, relative_paths=True, exclude=self.EXCLUDED_FILES,
+                              extensions=['.md']))
strings = {}
for f in files:
path = os.path.join(self.source_dir, f)
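For illustration, a simplified version of the scoping added to get_strings: when convert_only is set, only the listed book folders are walked for markdown files (not part of the commit; get_files in the real code also applies EXCLUDED_FILES):

import os

def list_md_files(source_dir, convert_only=None):
    dirs = [os.path.join(source_dir, d) for d in convert_only] if convert_only else [source_dir]
    md_files = []
    for d in dirs:
        for root, _, names in os.walk(d):
            for name in names:
                if name.endswith('.md'):
                    # keep paths relative to source_dir, as get_strings() does
                    md_files.append(os.path.relpath(os.path.join(root, name), source_dir))
    return sorted(md_files)

# e.g. list_md_files('/tmp/en_tn', convert_only=['gen', 'exo']) lints only the Genesis and Exodus notes.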
109 changes: 109 additions & 0 deletions libraries/linters/tn_linter.py
@@ -1,9 +1,18 @@
from __future__ import print_function, unicode_literals
import os
import re
import urlparse
from libraries.app.app import App
from libraries.client.preprocessors import TnPreprocessor
from libraries.general_tools import file_utils
from libraries.linters.markdown_linter import MarkdownLinter


class TnLinter(MarkdownLinter):

# match links of form '](link)'
link_marker_re = re.compile(r'\]\(([^\n()]+)\)', re.UNICODE)

def lint(self):
"""
Checks for issues with translationNotes
@@ -12,4 +21,104 @@ def lint(self):
self.source_dir is the directory of source files (.md)
:return boolean:
"""
self.source_dir = os.path.abspath(self.source_dir)
source_dirs = []
if not self.convert_only:
source_dirs = [self.source_dir]
else:
for d in self.convert_only:
source_dirs.append(os.path.join(self.source_dir, d))

for source in source_dirs:
for root, dirs, files in os.walk(source):
for f in files:
file_path = os.path.join(root, f)
parts = os.path.splitext(f)
if parts[1] == '.md':
contents = file_utils.read_file(file_path)
self.find_invalid_links(root, f, contents)

for section in TnPreprocessor.sections:
book = section['book']
file_path = os.path.join(self.source_dir, '{0}.md'.format(book))
if os.path.exists(file_path):
contents = file_utils.read_file(file_path)
App.logger.debug("Book {0} found, length={1}".format(book, len(contents)))
continue
else:
found_files = False
link = self.get_link_for_book(book)
if link == "toc":
continue # not checking for toc, that will be generated by converter
if self.convert_only and (link not in self.convert_only):
continue
file_path = os.path.join(self.source_dir, link)
for root, dirs, files in os.walk(file_path):
if root == file_path:
continue # skip book folder

if len(files) > 0:
found_files = True
break

if not found_files:
msg = "missing book: '{0}'".format(link)
self.log.warnings.append(msg)
App.logger.debug(msg)

return super(TnLinter, self).lint() # Runs checks on Markdown, using the markdown linter

def find_invalid_links(self, folder, f, contents):
for link_match in TnLinter.link_marker_re.finditer(contents):
link = link_match.group(1)
if link:
if link[:4] == 'http':
continue
if link.find('.md') < 0:
continue

file_path = os.path.join(folder, link)
file_path_abs = os.path.abspath(file_path)
exists = os.path.exists(file_path_abs)
if not exists:
a = self.get_file_link(f, folder)
msg = "{0}: contains invalid link: ({1})".format(a, link)
self.log.warnings.append(msg)
App.logger.debug(msg)

def get_file_link(self, f, folder):
parts = folder.split(self.source_dir)
sub_path = self.source_dir # default
if len(parts) == 2:
sub_path = parts[1][1:]
url = "https://git.door43.org/{0}/{1}/src/master/{2}/{3}".format(self.repo_owner, self.repo_name,
sub_path, f)
a = '<a href="{0}">{1}/{2}</a>'.format(url, sub_path, f)
return a

def get_link_for_book(self, book):
parts = book.split('-')
link = book
if len(parts) > 1:
link = parts[1].lower()
return link

def check_for_exclusive_convert(self):
self.convert_only = []
if self.source_zip_url and len(self.source_zip_url) > 0:
parsed = urlparse.urlparse(self.source_zip_url)
params = urlparse.parse_qsl(parsed.query)
if params and len(params) > 0:
for i in range(0, len(params)):
item = params[i]
if item[0] == 'convert_only':
for f in item[1].split(','):
base_name = f.split('.')[0]
parts = base_name.split('-')
if len(parts) > 1:
base_name = parts[1]
self.convert_only.append(base_name.lower())
self.source_zip_url = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path,
'', '', ''))
break
return self.convert_only
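
For illustration, what TnLinter.link_marker_re matches and how find_invalid_links filters the hits (the regex is copied from the class above; the sample markdown and paths are made up):

import re

link_marker_re = re.compile(r'\]\(([^\n()]+)\)', re.UNICODE)

contents = "See [Genesis 1:2](../01/02.md) and [the ULB](https://example.org/ulb)."
for match in link_marker_re.finditer(contents):
    link = match.group(1)
    if link[:4] == 'http' or link.find('.md') < 0:
        continue  # only relative links to .md files get checked against the file system
    print(link)   # ../01/02.md  (reported as invalid if the target file does not exist)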
80 changes: 80 additions & 0 deletions tests/client_tests/test_tn_preprocessor.py
@@ -0,0 +1,80 @@
from __future__ import absolute_import, unicode_literals, print_function
import os
import tempfile
import unittest
import shutil
from libraries.resource_container.ResourceContainer import RC
from libraries.client.preprocessors import do_preprocess, TnPreprocessor
from libraries.general_tools.file_utils import unzip, read_file


class TestTnPreprocessor(unittest.TestCase):

resources_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'resources')

def setUp(self):
"""Runs before each test."""
self.out_dir = ''
self.temp_dir = ""
self.save_sections = TnPreprocessor.sections

def tearDown(self):
"""Runs after each test."""
TnPreprocessor.sections = self.save_sections
# delete temp files
if os.path.isdir(self.out_dir):
shutil.rmtree(self.out_dir, ignore_errors=True)
if os.path.isdir(self.temp_dir):
shutil.rmtree(self.temp_dir, ignore_errors=True)

def test_tn_preprocessor(self):
# given
repo_name = 'en_tn'
file_name = os.path.join('raw_sources', repo_name + '.zip')
rc, repo_dir, self.temp_dir = self.extractFiles(file_name, repo_name)
repo_dir = os.path.join(repo_dir)
self.out_dir = tempfile.mkdtemp(prefix='output_')
repo_name = 'dummy_repo'

# when
results, preproc = do_preprocess(rc, repo_dir, self.out_dir)

# then
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, 'index.json')))
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '00-toc.md')))
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '01-GEN.md')))
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '67-REV.md')))
index = read_file(os.path.join(self.out_dir, '00-toc.md'))
gen = read_file(os.path.join(self.out_dir, '01-GEN.md'))

def test_tn_preprocessor_dummy_section(self):
# given
TnPreprocessor.sections = [{'book': "dummy", 'title': 'dummy'}]
repo_name = 'en_tn'
file_name = os.path.join('raw_sources', repo_name + '.zip')
rc, repo_dir, self.temp_dir = self.extractFiles(file_name, repo_name)
repo_dir = os.path.join(repo_dir)
self.out_dir = tempfile.mkdtemp(prefix='output_')
repo_name = 'dummy_repo'

# when
results, preproc = do_preprocess(rc, repo_dir, self.out_dir)

# then
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '00-toc.md')))

@classmethod
def extractFiles(cls, file_name, repo_name):
file_path = os.path.join(TestTnPreprocessor.resources_dir, file_name)

# 1) unzip the repo files
temp_dir = tempfile.mkdtemp(prefix='repo_')
unzip(file_path, temp_dir)
repo_dir = os.path.join(temp_dir, repo_name)
if not os.path.isdir(repo_dir):
repo_dir = file_path

# 2) Get the resource container
rc = RC(repo_dir)

return rc, repo_dir, temp_dir
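
For illustration, one way the new preprocessor tests could be run directly (an assumption, not project documentation; it presumes the repository root is on sys.path and the en_tn.zip fixture is in place under tests/client_tests/resources/raw_sources):

import unittest

if __name__ == '__main__':
    suite = unittest.defaultTestLoader.loadTestsFromName('tests.client_tests.test_tn_preprocessor')
    unittest.TextTestRunner(verbosity=2).run(suite)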
Binary file added tests/converter_tests/resources/en_tn.zip

