
Commit f589303

Merge 0a04233 into d60c033
PhotoNomad0 committed Oct 19, 2017
2 parents d60c033 + 0a04233 commit f589303
Showing 20 changed files with 836 additions and 27 deletions.
2 changes: 1 addition & 1 deletion functions/convert_md2html/module.json
@@ -2,7 +2,7 @@
"name": "md2html",
"version": "2",
"type": "converter",
"resource_types": ["obs", "ta", "tq", "tw"],
"resource_types": ["obs", "ta", "tq", "tw", "tn"],
"input_format": ["md"],
"output_format": ["html"],
"options": [],
2 changes: 1 addition & 1 deletion libraries/client/client_webhook.py
@@ -188,7 +188,7 @@ def process_webhook(self):
self.send_request_to_linter(job, linter, commit_url, extra_payload=extra_payload)
else:
# -----------------------------
- # multiple Bible book project
+ # multiple book project
# -----------------------------
books = preprocessor.get_book_list()
App.logger.debug('Splitting job into separate parts for books: ' + ','.join(books))
473 changes: 473 additions & 0 deletions libraries/client/preprocessors.py

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions libraries/converters/converter.py
@@ -3,6 +3,7 @@
import os
import tempfile
import traceback
import urlparse
import requests
from libraries.general_tools.url_utils import download_file
from libraries.general_tools.file_utils import unzip, add_contents_to_zip, remove_tree, remove
@@ -142,3 +143,17 @@ def do_callback(self, url, payload):
App.logger.error('Error calling callback code {0}: {1}'.format(self.callback_status, response.reason))
else:
App.logger.error('Invalid callback url: {0}'.format(url))

def check_for_exclusive_convert(self):
convert_only = []
if self.source and len(self.source) > 0:
parsed = urlparse.urlparse(self.source)
params = urlparse.parse_qsl(parsed.query)
if params and len(params) > 0:
for i in range(0, len(params)):
item = params[i]
if item[0] == 'convert_only':
convert_only = item[1].split(',')
self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
break
return convert_only
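
For illustration, a minimal standalone sketch of what check_for_exclusive_convert does with a source URL (not part of the commit; the example URL and file names are assumptions, and the real method works on self.source):

try:  # Python 3
    from urllib.parse import urlparse, parse_qsl, urlunparse
except ImportError:  # Python 2, which this code base targets
    from urlparse import urlparse, parse_qsl, urlunparse

# Assumed example: a pre-convert zip URL carrying a convert_only query parameter.
source = 'https://cdn.example.org/preconvert/abc123.zip?convert_only=01-GEN.usfm,67-REV.usfm'

convert_only = []
parsed = urlparse(source)
for name, value in parse_qsl(parsed.query):
    if name == 'convert_only':
        convert_only = value.split(',')
        # strip the query string so the zip can still be downloaded normally
        source = urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
        break

print(convert_only)  # ['01-GEN.usfm', '67-REV.usfm']
print(source)        # https://cdn.example.org/preconvert/abc123.zip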
5 changes: 5 additions & 0 deletions libraries/converters/md2html_converter.py
@@ -25,6 +25,7 @@ def convert_obs(self):

# find the first directory that has md files.
files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)
convert_only_list = self.check_for_exclusive_convert()

current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -34,6 +35,10 @@

for filename in files:
if filename.endswith('.md'):
base_name = os.path.basename(filename)
if convert_only_list and (base_name not in convert_only_list): # see if this is a file we are to convert
continue

# Convert files that are markdown files
with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
md = md_file.read()
20 changes: 3 additions & 17 deletions libraries/converters/usfm2html_converter.py
@@ -26,20 +26,7 @@ def convert_bible(self):

# find the first directory that has usfm files.
files = get_files(directory=self.files_dir, exclude=self.EXCLUDED_FILES)

- exclusive_convert = False
- convert_only = []
- if self.source and len(self.source) > 0:
-     parsed = urlparse.urlparse(self.source)
-     params = urlparse.parse_qsl(parsed.query)
-     if params and len(params) > 0:
-         for i in range(0, len(params)):
-             item = params[i]
-             if item[0] == 'convert_only':
-                 convert_only = item[1].split(',')
-                 exclusive_convert = True
-                 self.source = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path, '', '', ''))
-                 break
+ convert_only_list = self.check_for_exclusive_convert()

current_dir = os.path.dirname(os.path.realpath(__file__))
with open(os.path.join(current_dir, 'templates', 'template.html')) as template_file:
@@ -48,9 +35,8 @@
for filename in files:
if filename.endswith('.usfm'):
base_name = os.path.basename(filename)
- if exclusive_convert:
-     if base_name not in convert_only:  # see if this is a file we are to convert
-         continue
+ if convert_only_list and (base_name not in convert_only_list):  # see if this is a file we are to convert
+     continue

msg = 'Converting Bible USFM file: {0}'.format(base_name)
self.log.info(msg)
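For illustration, a minimal sketch of the shared skip logic both converters now use: only files named in convert_only_list are converted (not part of the commit; the file names below are assumptions):

import os

convert_only_list = ['01-GEN.usfm', '67-REV.usfm']   # as parsed from ?convert_only=...
files = ['/tmp/repo/01-GEN.usfm', '/tmp/repo/02-EXO.usfm', '/tmp/repo/67-REV.usfm', '/tmp/repo/manifest.yaml']

for filename in files:
    if not filename.endswith('.usfm'):
        continue
    base_name = os.path.basename(filename)
    if convert_only_list and (base_name not in convert_only_list):
        continue  # not one of the requested books
    print('Converting Bible USFM file: {0}'.format(base_name))  # prints 01-GEN.usfm and 67-REV.usfm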
5 changes: 5 additions & 0 deletions libraries/door43_tools/project_deployer.py
@@ -65,6 +65,11 @@ def deploy_revision_to_door43(self, build_log_key):
if 'multiple' in build_log:
multi_merge = build_log['multiple']
App.logger.debug("found multi-part merge")
key_deployed_ = download_key + '/deployed'
if App.cdn_s3_handler().key_exists(key_deployed_):
App.logger.debug("Already merged parts")
return False
self.write_data_to_file(self.temp_dir, key_deployed_, 'deployed', ' ') # flag that deploy has begun

elif 'part' in build_log:
part = build_log['part']
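For illustration, a sketch of the new multi-part merge guard: the first deploy of a merged build writes a 'deployed' flag key and proceeds, and any later deploy of the same build sees the flag and returns early (not part of the commit; a plain set stands in for the CDN bucket):

deployed_keys = set()  # stand-in for keys in the CDN S3 bucket

def try_begin_merge(download_key):
    key_deployed = download_key + '/deployed'
    if key_deployed in deployed_keys:    # App.cdn_s3_handler().key_exists(...) in the real code
        return False                     # parts were already merged; skip this deploy
    deployed_keys.add(key_deployed)      # write_data_to_file(...) flags that deploy has begun
    return True

print(try_begin_merge('u/owner/repo/commit'))  # True  - first deploy proceeds
print(try_begin_merge('u/owner/repo/commit'))  # False - duplicate deploy is skipped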
10 changes: 10 additions & 0 deletions libraries/door43_tools/templaters.py
@@ -27,6 +27,8 @@ def init_template(resource_type, source_dir, output_dir, template_file):
templater = TqTemplater(resource_type, source_dir, output_dir, template_file)
elif resource_type == 'tw':
templater = TwTemplater(resource_type, source_dir, output_dir, template_file)
elif resource_type == 'tn':
templater = TnTemplater(resource_type, source_dir, output_dir, template_file)
else:
templater = Templater(resource_type, source_dir, output_dir, template_file)
return templater
@@ -257,6 +259,14 @@ def __init__(self, *args, **kwargs):
self.titles = index['titles']


class TnTemplater(Templater):
def __init__(self, *args, **kwargs):
super(TnTemplater, self).__init__(*args, **kwargs)
index = file_utils.load_json_object(os.path.join(self.source_dir, 'index.json'))
if index:
self.titles = index['titles']


class BibleTemplater(Templater):
def __init__(self, *args, **kwargs):
super(BibleTemplater, self).__init__(*args, **kwargs)
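For illustration, a simplified sketch of the dispatch this commit extends (stand-in classes, not the real ones): resource_type 'tn' now selects a TnTemplater, which pulls page titles from the index.json written by the TN preprocessor:

import json
import os

class Templater(object):
    def __init__(self, resource_type, source_dir, output_dir, template_file):
        # simplified: the real base class does much more
        self.source_dir = source_dir
        self.titles = {}

class TnTemplater(Templater):
    def __init__(self, *args, **kwargs):
        super(TnTemplater, self).__init__(*args, **kwargs)
        index_path = os.path.join(self.source_dir, 'index.json')
        if os.path.isfile(index_path):
            with open(index_path) as index_file:
                self.titles = json.load(index_file).get('titles', {})

def init_template(resource_type, source_dir, output_dir, template_file):
    known = {'tn': TnTemplater}
    return known.get(resource_type, Templater)(resource_type, source_dir, output_dir, template_file)

print(type(init_template('tn', '/tmp/tn_source', '/tmp/out', None)).__name__)  # TnTemplater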
1 change: 1 addition & 0 deletions libraries/linters/linter.py
@@ -32,6 +32,7 @@ def __init__(self, source_url=None, source_file=None, source_dir=None, commit_data=None):
self.source_zip_file = source_file
self.source_dir = source_dir
self.commit_data = commit_data
self.convert_only = None

self.log = LintLogger()

13 changes: 11 additions & 2 deletions libraries/linters/markdown_linter.py
@@ -33,8 +33,17 @@ def lint(self):
return True

def get_strings(self):
- files = sorted(get_files(directory=self.source_dir, relative_paths=True, exclude=self.EXCLUDED_FILES,
-                          extensions=['.md']))
+ if self.convert_only:
+     files = []
+     for dir in self.convert_only:
+         dir_path = os.path.join(self.source_dir, dir)
+         sub_files = sorted(get_files(directory=dir_path, relative_paths=True, exclude=self.EXCLUDED_FILES,
+                                      extensions=['.md']))
+         for f in sub_files:
+             files.append(os.path.join(dir, f))
+ else:
+     files = sorted(get_files(directory=self.source_dir, relative_paths=True, exclude=self.EXCLUDED_FILES,
+                              extensions=['.md']))
strings = {}
for f in files:
path = os.path.join(self.source_dir, f)
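For illustration, a simplified version of the scoping added to get_strings: when convert_only is set, only the listed book folders are walked for markdown files (not part of the commit; get_files in the real code also applies EXCLUDED_FILES):

import os

def list_md_files(source_dir, convert_only=None):
    dirs = [os.path.join(source_dir, d) for d in convert_only] if convert_only else [source_dir]
    md_files = []
    for d in dirs:
        for root, _, names in os.walk(d):
            for name in names:
                if name.endswith('.md'):
                    # keep paths relative to source_dir, as get_strings() does
                    md_files.append(os.path.relpath(os.path.join(root, name), source_dir))
    return sorted(md_files)

# e.g. list_md_files('/tmp/en_tn', convert_only=['gen', 'exo']) lints only the Genesis and Exodus notes.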
109 changes: 109 additions & 0 deletions libraries/linters/tn_linter.py
@@ -1,9 +1,18 @@
from __future__ import print_function, unicode_literals
import os
import re
import urlparse
from libraries.app.app import App
from libraries.client.preprocessors import TnPreprocessor
from libraries.general_tools import file_utils
from libraries.linters.markdown_linter import MarkdownLinter


class TnLinter(MarkdownLinter):

# match links of form '](link)'
link_marker_re = re.compile(r'\]\(([^\n()]+)\)', re.UNICODE)

def lint(self):
"""
Checks for issues with translationNotes
@@ -12,4 +21,104 @@ def lint(self):
self.source_dir is the directory of source files (.md)
:return boolean:
"""
self.source_dir = os.path.abspath(self.source_dir)
source_dirs = []
if not self.convert_only:
source_dirs = [self.source_dir]
else:
for d in self.convert_only:
source_dirs.append(os.path.join(self.source_dir, d))

for source in source_dirs:
for root, dirs, files in os.walk(source):
for f in files:
file_path = os.path.join(root, f)
parts = os.path.splitext(f)
if parts[1] == '.md':
contents = file_utils.read_file(file_path)
self.find_invalid_links(root, f, contents)

for section in TnPreprocessor.sections:
book = section['book']
file_path = os.path.join(self.source_dir, '{0}.md'.format(book))
if os.path.exists(file_path):
contents = file_utils.read_file(file_path)
App.logger.debug("Book {0} found, length={1}".format(book, len(contents)))
continue
else:
found_files = False
link = self.get_link_for_book(book)
if link == "toc":
continue # not checking for toc, that will be generated by converter
if self.convert_only and (link not in self.convert_only):
continue
file_path = os.path.join(self.source_dir, link)
for root, dirs, files in os.walk(file_path):
if root == file_path:
continue # skip book folder

if len(files) > 0:
found_files = True
break

if not found_files:
msg = "missing book: '{0}'".format(link)
self.log.warnings.append(msg)
App.logger.debug(msg)

return super(TnLinter, self).lint() # Runs checks on Markdown, using the markdown linter

def find_invalid_links(self, folder, f, contents):
for link_match in TnLinter.link_marker_re.finditer(contents):
link = link_match.group(1)
if link:
if link[:4] == 'http':
continue
if link.find('.md') < 0:
continue

file_path = os.path.join(folder, link)
file_path_abs = os.path.abspath(file_path)
exists = os.path.exists(file_path_abs)
if not exists:
a = self.get_file_link(f, folder)
msg = "{0}: contains invalid link: ({1})".format(a, link)
self.log.warnings.append(msg)
App.logger.debug(msg)

def get_file_link(self, f, folder):
parts = folder.split(self.source_dir)
sub_path = self.source_dir # default
if len(parts) == 2:
sub_path = parts[1][1:]
url = "https://git.door43.org/{0}/{1}/src/master/{2}/{3}".format(self.repo_owner, self.repo_name,
sub_path, f)
a = '<a href="{0}">{1}/{2}</a>'.format(url, sub_path, f)
return a

def get_link_for_book(self, book):
parts = book.split('-')
link = book
if len(parts) > 1:
link = parts[1].lower()
return link

def check_for_exclusive_convert(self):
self.convert_only = []
if self.source_zip_url and len(self.source_zip_url) > 0:
parsed = urlparse.urlparse(self.source_zip_url)
params = urlparse.parse_qsl(parsed.query)
if params and len(params) > 0:
for i in range(0, len(params)):
item = params[i]
if item[0] == 'convert_only':
for f in item[1].split(','):
base_name = f.split('.')[0]
parts = base_name.split('-')
if len(parts) > 1:
base_name = parts[1]
self.convert_only.append(base_name.lower())
self.source_zip_url = urlparse.urlunparse((parsed.scheme, parsed.netloc, parsed.path,
'', '', ''))
break
return self.convert_only
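
For illustration, what TnLinter.link_marker_re matches and how find_invalid_links filters the hits (the regex is copied from the class above; the sample markdown and paths are made up):

import re

link_marker_re = re.compile(r'\]\(([^\n()]+)\)', re.UNICODE)

contents = "See [Genesis 1:2](../01/02.md) and [the ULB](https://example.org/ulb)."
for match in link_marker_re.finditer(contents):
    link = match.group(1)
    if link[:4] == 'http' or link.find('.md') < 0:
        continue  # only relative links to .md files get checked against the file system
    print(link)   # ../01/02.md  (reported as invalid if the target file does not exist)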
80 changes: 80 additions & 0 deletions tests/client_tests/test_tn_preprocessor.py
@@ -0,0 +1,80 @@
from __future__ import absolute_import, unicode_literals, print_function
import os
import tempfile
import unittest
import shutil
from libraries.resource_container.ResourceContainer import RC
from libraries.client.preprocessors import do_preprocess, TnPreprocessor
from libraries.general_tools.file_utils import unzip, read_file


class TestTnPreprocessor(unittest.TestCase):

resources_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), 'resources')

def setUp(self):
"""Runs before each test."""
self.out_dir = ''
self.temp_dir = ""
self.save_sections = TnPreprocessor.sections

def tearDown(self):
"""Runs after each test."""
TnPreprocessor.sections = self.save_sections
# delete temp files
if os.path.isdir(self.out_dir):
shutil.rmtree(self.out_dir, ignore_errors=True)
if os.path.isdir(self.temp_dir):
shutil.rmtree(self.temp_dir, ignore_errors=True)

def test_tn_preprocessor(self):
# given
repo_name = 'en_tn'
file_name = os.path.join('raw_sources', repo_name + '.zip')
rc, repo_dir, self.temp_dir = self.extractFiles(file_name, repo_name)
repo_dir = os.path.join(repo_dir)
self.out_dir = tempfile.mkdtemp(prefix='output_')
repo_name = 'dummy_repo'

# when
results, preproc = do_preprocess(rc, repo_dir, self.out_dir)

# then
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, 'index.json')))
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '00-toc.md')))
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '01-GEN.md')))
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '67-REV.md')))
index = read_file(os.path.join(self.out_dir, '00-toc.md'))
gen = read_file(os.path.join(self.out_dir, '01-GEN.md'))

def test_tn_preprocessor_dummy_section(self):
# given
TnPreprocessor.sections = [{'book': "dummy", 'title': 'dummy'}]
repo_name = 'en_tn'
file_name = os.path.join('raw_sources', repo_name + '.zip')
rc, repo_dir, self.temp_dir = self.extractFiles(file_name, repo_name)
repo_dir = os.path.join(repo_dir)
self.out_dir = tempfile.mkdtemp(prefix='output_')
repo_name = 'dummy_repo'

# when
results, preproc = do_preprocess(rc, repo_dir, self.out_dir)

# then
self.assertTrue(os.path.isfile(os.path.join(self.out_dir, '00-toc.md')))

@classmethod
def extractFiles(cls, file_name, repo_name):
file_path = os.path.join(TestTnPreprocessor.resources_dir, file_name)

# 1) unzip the repo files
temp_dir = tempfile.mkdtemp(prefix='repo_')
unzip(file_path, temp_dir)
repo_dir = os.path.join(temp_dir, repo_name)
if not os.path.isdir(repo_dir):
repo_dir = file_path

# 2) Get the resource container
rc = RC(repo_dir)

return rc, repo_dir, temp_dir
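
For illustration, one way the new preprocessor tests could be run directly (an assumption, not project documentation; it presumes the repository root is on sys.path and the en_tn.zip fixture is in place under tests/client_tests/resources/raw_sources):

import unittest

if __name__ == '__main__':
    suite = unittest.defaultTestLoader.loadTestsFromName('tests.client_tests.test_tn_preprocessor')
    unittest.TextTestRunner(verbosity=2).run(suite)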
Binary file added tests/converter_tests/resources/en_tn.zip

