Skip to content
This repository has been archived by the owner on May 22, 2024. It is now read-only.

Commit

Permalink
Feature 8/add tq to md2html conversion (#185)
Browse files Browse the repository at this point in the history
* Adding tQ support.

* TqPreprocessor & TestTqPreprocessor - Adding tQ support.

* TqLinter & TestMd2HtmlConverter - Adding tQ support.

* merge changes from tw

* TqLinter - fix link urls

* TestMd2HtmlConverter - finished test_tq()

* ProjectDeployerTests - added test_tq_deploy_revision_to_door43()

* TqPreprocessor - fix links

* TqPreprocessor - adding index.json

* TqPreprocessor - fix for generating index.json

* TqPreprocessor - added chapter numbers for books

* TqPreprocessor - fixed header level of book title. Added book title before chapter headers.

* TqPreprocessor - Added chunk headers.

* TestConversions - Added tq test.
TqLinter - removed unnecessary internal link validation.

* TqPreprocessor & TqLinter - added warning on missing files

* TqPreprocessor - added toc

* TqPreprocessor - added toc

* TqPreprocessor - added toc

* TqLinter & TestTqLinter - improving missing book testing

* TqPreprocessor & TestTqPreprocessor - removed passing repo name
  • Loading branch information
PhotoNomad0 authored and richmahn committed Oct 17, 2017
1 parent 982bcdd commit 25caedc
Show file tree
Hide file tree
Showing 18 changed files with 599 additions and 28 deletions.
2 changes: 1 addition & 1 deletion functions/convert_md2html/module.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"name": "md2html",
"version": "2",
"type": "converter",
"resource_types": ["obs", "ta"],
"resource_types": ["obs", "ta", "tq"],
"input_format": ["md"],
"output_format": ["html"],
"options": [],
Expand Down
30 changes: 15 additions & 15 deletions libraries/client/client_webhook.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,15 +266,15 @@ def upload_build_log_to_s3(self, build_log, s3_commit_key, part=''):
def create_build_log(self, commit_id, commit_message, commit_url, compare_url, job, pusher_username, repo_name,
repo_owner):
"""
:param string commit_id:
:param string commit_message:
:param string commit_url:
:param string compare_url:
:param TxJob job:
:param string pusher_username:
:param string repo_name:
:param string repo_owner:
:return dict:
:param string commit_id:
:param string commit_message:
:param string commit_url:
:param string compare_url:
:param TxJob job:
:param string pusher_username:
:param string repo_name:
:param string repo_owner:
:return dict:
"""
build_log_json = dict(job)
build_log_json['repo_name'] = repo_name
Expand Down Expand Up @@ -345,7 +345,7 @@ def send_request_to_converter(self, job, converter):
"""
:param TxJob job:
:param TxModule converter:
:return bool:
:return bool:
"""
payload = {
'identifier': job.identifier,
Expand Down Expand Up @@ -470,18 +470,18 @@ def get_converter_module(self, job):
:param TxJob job:
:return TxModule:
"""
return TxModule.query().filter(TxModule.type=='converter')\
.filter(TxModule.input_format.contains(job.input_format))\
.filter(TxModule.output_format.contains(job.output_format))\
.filter(TxModule.resource_types.contains(job.resource_type))\
return TxModule.query().filter(TxModule.type=='converter') \
.filter(TxModule.input_format.contains(job.input_format)) \
.filter(TxModule.output_format.contains(job.output_format)) \
.filter(TxModule.resource_types.contains(job.resource_type)) \
.first()

def get_linter_module(self, job):
"""
:param TxJob job:
:return TxModule:
"""
linters = TxModule.query().filter(TxModule.type=='linter')\
linters = TxModule.query().filter(TxModule.type=='linter') \
.filter(TxModule.input_format.contains(job.input_format))
linter = linters.filter(TxModule.resource_types.contains(job.resource_type)).first()
if not linter:
Expand Down
243 changes: 243 additions & 0 deletions libraries/client/preprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import re
from glob import glob
from shutil import copy
from libraries.app.app import App
from libraries.door43_tools.bible_books import BOOK_NUMBERS
from libraries.general_tools.file_utils import write_file, read_file
from libraries.resource_container.ResourceContainer import RC
Expand All @@ -11,12 +12,19 @@

def do_preprocess(rc, repo_dir, output_dir):
    """
    Pick the preprocessor matching the resource identifier and run it.

    :param rc: resource container; only ``rc.resource.identifier`` is read here
    :param repo_dir: passed through to the preprocessor constructor
    :param output_dir: passed through to the preprocessor constructor
    :return tuple: (result of ``preprocessor.run()``, the preprocessor instance)
    """
    resource_id = rc.resource.identifier

    # Order matters: 'obs' is tested before the Bible resource types, and
    # 'ta'/'tq' only after them, with the generic Preprocessor as fallback.
    if resource_id == 'obs':
        chosen_class = ObsPreprocessor
        log_line = "do_preprocess: using ObsPreprocessor"
    elif resource_id in BIBLE_RESOURCE_TYPES:
        chosen_class = BiblePreprocessor
        log_line = "do_preprocess: using BiblePreprocessor"
    elif resource_id == 'ta':
        chosen_class = TaPreprocessor
        log_line = "do_preprocess: using TaPreprocessor"
    elif resource_id == 'tq':
        chosen_class = TqPreprocessor
        log_line = "do_preprocess: using TqPreprocessor"
    else:
        chosen_class = Preprocessor
        log_line = "do_preprocess: using Preprocessor"

    App.logger.debug(log_line)
    preprocessor = chosen_class(rc, repo_dir, output_dir)
    succeeded = preprocessor.run()
    return succeeded, preprocessor

Expand Down Expand Up @@ -63,10 +71,13 @@ def run(self):
else:
# Case #3: The project path is multiple chapters, so we piece them together
chapters = self.rc.chapters(project.identifier)
App.logger.debug("Merging chapters in '{0}'".format(project.identifier))
if len(chapters):
text = ''
for chapter in chapters:
text = self.mark_chapter(project.identifier, chapter, text)
for chunk in self.rc.chunks(project.identifier, chapter):
text = self.mark_chunk(project.identifier, chapter, chunk, text)
text += read_file(os.path.join(project_path, chapter, chunk))+"\n\n"
if project.identifier.lower() in BOOK_NUMBERS:
filename = '{0}-{1}.{2}'.format(BOOK_NUMBERS[project.identifier.lower()],
Expand All @@ -77,6 +88,12 @@ def run(self):
write_file(os.path.join(self.output_dir, filename), text)
return True

def mark_chapter(self, ident, chapter, text):
    """
    Hook invoked before each chapter's chunks are merged into ``text``.

    The default implementation adds nothing; subclasses may override it
    to append a chapter marker.

    :param ident: project identifier
    :param chapter: chapter being merged
    :param text: text accumulated so far
    :return: ``text`` unchanged
    """
    unchanged = text
    return unchanged

def mark_chunk(self, ident, chapter, chunk, text):
    """
    Hook invoked before each chunk file is appended to ``text``.

    The default implementation adds nothing; subclasses may override it
    to append a chunk marker.

    :param ident: project identifier
    :param chapter: chapter the chunk belongs to
    :param chunk: chunk file name
    :param text: text accumulated so far
    :return: ``text`` unchanged
    """
    unchanged = text
    return unchanged

def is_multiple_jobs(self):
    """
    Report whether this preprocessor splits its work into multiple jobs.

    :return bool: always False in this implementation
    """
    multiple = False
    return multiple

Expand Down Expand Up @@ -415,3 +432,229 @@ def fix_links(self, content):
content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
content, flags=re.IGNORECASE)
return content


class TqPreprocessor(Preprocessor):
    """
    Preprocessor for translationQuestions (tQ) repositories.

    The base ``run()`` is called first; this class then rewrites each
    ``<book>.md`` found in the output dir (corrects heading levels, prefixes
    the book title, rewrites links) and writes ``00-toc.md`` and ``index.json``.
    """

    # Known books in canonical order. 'book' is the output file stem; the part
    # after the dash, lower-cased, is the link/identifier for the book.
    sections = [
        {'book': "00-toc", 'title': 'Table of Contents'},
        {'book': "01-GEN", 'title': 'Genesis'},
        {'book': "02-EXO", 'title': 'Exodus'},
        {'book': "03-LEV", 'title': 'Leviticus'},
        {'book': "04-NUM", 'title': 'Numbers'},
        {'book': "05-DEU", 'title': 'Deuteronomy'},
        {'book': "06-JOS", 'title': 'Joshua'},
        {'book': "07-JDG", 'title': 'Judges'},
        {'book': "08-RUT", 'title': 'Ruth'},
        {'book': "09-1SA", 'title': '1 Samuel'},
        {'book': "10-2SA", 'title': '2 Samuel'},
        {'book': "11-1KI", 'title': '1 Kings'},
        {'book': "12-2KI", 'title': '2 Kings'},
        {'book': "13-1CH", 'title': '1 Chronicles'},
        {'book': "14-2CH", 'title': '2 Chronicles'},
        {'book': "15-EZR", 'title': 'Ezra'},
        {'book': "16-NEH", 'title': 'Nehemiah'},
        {'book': "17-EST", 'title': 'Esther'},
        {'book': "18-JOB", 'title': 'Job'},
        {'book': "19-PSA", 'title': 'Psalms'},
        {'book': "20-PRO", 'title': 'Proverbs'},
        {'book': "21-ECC", 'title': 'Ecclesiastes'},
        {'book': "22-SNG", 'title': 'Song of Solomon'},
        {'book': "23-ISA", 'title': 'Isaiah'},
        {'book': "24-JER", 'title': 'Jeremiah'},
        {'book': "25-LAM", 'title': 'Lamentations'},
        {'book': "26-EZK", 'title': 'Ezekiel'},
        {'book': "27-DAN", 'title': 'Daniel'},
        {'book': "28-HOS", 'title': 'Hosea'},
        {'book': "29-JOL", 'title': 'Joel'},
        {'book': "30-AMO", 'title': 'Amos'},
        {'book': "31-OBA", 'title': 'Obadiah'},
        {'book': "32-JON", 'title': 'Jonah'},
        {'book': "33-MIC", 'title': 'Micah'},
        {'book': "34-NAM", 'title': 'Nahum'},
        {'book': "35-HAB", 'title': 'Habakkuk'},
        {'book': "36-ZEP", 'title': 'Zephaniah'},
        {'book': "37-HAG", 'title': 'Haggai'},
        {'book': "38-ZEC", 'title': 'Zechariah'},
        {'book': "39-MAL", 'title': 'Malachi'},
        {'book': "41-MAT", 'title': 'Matthew'},
        {'book': "42-MRK", 'title': 'Mark'},
        {'book': "43-LUK", 'title': 'Luke'},
        {'book': "44-JHN", 'title': 'John'},
        {'book': "45-ACT", 'title': 'Acts'},
        {'book': "46-ROM", 'title': 'Romans'},
        {'book': "47-1CO", 'title': '1 Corinthians'},
        {'book': "48-2CO", 'title': '2 Corinthians'},
        {'book': "49-GAL", 'title': 'Galatians'},
        {'book': "50-EPH", 'title': 'Ephesians'},
        {'book': "51-PHP", 'title': 'Philippians'},
        {'book': "52-COL", 'title': 'Colossians'},
        {'book': "53-1TH", 'title': '1 Thessalonians'},
        {'book': "54-2TH", 'title': '2 Thessalonians'},
        {'book': "55-1TI", 'title': '1 Timothy'},
        {'book': "56-2TI", 'title': '2 Timothy'},
        {'book': "57-TIT", 'title': 'Titus'},
        {'book': "58-PHM", 'title': 'Philemon'},
        {'book': "59-HEB", 'title': 'Hebrews'},
        {'book': "60-JAS", 'title': 'James'},
        {'book': "61-1PE", 'title': '1 Peter'},
        {'book': "62-2PE", 'title': '2 Peter'},
        {'book': "63-1JN", 'title': '1 John'},
        {'book': "64-2JN", 'title': '2 John'},
        {'book': "65-3JN", 'title': '3 John'},
        {'book': "66-JUD", 'title': 'Jude'},
        {'book': "67-REV", 'title': 'Revelation'},
    ]

    def __init__(self, *args, **kwargs):
        super(TqPreprocessor, self).__init__(*args, **kwargs)
        self.section_container_id = 1
        self.toc = ''
        self.index_json = None
        # Deliberately invalid markdown heading (15 '#') used as a placeholder
        # for generated headers; compile_section() rewrites it to a real level.
        self.section_header_marker = '###############'

    def mark_chapter(self, ident, chapter, text):
        """
        Append a placeholder chapter header to ``text``.

        :param ident: project identifier (unused here)
        :param chapter: chapter name placed after the marker
        :param text: text accumulated so far
        :return: ``text`` followed by '<marker> <chapter>' and a blank line
        """
        # invalid header for the section - the heading level is corrected later
        header = '{0} {1}\n\n'.format(self.section_header_marker, chapter)
        return text + header

    def mark_chunk(self, ident, chapter, chunk, text):
        """
        Append a placeholder chunk header to ``text``.

        :param ident: project identifier (unused here)
        :param chapter: chapter name
        :param chunk: chunk file name; its extension is dropped
        :param text: text accumulated so far
        :return: ``text`` followed by '<marker># <chapter>:<chunk>' and a blank line
        """
        chunk_marker = os.path.splitext(chunk)[0]
        # the extra '#' right after the marker distinguishes chunks from chapters
        header = '{0}# {1}:{2}\n\n'.format(self.section_header_marker, chapter, chunk_marker)
        return text + header

    def compile_section(self, title, link, content):
        """
        Build the final markdown for one book.

        Emits a level-1 book title carrying an anchor, rewrites placeholder
        chapter/chunk headers to level 2/3 prefixed with the book title, and
        pushes every other heading found in the content down by three levels.

        :param title: book title
        :param link: anchor id for the book title
        :param content: merged markdown of the book
        :return: compiled markdown, terminated by a horizontal rule
        """
        level = 3
        chunk_prefix = '#' * level
        chapter_prefix = '#' * (level - 1)
        marker = self.section_header_marker

        lines = content.replace('\r', '').split('\n')
        for i, line in enumerate(lines):
            if line.startswith(marker):
                text = line[len(marker):]
                # startswith() instead of text[0]: a line that is exactly the
                # marker leaves text empty and must not raise IndexError
                if text.startswith('#'):  # chunk marker
                    lines[i] = chunk_prefix + ' ' + title + text[1:]
                else:  # chapter marker
                    lines[i] = chapter_prefix + ' ' + title + text
            elif line.startswith('#'):
                stripped = line.rstrip()
                if stripped.endswith('#'):
                    # closed ATX heading ('# x #'): deepen both ends
                    lines[i] = chunk_prefix + stripped + chunk_prefix
                else:
                    lines[i] = chunk_prefix + line

        book_header = '{0} <a id="{1}"/>{2}\n\n'.format('#' * (level - 2), link, title)
        return book_header + '\n'.join(lines) + '\n\n---\n\n'  # horizontal rule

    def run(self):
        """
        Run the base preprocessing, then post-process each known book.

        Rewrites every ``<book>.md`` present in the output dir, and writes
        ``00-toc.md`` and ``index.json``. If the last project's directory
        holds ``toc.yaml`` / ``config.yaml`` they are copied to the output dir.

        :return bool: always True
        """
        super(TqPreprocessor, self).run()
        # Always start from the header so a repo with no recognised book still
        # produces a text ToC rather than passing None to write_file().
        self.toc = '# Table of Contents:\n\n'
        self.index_json = {
            'titles': {},
            'chapters': {},
            'book_codes': {}
        }

        projects = {}
        last_project = None  # avoids relying on the loop variable after the loop
        for project in self.rc.projects:
            last_project = project
            section = self.get_section_for_file(project.identifier)
            if section:
                projects[section['book']] = {
                    'link': self.get_link_for_section(section),
                }
            else:
                App.logger.debug('TqPreprocessor: extra project found: {0}'.format(project.identifier))

        for section in TqPreprocessor.sections:  # index by book order
            book = section['book']
            if book not in projects:
                App.logger.debug('TqPreprocessor: missing book: {0}'.format(book))
                continue
            book_path = os.path.join(self.output_dir, book + '.md')
            if not os.path.exists(book_path):
                App.logger.debug('TqPreprocessor: book missing: {0}'.format(book))
                continue
            title = section['title']
            link = self.get_link_for_section(section)
            initial_markdown = read_file(book_path)
            markdown = self.compile_section(title, link, initial_markdown)
            markdown = self.fix_links(markdown, book)
            if initial_markdown != markdown:
                write_file(book_path, markdown)
            self.toc += '* [{1}](./{0}.html)\n'.format(book, title)
            self.index_json['titles'][book + '.html'] = title

        self.toc = self.fix_links(self.toc, '-')
        write_file(os.path.join(self.output_dir, '00-toc.md'), self.toc)
        self.index_json['titles']['00-toc.html'] = 'Table of Contents'
        write_file(os.path.join(self.output_dir, 'index.json'), self.index_json)

        # Copy the toc and config.yaml file to the output dir so they can be used to
        # generate the ToC on live.door43.org
        if last_project is not None:
            for yaml_name in ('toc.yaml', 'config.yaml'):
                yaml_path = os.path.join(self.source_dir, last_project.path, yaml_name)
                if os.path.isfile(yaml_path):
                    copy(yaml_path, os.path.join(self.output_dir, yaml_name))
        return True

    def fix_links(self, content, section_link):
        """
        Rewrite links in ``content`` for HTML output.

        :param content: markdown text; returned untouched when empty/None
        :param section_link: name of the section ``content`` belongs to; links
            of the form ``../<section_link>/<name>.md`` become ``#<name>``
        :return: content with links rewritten
        """
        if not content:
            return content

        # convert RC links, e.g. rc://en/tn/help/1sa/16/02 => https://git.door43.org/Door43/en_tn/1sa/16/02.md
        content = re.sub(r'rc://([^/]+)/([^/]+)/([^/]+)/([^\s)\]\n$]+)',
                         r'https://git.door43.org/{0}/\1_\2/src/master/\4.md'.format(self.rc.repo_name), content,
                         flags=re.IGNORECASE)
        # fix links to other sections within the same manual (only one ../ and a section name that matches section_link)
        # e.g. [covenant](../kt/covenant.md) => [covenant](#covenant)
        # section_link is escaped so it is matched literally; '\.md' matches a real dot only
        pattern = r'\]\(\.\./{0}/([^/]+)\.md\)'.format(re.escape(section_link))
        content = re.sub(pattern, r'](#\1)', content)
        # fix links to other sections within the same manual (only one ../ and a section name)
        # e.g. [commit](../other/commit.md) => [commit](other.html#commit)
        # One alternation over every known link replaces a per-section re.sub loop;
        # a rewritten link can never match again, so a single pass is equivalent.
        known_links = '|'.join(re.escape(self.get_link_for_section(section))
                               for section in TqPreprocessor.sections)
        pattern = r'\]\(\.\./({0})/([^/]+)\.md\)'.format(known_links)
        content = re.sub(pattern, r'](\1.html#\2)', content)
        # fix links to other sections that just have the section name but no 01.md page (preserve http:// links)
        # e.g. See [Verbs](figs-verb) => See [Verbs](#figs-verb)
        content = re.sub(r'\]\(([^# :/)]+)\)', r'](#\1)', content)
        # convert URLs to links if not already
        content = re.sub(r'([^"(])((http|https|ftp)://[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](\2)',
                         content, flags=re.IGNORECASE)
        # URLs with just www at the start, no http
        content = re.sub(r'([^A-Z0-9"(/])(www\.[A-Z0-9/?&_.:=#-]+[A-Z0-9/?&_:=#-])', r'\1[\2](http://\2)',
                         content, flags=re.IGNORECASE)
        return content

    def get_section_for_file(self, id):
        """
        Find the section entry matching a project identifier.

        :param id: project identifier; compared case-insensitively against the
            full book name (e.g. '01-gen') and against the short link ('gen')
        :return: the matching entry of ``sections``, or None
        """
        # the parameter keeps its original name ('id') for keyword callers
        ident = id.lower()
        for section in TqPreprocessor.sections:
            if ident == section['book'].lower() or ident == self.get_link_for_section(section):
                return section
        return None

    def get_link_for_section(self, section):
        """
        Return the short link name for a section.

        :param section: an entry of ``sections``
        :return: lower-cased text after the first dash of 'book' (e.g. 'gen'
            for '01-GEN'), or 'book' unchanged when it contains no dash
        """
        book = section['book']
        parts = book.split('-')
        if len(parts) > 1:
            return parts[1].lower()
        return book
3 changes: 2 additions & 1 deletion libraries/converters/converter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from __future__ import print_function, unicode_literals
import json
import os
import tempfile
import traceback
Expand Down Expand Up @@ -132,7 +133,7 @@ def do_callback(self, url, payload):
if url.startswith('http'):
headers = {"content-type": "application/json"}
App.logger.debug('Making callback to {0} with payload:'.format(url))
App.logger.debug(payload)
App.logger.debug(json.dumps(payload)[:256])
response = requests.post(url, json=payload, headers=headers)
self.callback_status = response.status_code
if (self.callback_status >= 200) and (self.callback_status < 299):
Expand Down
2 changes: 1 addition & 1 deletion libraries/converters/md2html_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def convert_markdown(self):
# Convert files that are markdown files
with codecs.open(filename, 'r', 'utf-8-sig') as md_file:
md = md_file.read()
if self.resource == 'ta':
if self.resource in ['ta']:
html = markdown2.markdown(md, extras=['markdown-in-html', 'tables'])
else:
html = markdown.markdown(md)
Expand Down
Loading

0 comments on commit 25caedc

Please sign in to comment.