Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
phillip-hopper committed Aug 8, 2016
0 parents commit fb20775
Show file tree
Hide file tree
Showing 8 changed files with 250 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
.idea/
*.iml
24 changes: 24 additions & 0 deletions LICENSE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
Files within this repository are released under the MIT License.
Contributors are listed at the top of each file.

Copyright (c) 2016 unfoldingWord

http://creativecommons.org/licenses/MIT/

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# unfoldingWord Transform markdown to HTML

A library of Python scripts to convert a git repository into publishable text.

All code should be compatible with both Python 2.7 and 3.5.

__To use this library, install it in your Python environment like this:__

pip install git+git://github.com/unfoldingWord-dev/tx-md2html.git#egg=tx-md2html

__To install a particular version (tag, branch or commit) use this:__

pip install git+git://github.com/unfoldingWord-dev/tx-md2html.git@Tag-Branch-or-Commit#egg=tx-md2html
Empty file added obs/__init__.py
Empty file.
12 changes: 12 additions & 0 deletions obs/template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Open Bible Stories</title>
</head>
<body>
<div class="obs-content">

</div>
</body>
</html>
130 changes: 130 additions & 0 deletions obs/transform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
from __future__ import print_function, unicode_literals
import argparse
import codecs
import inspect
import os
import re
import shutil
import sys
import tempfile
from contextlib import closing

import markdown
from general_tools.file_utils import unzip, load_json_object, make_dir, write_file
from general_tools.print_utils import print_error, print_warning, print_ok
from general_tools.url_utils import join_url_parts, download_file


class TransformOBS(object):
    """
    Convert the markdown chapters of an Open Bible Stories repository into HTML files.

    The repository archive is downloaded into a temporary directory, each
    chapter is rendered with ``markdown.markdown`` and the result is inserted
    into the ``obs-content`` div of ``template.html`` (read from the directory
    containing this module).

    Instances have a ``close()`` method, so they can be wrapped in
    ``contextlib.closing`` to make sure the temporary files are removed.
    """

    # Matches the opening tag of the div whose class contains "obs-content" (group 1),
    # whatever is currently inside it, and the first closing </div> after it (group 2).
    dir_re = re.compile(r'(<div\s.*?class=".*?obs-content.*?">).*?(</div>)', re.UNICODE | re.DOTALL)

    def __init__(self, source_repo_url, output_directory):
        """
        :param source_repo_url: URL of the source git repository; it must contain
            'git.door43.org', otherwise a warning is printed and the process exits
        :param output_directory: directory the generated .html files are written to
        """
        if 'git.door43.org' not in source_repo_url:
            print_warning('Currently only git.door43.org repositories are supported.')
            sys.exit(0)

        self.temp_dir = ''
        self.errors = []  # exceptions caught while running, in the order they occurred
        self.source_repo_url = source_repo_url
        self.output_directory = output_directory

    def close(self):
        """Delete the temporary working directory, if one was created."""
        # temp_dir is '' until run() has been called, and isdir('') is False
        if os.path.isdir(self.temp_dir):
            shutil.rmtree(self.temp_dir, ignore_errors=True)

    @staticmethod
    def _insert_into_template(html_template, html):
        """
        Return html_template with the contents of its obs-content div replaced by html.

        A function is used as the replacement so that html is inserted verbatim.
        If html were made part of a replacement *string*, every backslash in the
        converted text would be processed by ``re.sub`` as an escape sequence or
        a group reference.
        """
        return TransformOBS.dir_re.sub(
            lambda match: match.group(1) + '\n' + html + '\n' + match.group(2),
            html_template)

    def run(self):
        """
        Download the repository, convert 01.md through 50.md and write the HTML files.

        Nothing is returned and no exception is propagated: every exception is
        printed with print_error and appended to ``self.errors``.
        """
        try:
            self.temp_dir = tempfile.mkdtemp(prefix='txOBS_')

            # clean up the git repo url
            if self.source_repo_url[-4:] == '.git':
                self.source_repo_url = self.source_repo_url[:-4]

            if self.source_repo_url[-1:] == '/':
                self.source_repo_url = self.source_repo_url[:-1]

            # download the archive
            file_to_download = join_url_parts(self.source_repo_url, 'archive/master.zip')
            repo_dir = self.source_repo_url.rpartition('/')[2]
            downloaded_file = os.path.join(self.temp_dir, repo_dir + '.zip')
            try:
                print('Downloading {0}...'.format(file_to_download), end=' ')
                if not os.path.isfile(downloaded_file):
                    download_file(file_to_download, downloaded_file)
            finally:
                print('finished.')

            # unzip the archive
            try:
                print('Unzipping...', end=' ')
                unzip(downloaded_file, self.temp_dir)
            finally:
                print('finished.')

            # get the manifest
            # NOTE(review): the manifest is not used below, and it is looked up directly in
            # temp_dir while the content is read from temp_dir/<repo_dir> -- confirm the path.
            try:
                print('Reading the manifest...', end=' ')
                manifest = load_json_object(os.path.join(self.temp_dir, 'manifest.json'))
            finally:
                print('finished.')

            # create output directory
            make_dir(self.output_directory)

            # read the markdown files and output html files
            try:
                print('Processing the OBS markdown files')

                # the chapter files are expected to be named 01.md through 50.md
                files_to_process = [str(i).zfill(2) + '.md' for i in range(1, 51)]

                # the template is located next to this source file
                current_dir = os.path.dirname(inspect.stack()[0][1])
                with codecs.open(os.path.join(current_dir, 'template.html'), 'r', 'utf-8-sig') as html_file:
                    html_template = html_file.read()

                for file_to_process in files_to_process:

                    # read the markdown file
                    file_name = os.path.join(self.temp_dir, repo_dir, 'content', file_to_process)
                    with codecs.open(file_name, 'r', 'utf-8-sig') as md_file:
                        md = md_file.read()

                    html = markdown.markdown(md)
                    html = TransformOBS._insert_into_template(html_template, html)
                    write_file(os.path.join(self.output_directory, file_to_process.replace('.md', '.html')), html)

            except IOError as ioe:
                print_error('{0}: {1}'.format(ioe.strerror, ioe.filename))
                self.errors.append(ioe)

            except Exception as e:
                # exceptions have no .message attribute on Python 3, so format the exception itself
                print_error('{0}'.format(e))
                self.errors.append(e)

            finally:
                print('finished.')

        except Exception as e:
            print_error('{0}'.format(e))
            self.errors.append(e)


if __name__ == '__main__':
    # Command line entry point: convert the repository given with --gitrepo and
    # write the resulting HTML files into the directory given with --outdir.
    print()
    parser = argparse.ArgumentParser(description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter)
    # both arguments are required, so no default value is given for them
    parser.add_argument('-r', '--gitrepo', dest='gitrepo',
                        required=True, help='Git repository where the source can be found.')
    parser.add_argument('-o', '--outdir', dest='outdir',
                        required=True, help='The output directory for HTML files.')

    args = parser.parse_args(sys.argv[1:])

    # call with closing to be sure the temp files get cleaned up
    with closing(TransformOBS(args.gitrepo, args.outdir)) as tx:
        tx.run()
        error_count = len(tx.errors)

    # run() records exceptions in tx.errors instead of raising them, so the
    # list has to be checked here before success is reported
    if error_count:
        print_error('FINISHED WITH {0} ERROR(S).'.format(error_count))
        sys.exit(1)

    print_ok('ALL FINISHED: ', 'Please check the output directory.')
24 changes: 24 additions & 0 deletions register/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import print_function, unicode_literals
import requests


def register():
    """
    Register the md2html conversion module by POSTing its description to
    https://api.door43.org/tx/module.

    :return: True if the server answered with a success status, otherwise False
    :raises requests.exceptions.RequestException: if the request cannot be
        completed, e.g. connection failure or timeout
    """
    post_url = 'https://api.door43.org/tx/module'
    post_data = {'name': 'md2html',
                 'version': '1',
                 'type': 'conversion',
                 'resource_types': ['obs'],
                 'input_format': ['md'],
                 'output_format': ['html'],
                 'options': [],
                 'private_links': [],
                 'public_links': []}

    # NOTE(review): data= sends the dict form-encoded; confirm whether the endpoint expects JSON.
    # Without a timeout requests waits indefinitely, and this function is also run during install.
    response = requests.post(post_url, data=post_data, timeout=30)

    if response.ok:
        print('Registered successfully.')
    else:
        # do not fail silently: report why the registration was rejected
        print('Registration failed: {0} {1}'.format(response.status_code, response.reason))

    return response.ok


if __name__ == '__main__':
    # allow the registration to be run manually
    register()
45 changes: 45 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os
from setuptools import setup
from setuptools.command.install import install


# Utility function to read the README file.
# Used for the long_description. It's nice, because now 1) we have a top level
# README file and 2) it's easier to type in the README file than to put a raw
# string in below ...
def read(f_name):
    """
    Return the contents of a file located in the same directory as this script.

    :param f_name: file name, joined onto the directory of this script
    :return: the complete text of the file
    """
    # use a context manager so the file handle is closed as soon as it has been read
    with open(os.path.join(os.path.dirname(__file__), f_name)) as in_file:
        return in_file.read()


class PostInstallCommand(install):
    """
    Install the package, then register this converter with tX-manager
    """
    def run(self):
        # run the regular installation first so that the registration really is a post-install step
        install.run(self)

        # imported here, not at the top of the file, so that loading setup.py
        # does not import the register package (which imports requests)
        from register import register
        register()


setup(
    name="tx-md2html",
    version="0.0.1",
    author="unfoldingWord",
    # NOTE(review): this is a domain name, not an e-mail address -- replace it with the real contact address
    author_email="unfoldingword.org",
    description="A library of Python scripts to convert a git repository into publishable text.",
    license="MIT",
    keywords="unfoldingWord publish",
    url="https://github.com/unfoldingWord-dev/tx-md2html",
    packages=['register', 'obs'],
    # obs/transform.py reads template.html from its own directory, so it has to be installed with the package
    package_data={'obs': ['template.html']},
    long_description=read('README.md'),
    classifiers=[],
    # install_requires is what pip resolves and installs; the distutils 'requires'
    # keyword is only metadata and cannot contain a VCS URL
    install_requires=[
        'markdown',
        'requests',
        'uw_tools @ git+https://github.com/unfoldingWord-dev/uw_tools.git'
    ],
    cmdclass={
        'install': PostInstallCommand
    }
)

0 comments on commit fb20775

Please sign in to comment.