-
Notifications
You must be signed in to change notification settings - Fork 10
/
service.py
90 lines (79 loc) · 2.95 KB
/
service.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import subprocess
import tempfile
import logging
import urllib
from flask import Flask, request, redirect, url_for, abort, send_file
from werkzeug import secure_filename
# Fallback directory for uploaded files, used when config.py sets no
# UPLOAD_FOLDER of its own.
UPLOAD_FOLDER = '/uploads'
# File extensions (without the dot) accepted by the upload endpoint.
ALLOWED_EXTENSIONS = {'pdf'}

app = Flask(__name__)
app.config.from_pyfile('config.py')
# upload_file() looks up app.config['UPLOAD_FOLDER']; guarantee the key exists
# without overriding a value that config.py may already have supplied.
app.config.setdefault('UPLOAD_FOLDER', UPLOAD_FOLDER)

# Root logger threshold is INFO, so the logging.debug() calls in this module
# are not emitted unless this level is lowered.
logging.getLogger().setLevel(logging.INFO)
@app.route('/')
def hello_world():
    """Answer requests for the site root with a fixed greeting."""
    greeting = 'Hello World!'
    return greeting
# Note that the original from AIT did this: --embed-font 0 --process-outline 0
# The former was to reduce size, the latter to avoid showing the outline (which takes up a lot of space on screen)
def run_pdftohtmlex(url, first_page="1", last_page=None):
    """Download the PDF at *url* and convert it to HTML with pdf2htmlEX.

    Args:
        url: Location of the PDF; fetched with urlretrieve into a temp file.
        first_page: First page to convert (string or int).  The default "1"
            matches pdf2htmlEX's own documented default.
        last_page: Last page to convert, or None to convert to the end.

    Returns:
        Path of the temporary ``.html`` file that pdf2htmlEX was asked to
        write.  The file is not removed here; the caller owns it.

    A non-zero pdf2htmlEX exit status is logged as an error but not raised,
    so the returned file may be empty in that case.  Errors from the download
    or from launching the process propagate to the caller.
    """
    # urlretrieve lives in urllib on Python 2 and in urllib.request on
    # Python 3; resolve it locally so the function works on either.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve

    # Cache to temp file.  Only the reserved name is needed, so close our own
    # handle immediately instead of leaking it.
    in_f = tempfile.NamedTemporaryFile(suffix=".pdf", delete=False)
    in_f.close()
    try:
        urlretrieve(url, in_f.name)
        # TODO Check file exists etc.
        out_f = tempfile.NamedTemporaryFile(suffix=".html", delete=False)
        out_f.close()
        out_d, out_name = os.path.split(out_f.name)

        # Build the command as a list (no shell involved).  The page options
        # are independent, so a first page is honoured even without a last.
        cmd = ['pdf2htmlEX', '--process-outline', '0']
        if first_page:
            cmd += ['--first-page', str(first_page)]
        if last_page:
            cmd += ['--last-page', str(last_page)]
        cmd += ['--dest-dir', "%s/" % out_d, in_f.name, out_name]

        logging.debug("Running: %s", cmd)
        p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = p.communicate()
        if out:
            logging.debug("pdf2htmlEX STDOUT %s", out)
        if err:
            logging.debug("pdf2htmlEX STDERR: %s", err)
        if p.returncode != 0:
            # Surface failures that would otherwise only show up as an empty
            # result file.
            logging.error("pdf2htmlEX exited with status %s for %s",
                          p.returncode, url)
    finally:
        # The downloaded PDF is only an intermediate; remove it on every path.
        try:
            os.remove(in_f.name)
        except OSError:
            logging.warning("Could not remove temporary file %s", in_f.name)

    # return the file
    return out_f.name
def _is_page_number(value):
    """Return True when *value* is a string of ASCII digits naming a page >= 1."""
    return (bool(value)
            and all(ch in '0123456789' for ch in value)
            and int(value) >= 1)


@app.route('/convert')
def convert():
    """Convert the PDF named by the ``url`` query parameter and return the HTML.

    Query parameters:
        url: Required.  Must be an http://, https:// or ftp:// URL.
        first_page: Optional page number; defaults to "1".
        last_page: Optional page number.

    Responds with 400 when ``url`` is missing or uses another scheme, or when
    a page parameter is not a positive integer.  Otherwise the page range is
    forwarded to run_pdftohtmlex() and the resulting file is sent inline.
    """
    url = request.args.get('url')
    if not url:
        abort(400)
    # The URL is fetched server-side via urlretrieve, which also understands
    # file:// URLs; restrict clients to remote schemes so they cannot make the
    # service read local files.
    if not url.lower().startswith(('http://', 'https://', 'ftp://')):
        abort(400)

    # Empty values are treated like absent ones.
    first_page = request.args.get('first_page') or "1"
    last_page = request.args.get('last_page') or None
    # These values end up on the pdf2htmlEX command line; reject anything
    # that is not a plain page number up front.
    for page in (first_page, last_page):
        if page is not None and not _is_page_number(page):
            abort(400)

    # Process it:
    logging.debug('URL is: %s', url)
    # Always forward first_page so that it is not discarded when no last_page
    # was given.
    result = run_pdftohtmlex(url, first_page, last_page)
    # NOTE(review): nothing in this module deletes `result` after it is sent,
    # so converted files accumulate in the temp directory.
    return send_file(result, attachment_filename="testing.html",
                     as_attachment=False)
def allowed_file(filename):
    """Return True if *filename* has an extension in ALLOWED_EXTENSIONS.

    The text after the last dot is compared case-insensitively, so
    ``REPORT.PDF`` is accepted just like ``report.pdf``.  Names without a dot
    are rejected.
    """
    if '.' not in filename:
        return False
    # ALLOWED_EXTENSIONS holds lower-case entries; normalise before comparing.
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
@app.route('/upload', methods=['GET', 'POST'])
def upload_file():
    """Show the upload form (GET) or store an uploaded file (POST).

    On POST, the ``file`` form field is saved under the configured upload
    folder when allowed_file() accepts its name; the name is sanitised with
    secure_filename() first.  After saving, the client is redirected to the
    ``uploaded_file`` endpoint if one is registered, otherwise a short
    confirmation is returned.  A rejected or missing file falls through to
    the form again.
    """
    if request.method == 'POST':
        # Named `upload` rather than `file` to avoid shadowing the builtin.
        upload = request.files['file']
        if upload and allowed_file(upload.filename):
            filename = secure_filename(upload.filename)
            # Fall back to the module constant when the loaded configuration
            # does not define UPLOAD_FOLDER.
            folder = app.config.get('UPLOAD_FOLDER', UPLOAD_FOLDER)
            upload.save(os.path.join(folder, filename))
            # This module registers no 'uploaded_file' view, and url_for()
            # fails for unknown endpoints; only redirect when it exists.
            if 'uploaded_file' in app.view_functions:
                return redirect(url_for('uploaded_file',
                                        filename=filename))
            return 'Uploaded %s' % filename
    return '''
<!doctype html>
<title>Upload new File</title>
<h1>Upload new File</h1>
<form action="" method=post enctype=multipart/form-data>
<p><input type=file name=file>
<input type=submit value=Upload>
</form>
'''
if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which allows code
    # execution from the browser.  The server listens on all interfaces, so
    # turn it on only when explicitly requested (e.g. FLASK_DEBUG=1) instead
    # of unconditionally.
    app.debug = os.environ.get('FLASK_DEBUG', '').lower() in ('1', 'true', 'yes')
    app.run(host='0.0.0.0')