Skip to content

Commit

Permalink
Fix for win32 multiprocessing with pyinstaller
Browse files Browse the repository at this point in the history
  • Loading branch information
virantha committed Feb 18, 2016
1 parent a868be2 commit a18a94c
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 1 deletion.
7 changes: 7 additions & 0 deletions pypdfocr/pypdfocr.py
Expand Up @@ -25,6 +25,12 @@
from PIL import Image
import yaml

import multiprocessing
# Replace the Popen routine to allow win32 pyinstaller to build
from multiprocessing import forking
from pypdfocr_multiprocessing import _Popen
forking.Popen = _Popen

from pypdfocr_pdf import PyPdf
from pypdfocr_tesseract import PyTesseract
from pypdfocr_gs import PyGs
Expand Down Expand Up @@ -452,6 +458,7 @@ def _convert_and_file_email(self, pdf_filename):
self._send_email(pdf_filename, ocr_pdffilename, filing)

def main():  # pragma: no cover
    """Console entry point: run PyPDFOCR on the command-line arguments."""
    # Needed for multiprocessing to work from a frozen (pyinstaller)
    # Windows build.
    multiprocessing.freeze_support()
    PyPDFOCR().go(sys.argv[1:])

Expand Down
56 changes: 56 additions & 0 deletions pypdfocr/pypdfocr_multiprocessing.py
@@ -0,0 +1,56 @@
#!/usr/bin/env python2.7
# Copyright 2013 Virantha Ekanayake All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys, os, multiprocessing.forking
import logging

""" Special work-around to support multiprocessing and pyinstaller --onefile on Windows systems
https://github.com/pyinstaller/pyinstaller/wiki/Recipe-Multiprocessing
"""

import multiprocessing.forking as forking
import os
import sys

class _Popen(multiprocessing.forking.Popen):
    """Popen variant that sets ``_MEIPASS2`` while a child process is started.

    When running from a frozen executable, the ``_MEIPASS2`` environment
    variable is set from ``sys._MEIPASS`` for the duration of the parent
    class constructor and cleared again afterwards. This is required to
    get pyinstaller's --onefile mode working.
    """

    def __init__(self, *args, **kw):
        """Start the child via the parent constructor, passing all arguments through."""
        running_frozen = hasattr(sys, 'frozen')
        if running_frozen:
            # The original _MEIPASS2 value has to be set from sys._MEIPASS
            # to get --onefile mode working.
            os.putenv('_MEIPASS2', sys._MEIPASS)
        try:
            super(_Popen, self).__init__(*args, **kw)
        finally:
            if running_frozen:
                # On some platforms (e.g. AIX) 'os.unsetenv()' is not
                # available, so the variable cannot be deleted and is set
                # to the empty string instead. The bootloader can handle
                # this case.
                if not hasattr(os, 'unsetenv'):
                    os.putenv('_MEIPASS2', '')
                else:
                    os.unsetenv('_MEIPASS2')

# Replace multiprocessing's Popen routine with the patched _Popen class so
# that the _MEIPASS2 handling is applied whenever a child process is started.
forking.Popen = _Popen

# Alternative approach, left disabled: a Process subclass bound to _Popen.
#class Process(multiprocessing.Process):
#_Popen = _Popen

# ...

if __name__ == '__main__':
    # On Windows calling this function is necessary.
    multiprocessing.freeze_support()
6 changes: 6 additions & 0 deletions pypdfocr/pypdfocr_pdffiler.py
Expand Up @@ -26,6 +26,7 @@

from PyPDF2 import PdfFileReader
from pypdfocr_filer import PyFiler
from pypdfocr_filer_dirs import PyFilerDirs

class PyPdfFiler(object):
def __init__(self, filer):
Expand Down Expand Up @@ -72,3 +73,8 @@ def move_to_matching_folder(self, filename):
tgt_file = self.filer.move_to_matching_folder(filename, tgt_folder)
return tgt_file

if __name__ == '__main__':
    # Ad-hoc manual check: print the text of every page of a sample PDF.
    pdf_filer = PyPdfFiler(PyFilerDirs())
    for text in pdf_filer.iter_pdf_page_text("scan_ocr.pdf"):
        print(text)

4 changes: 3 additions & 1 deletion pypdfocr/pypdfocr_tesseract.py
Expand Up @@ -24,8 +24,10 @@
import subprocess
import glob
from subprocess import CalledProcessError

from multiprocessing import Pool


def error(text):
    """Print *text* prefixed with ``ERROR: `` and exit the process with status -1."""
    message = "ERROR: %s" % text
    print(message)
    sys.exit(-1)
Expand Down Expand Up @@ -129,8 +131,8 @@ def make_hocr_from_pnms(self, fns):

# Glob it
#fns = glob.glob(img_filename)
logging.debug("Making pool for tesseract")
pool = Pool(processes=self.threads)
print("Making pool")
hocr_filenames = pool.map(unwrap_self, zip([self]*len(fns), fns))
pool.close()
pool.join()
Expand Down

0 comments on commit a18a94c

Please sign in to comment.