Skip to content

Commit

Permalink
wmlxgettext: fix UTF-8 issue: #1785 (#1793)
Browse files Browse the repository at this point in the history
Also: the -o parameter is now mandatory. STDOUT redirection is still possible
  • Loading branch information
AncientLich authored and CelticMinstrel committed Aug 1, 2017
1 parent cf3017b commit 3f287b9
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 52 deletions.
117 changes: 76 additions & 41 deletions utils/pywmlx/state/machine.py
Expand Up @@ -11,6 +11,7 @@
from pywmlx.state.wml_states import setup_wmlstates

import pywmlx.nodemanip
import pdb



Expand Down Expand Up @@ -335,53 +336,87 @@ def run(*, filebuf, fileref, fileno, startstate, waitwml=True):
_currentdomain = _initialdomain
pywmlx.nodemanip.newfile(fileref, fileno)
# debug_cs = startstate
for xline in filebuf:
xline = xline.strip('\n\r')
_current_lineno += 1
# on new line, debug file will write another marker
if _debugmode:
print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
file=_fdebug)
while xline is not None:
# print debug infos (if debugmode is on)
try:
for xline in filebuf:
xline = xline.strip('\n\r')
_current_lineno += 1
# on new line, debug file will write another marker
if _debugmode:
lno = '%05d' % _current_lineno
print('------------------------------------------------------',
print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
file=_fdebug)
print('LINE', lno, '|', xline, file=_fdebug)
# action number is used to know what function we should run
action = 0
v = None
m = None
if cs.regex is None:
# action = 1 --> execute state.run
action = 1
while xline is not None:
# print debug infos (if debugmode is on)
if _debugmode:
print('ALWAYS-RUN x', cs_debug, file=_fdebug)
else:
# m is match
m = re.match(cs.regex, xline)
if m:
lno = '%05d' % _current_lineno
print('---------------------------------------------------',
file=_fdebug)
print('LINE', lno, '|', xline, file=_fdebug)
# action number is used to know what function we should run
action = 0
v = None
m = None
if cs.regex is None:
# action = 1 --> execute state.run
action = 1
if _debugmode:
print('RUN state \\', cs_debug, file=_fdebug)
print('ALWAYS-RUN x', cs_debug, file=_fdebug)
else:
# action = 2 --> change to the state pointed by
# state.iffail
action = 2
if _debugmode:
print('FAIL state |', cs_debug, file=_fdebug)
if action == 1:
# xline, ns: xline --> override xline with new value
# ns --> value of next state
xline, ns = cs.run(xline, _current_lineno, m)
cs_debug = ns
cs = _states.get(ns)
else:
cs_debug = cs.iffail
cs = _states.get(cs.iffail)
# end while xline
# end for xline
# m is match
m = re.match(cs.regex, xline)
if m:
# action = 1 --> execute state.run
action = 1
if _debugmode:
print('RUN state \\', cs_debug, file=_fdebug)
else:
# action = 2 --> change to the state pointed by
# state.iffail
action = 2
if _debugmode:
print('FAIL state |', cs_debug, file=_fdebug)
if action == 1:
# xline, ns: xline --> override xline with new value
# ns --> value of next state
xline, ns = cs.run(xline, _current_lineno, m)
cs_debug = ns
cs = _states.get(ns)
else:
cs_debug = cs.iffail
cs = _states.get(cs.iffail)
# end while xline
# end for xline
except UnicodeDecodeError as e:
errpos = int(e.start) # error position on file object with UTF-8 error
errbval = hex(e.object[errpos]) # value of byte wich causes UTF-8 error
# When the exception occurs, the _current_lineno value has not been
# updated at all, because the try block failed
# (it is still 0).
# This means we need a workaround to find out on which line of the
# file the problem happened.
# To do this (and more), we create a temporary string which contains
# all the file text UP TO the invalid UTF-8 byte.
untilerr_buf = e.object[0:errpos] # buffer containing file text
untilerr = "".join(map(chr, untilerr_buf))
# splituntil will be a list of strings (each item is a line of text).
# The last item shows the text immediately preceding the point where the
# invalid UTF-8 byte was found.
splituntil = untilerr.split('\n')
# The error line number equals the number of lines of text up to the
# point where the error occurs (the number of items in the splituntil list).
errlineno = len(splituntil)
# finally we can know the actual file info
finfo = pywmlx.nodemanip.fileref + ":" + str(errlineno)
errmsg = (
"UTF-8 Format error.\nCan't decode byte " + str(errbval) + ' (' +
e.reason + ').\n' +
'Probably your file is not encoded with UTF-8 encoding: you ' +
'should open the file with an advanced text editor, and re-save ' +
'it with UTF-8 encoding.\n' +
'To avoid this problem in the future, you might want to set ' +
'the default encoding of your editor to UTF-8.\n\n' +
'Text preceding the invalid byte (source file, line ' +
str(errlineno) + '):\n' + splituntil[-1] + '\n'
)
wmlerr(finfo, errmsg)
pywmlx.nodemanip.closefile(_dictionary, _current_lineno)

24 changes: 13 additions & 11 deletions utils/wmlxgettext
Expand Up @@ -44,23 +44,29 @@ import pywmlx
def commandline(args):
parser = argparse.ArgumentParser(
description='Generate .po from WML/lua file list.',
usage='''wmlxgettext --domain=DOMAIN [--directory=START_PATH]
usage='''wmlxgettext --domain=DOMAIN -o OUTPUT_FILE
[--directory=START_PATH]
[--recursive] [--initialdomain=INITIALDOMAIN]
[--package-version=PACKAGE_VERSION]
[--no-text-colors] [--fuzzy] [--warnall] [-o OUTPUT_FILE]
[--no-text-colors] [--fuzzy] [--warnall]
FILE1 FILE2 ... FILEN'''
)
parser.add_argument(
'--version',
action='version',
version='wmlxgettext 2016.10.03.py3'
version='wmlxgettext 2017.06.25.py3'
)
parser.add_argument(
'-o',
required=True,
default=None,
dest='outfile',
help= ('Destination file. By default the output '
'will be printed on stdout')
help= ('Destination file. In some special situations you might want '
'to write the output to STDOUT instead of writing '
'an actual file (using "-o -"). On a standard usage, however, '
'you should avoid to write the output to STDOUT (or you can'
'face some issues related to text encoding '
'[**REQUIRED ARGUMENT**]')
)
parser.add_argument(
'--domain',
Expand Down Expand Up @@ -156,16 +162,12 @@ def main():
sentlist = dict()
fileno = 0
fdebug = None
if args.outfile == '-':
args.outfile = None
if args.debugmode:
fdebug = open('debug.txt', 'w', encoding='utf-8')
pywmlx.statemachine.setup(sentlist, args.initdom, args.domain,
args.warnall, fdebug)
if args.warnall is True and args.outfile is None:
pywmlx.wmlwarn('command line warning', 'Writing the output to stdout '
'(and then eventually redirect that output to a file) '
'is a deprecated usage. Please, consider to use the '
'"-o <outfile.po>" option, instead of using the '
'output redirection')
filelist = None
if args.recursive is False and args.filelist is None:
pywmlx.wmlerr("bad command line", "FILELIST must not be empty. "
Expand Down

0 comments on commit 3f287b9

Please sign in to comment.