Skip to content

Commit

Permalink
[offline-renderer] prevent infinite loop on malformed list
Browse files Browse the repository at this point in the history
A run of <li> tags outside of any list kept increasing the indent until
there was no space left to wrap the words, so the renderer looped forever.

Possibly some template is creating these, as they look like:
  <li style="-moz-float-edge: content-box">
They appear outside of any <ul> and have no corresponding </li>.

article in question was:
  2007 in the Philippines

The present solution is simply to ignore these stray <li> tags.

Signed-off-by: Christopher Hall <hsw@openmoko.com>
  • Loading branch information
hxw committed Dec 25, 2009
1 parent 9c25585 commit 6cd2333
Showing 1 changed file with 37 additions and 25 deletions.
62 changes: 37 additions & 25 deletions host-tools/offline-renderer/ArticleRenderer.py
Expand Up @@ -624,19 +624,20 @@ def handle_starttag(self, tag, attrs):
self.enter_list(tag)

elif tag == 'li':
try:
self.li_cnt[self.level] += 1
except KeyError:
(line, column) = self.getpos()
if 0 == self.level:
if warnings:
(line, column) = self.getpos()
PrintLog.message('Warning: stray </%s> @[L%d/C%d] in article[%d]: %s' %
('<li>', line, column, article_count + 1, g_this_article_title))
# force ul since this is a li without a parent
(t, p) = self.tag_stack.pop()
self.tag_stack.append(('ul', p))
self.tag_stack.append((t,p))
self.enter_list('ul')
self.li_cnt[self.level] += 1
return # just ignore it
# force ul since this is a li without a parent
#(t, p) = self.tag_stack.pop()
#self.tag_stack.append(('ul', p))
#self.tag_stack.append((t,p))
#self.enter_list('ul')

self.li_cnt[self.level] += 1

if self.li_type[self.level] == 'ol':
self.wordwrap.append(("%d" % self.li_cnt[self.level]) + u".", DEFAULT_FONT_IDX, None)
Expand Down Expand Up @@ -823,6 +824,7 @@ def list_increase_indent(self):
def leave_list(self):
    """Close the current list nesting level.

    Calls self.flush_buffer(), emits the LIST_MARGIN_TOP escape code,
    discards the item counter kept for the current level in self.li_cnt
    and then steps self.level back by one (it never goes below zero).
    """
    self.flush_buffer()
    esc_code0(LIST_MARGIN_TOP)
    # Malformed markup can close a list level that never got a counter;
    # pop() with a default tolerates that where 'del' would raise KeyError.
    self.li_cnt.pop(self.level, None)
    if self.level > 0:
        self.level -= 1

Expand Down Expand Up @@ -988,13 +990,12 @@ def write_article():
article_count += 1
if verbose:
PrintLog.message("[MWR %d] %s" % (article_count, g_this_article_title))
sys.stdout.flush()

elif article_count % 1000 == 0:
now_time = time.time()
PrintLog.message("Render[%d]: %7.2fs %10d" % (file_number, now_time - start_time, article_count))
start_time = now_time


output.flush()

# create link
Expand All @@ -1017,26 +1018,37 @@ def write_article():
literalContextBits = 3,
literalPosBits = 0, posBits = 2, algorithm = 1, eos = 1)
f_out.write(body)
write_article_index(file_offset, len(body))
else:
f_out.write(header)
f_out.write(links)
f_out.write(body)

f_out.flush()
output.truncate(0)
if compress:

try:
(article_number, fnd_offset, restricted) = article_index(g_this_article_title)
data_offset = (file_offset & 0x7fffffff)

if bool(int(restricted)): # '0' is True so turn it into False
data_offset |= 0x80000000
data_length = (0x80 << 24) | (file_number << 24) | len(body) # 0x80 => lzma encoding
i_out.write(struct.pack('III', data_offset, fnd_offset, data_length))
except KeyError:
PrintLog.message('Error in: write_article, Title not found')
PrintLog.message('Title: %s' % g_this_article_title)
PrintLog.message('Offset: %s' % file_offset)
PrintLog.message('Count: %s' % article_count)

def write_article_index(file_offset, length):
    """Write the index record for the current article to the index file.

    The current title (g_this_article_title) is looked up with
    article_index().  On success one record of three unsigned integers is
    packed with struct format 'III', written to i_out and flushed:

      1. file_offset masked to its low 31 bits, with bit 31 set when the
         looked-up 'restricted' field converts to a non-zero integer
      2. the second value returned by article_index()
      3. 0x80 (lzma encoding marker) and file_number, both shifted left
         by 24 bits, OR-ed with length

    If the lookup raises KeyError the failure is reported through
    PrintLog and nothing is written.
    """
    global i_out
    global g_this_article_title
    global file_number
    global article_count

    try:
        lookup = article_index(g_this_article_title)
    except KeyError:
        PrintLog.message('Error in: write_article, Title not found')
        PrintLog.message('Title:  %s' % g_this_article_title)
        PrintLog.message('Offset: %s' % file_offset)
        PrintLog.message('Count:  %s' % article_count)
        return

    (_article_number, fnd_offset, restricted) = lookup

    # 'restricted' is converted through int() because a '0' string would
    # otherwise count as true
    restricted_bit = 0
    if int(restricted):
        restricted_bit = 0x80000000
    data_offset = (file_offset & 0x7fffffff) | restricted_bit

    # 0x80 => lzma encoding
    data_length = ((0x80 | file_number) << 24) | length

    record = struct.pack('III', data_offset, fnd_offset, data_length)
    i_out.write(record)
    i_out.flush()


# run the program
if __name__ == "__main__":
Expand Down

0 comments on commit 6cd2333

Please sign in to comment.