Permalink
Browse files

[offline-renderer] attempt to convert %xx

Only convert the %xx escapes in a particular redirect title if, taken
together, they decode to valid UTF-8; otherwise leave the title unchanged

Signed-off-by: Christopher Hall <hsw@openmoko.com>
  • Loading branch information...
1 parent 275692b commit 0233f7bc4676aa340fe21abbe992ab1a0b05f2e3 @hxw hxw committed Mar 30, 2010
Showing with 10 additions and 0 deletions.
  1. +10 −0 host-tools/offline-renderer/ArticleIndex.py
View
10 host-tools/offline-renderer/ArticleIndex.py
@@ -10,6 +10,7 @@
import os, sys, re
import struct
import littleparser
+import urllib
import getopt
import os.path
import time
@@ -386,6 +387,15 @@ def redirect(self, category, key, title, rcategory, rkey, rtitle, seek):
title = self.translate(title).strip(u'\u200e\u200f')
rtitle = self.translate(rtitle).strip().strip(u'\u200e\u200f')
+
+ # redirected title may contain '%xx' escapes - decode them as percent-encoded UTF-8;
+ # if decoding fails, keep the %xx sequences intact, since they must represent
+ # either a literal %xx or some unknowable encoding scheme
+ try:
+ rtitle = unicode(urllib.unquote(rtitle.encode('utf-8')), 'utf-8').strip().strip(u'\u200e\u200f')
+ except UnicodeDecodeError:
+ pass
+
rtitle = whitespaces.sub(' ', rtitle).strip().lstrip(':')
if self.KEY_TEMPLATE == key:

0 comments on commit 0233f7b

Please sign in to comment.