Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Read marks as unicode

This is the meat of Christian's all-unicode patch. It comes after
the templates, but it must be pulled forward, because it is impossible
to tinker with the templates using the available mark corpora, which
contain international tags and titles.

Clearly it's a bug: we write marks using a UTF-8 writer, so we must
read them using a UTF-8 reader. The current code miraculously works by
using safestr(), but the dictionaries of templates mix everything up,
thus making the problem apparent.
  • Loading branch information...
commit d2881c702df8eadf0ffa25832f025dd6eb9ecfa2 1 parent b68f910
@zaitcev authored
Showing with 13 additions and 5 deletions.
  1. +6 −0 slasti/__init__.py
  2. +1 −1  slasti/main.py
  3. +6 −4 slasti/tagbase.py
View
6 slasti/__init__.py
@@ -5,6 +5,8 @@
# See file COPYING for licensing information (expect GPL 2).
#
+import urllib
+
class AppError(Exception):
pass
class App400Error(Exception):
@@ -25,6 +27,10 @@ def safestr(u):
return u.encode('utf-8')
return u
+def escapeURLComponent(s):
+ # Turn s into a bytes first, quote_plus blows up otherwise
+ return unicode(urllib.quote_plus(s.encode("utf-8")))
+
class Context:
def __init__(self, pfx, user, base, method, path, query, pinput, coos):
# prefix: Path where the application is mounted in WSGI or empty string.
View
2  slasti/main.py
@@ -63,7 +63,7 @@ def edit_anchor_html(mark, path, text):
def tag_anchor_html(tag, path):
if tag == None:
return ' -'
- tagu = urllib.quote_plus(tag)
+ tagu = slasti.escapeURLComponent(tag)
tagt = unicode(cgi.escape(slasti.safestr(tag)),'utf-8')
return ' <a href="%s/%s/">%s</a>' % (path, tagu, tagt)
View
10 slasti/tagbase.py
@@ -73,7 +73,8 @@ def load_tag(tagdir, tag):
def read_tags(markdir, markname):
try:
- f = open(markdir+"/"+markname, "r")
+ f = codecs.open(markdir+"/"+markname, "r",
+ encoding="utf-8", errors="replace")
except IOError:
return []
@@ -167,7 +168,8 @@ def __init__(self, base, fromtag, marklist, markindex):
self.tags = []
try:
- f = open(base.markdir+"/"+markname, "r")
+ f = codecs.open(base.markdir+"/"+markname, "r",
+ encoding="utf-8", errors="replace")
except IOError:
# Set a red tag to tell us where we crashed.
self.stamp1 = 1
@@ -228,8 +230,8 @@ def __str__(self):
# There do not seem to be any exceptions raised with weird inputs.
datestr = time.strftime("%Y-%m-%d", time.gmtime(self.stamp0))
return self.ourlist[self.ourindex]+'|'+datestr+'|'+\
- self.title+'|'+self.url+'|'+self.note+"|"+\
- slasti.safestr(unicode(self.tags))
+ slasti.safestr(self.title)+'|'+self.url+'|'+\
+ slasti.safestr(self.note)+"|"+slasti.safestr(self.tags)
def key(self):
return (self.stamp0, self.stamp1)
Please sign in to comment.
Something went wrong with that request. Please try again.