Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

[offline-renderer] Changes for language links

Language links that are simple are encoded as:
  en:Just Testing\0

Language links with accents:
  fr#Hiroshi Hara\1Hìrôšhî Hârå\0
  ja#harakouji\1原広司\0

Note: \0 and \1 are binary bytes

Signed-off-by: Christopher Hall <hsw@openmoko.com>
  • Loading branch information...
commit 3e1658eb146312bdb1b00c962fd70a2a25af96cd 1 parent b3cded8
@hxw hxw authored
View
213 host-tools/offline-renderer/ArticleIndex.py
@@ -20,22 +20,13 @@
import FileScanner
import TidyUp
import PrintLog
-
-
-# this _must_ be in ascending ASCII sequence
-KEYPAD_KEYS = """ !#$%&'()*+,-.0123456789=?@abcdefghijklmnopqrstuvwxyz"""
-
-# to check if in order: uncomment and look at result
-#for c in KEYPAD_KEYS:
-# print('{0:d}'.format(ord(c)))
-#sys.exit(0)
+import LanguageTranslation
+import SearchKey
# maximum string lengths for FND file
MAXIMUM_TITLE_LENGTH = 63 # c-code is 64 including '\0'
-# underscore and space
-whitespaces = re.compile(r'([\s_]+)', re.IGNORECASE)
# to catch loop in redirections
class CycleError(Exception):
@@ -63,6 +54,7 @@ def usage(message):
print(' --limit=number Limit the number of articles processed')
print(' --prefix=name Device file name portion for .fnd/.pfx [pedia]')
print(' --templates=file Database for templates [templates.db]')
+ print(' --truncate-title Set to when not using language links to save space')
exit(1)
@@ -72,7 +64,7 @@ def main():
try:
- opts, args = getopt.getopt(sys.argv[1:], 'hvi:o:c:t:l:p:L:',
+ opts, args = getopt.getopt(sys.argv[1:], 'hvi:o:c:t:l:p:L:T',
['help', 'verbose',
'article-index=',
'article-offsets=',
@@ -81,6 +73,7 @@ def main():
'limit=',
'prefix=',
'language=',
+ 'truncate-title',
])
except getopt.GetoptError, err:
usage(err)
@@ -94,6 +87,7 @@ def main():
template_name = 'templates.db'
limit = 'all'
language = 'en' # some languages may require special processing
+ truncate_title = False # set true when not using language links
for opt, arg in opts:
if opt in ('-v', '--verbose'):
@@ -108,6 +102,8 @@ def main():
cnt_name = arg
elif opt in ('-t', '--templates'):
template_name = arg
+ elif opt in ('-T', '--truncate-title'):
+ truncate_title = True
elif opt in ('-l', '--limit'):
if arg[-1] == 'k':
arg = arg[:-1] + '000'
@@ -129,9 +125,9 @@ def main():
if [] == args:
usage('Missing argument(s)')
- language_convert = LanguageNull()
+ language_convert = LanguageTranslation.LanguageNull()
if 'ja' == language:
- language_convert = Furigana()
+ language_convert = LanguageTranslation.Furigana()
processor = FileProcessing(articles = art_name, offsets = off_name,
templates = template_name,
@@ -170,7 +166,7 @@ def main():
cf.close()
- output_fnd(fnd_name, processor, language_convert)
+ output_fnd(fnd_name, processor, language_convert, truncate_title)
output_pfx(pfx_name)
del processor
@@ -189,14 +185,14 @@ def generate_bigram(text):
if len(text) > 2:
try:
- if text[0].lower() in KEYPAD_KEYS and text[1].lower() in KEYPAD_KEYS:
+ if SearchKey.is_valid_character(text[0]) and SearchKey.is_valid_character(text[1]):
bigram[text[0:2]] += 1
except KeyError:
bigram[text[0:2]] = 1
if len(text) > 4:
try:
- if text[2].lower() in KEYPAD_KEYS and text[3].lower() in KEYPAD_KEYS:
+ if SearchKey.is_valid_character(text[2]) and SearchKey.is_valid_character(text[3]):
bigram[text[2:4]] += 1
except KeyError:
bigram[text[2:4]] = 1
@@ -381,7 +377,6 @@ def title(self, category, key, title, seek):
def redirect(self, category, key, title, rcategory, rkey, rtitle, seek):
- global whitespaces
global verbose
title = self.translate(title).strip(u'\u200e\u200f')
@@ -396,7 +391,7 @@ def redirect(self, category, key, title, rcategory, rkey, rtitle, seek):
except UnicodeDecodeError:
pass
- rtitle = whitespaces.sub(' ', rtitle).strip().lstrip(':')
+ rtitle = SearchKey.compact_spaces(rtitle).lstrip(':').strip()
if self.KEY_TEMPLATE == key:
if title != rtitle:
@@ -532,22 +527,15 @@ def find(self, title, level = 0):
return result
-import unicodedata
-def strip_accents(s):
- if type(s) == str:
- s = unicode(s, 'utf-8')
- return ''.join((c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn'))
-
-
def bigram_encode(title):
+ """encode a title in bigram form"""
global bigram
- global whitespaces
result = ''
- title = strip_accents(title)
+ title = SearchKey.strip_accents(title)
while len(title) >= 2:
- if title[0].lower() in KEYPAD_KEYS:
+ if SearchKey.is_valid_character(title[0]):
b = title[0:2]
if b in bigram:
@@ -560,18 +548,15 @@ def bigram_encode(title):
#result += '?'
title = title[1:]
if len(title) == 1:
- if title[0].lower() in KEYPAD_KEYS:
+ if SearchKey.is_valid_character(title[0]):
result += chr(ord(title[0]))
#else:
# result += '?'
- # compact all spaces
- result = whitespaces.sub(' ', result).strip()
-
- return result
+ return SearchKey.compact_spaces(result)
-def output_fnd(filename, article_index, language_processor):
+def output_fnd(filename, article_index, language_processor, truncate_title):
"""create bigram table"""
global bigram
global index_matrix
@@ -606,19 +591,11 @@ def output_fnd(filename, article_index, language_processor):
#article_list = [strip_accents(k) for k in article_index.keys()]
#article_list.sort(key = lambda x: strip_accents(x).lower())
- def sort_key(key):
- global KEYPAD_KEYS
- global whitepaces
-
- result = ''.join(c for c in strip_accents(language_processor.translate(key).lower()) if c in KEYPAD_KEYS)
- # compact all spaces
- result = whitespaces.sub(' ', result).strip()
- return result
-
PrintLog.message(u'Sorting titles')
start_time = time.time()
- article_list = [ (sort_key(title), title) for title in article_index.all_indices() ]
+ article_list = [ (SearchKey.make_key(language_processor.translate(title)), title)
+ for title in article_index.all_indices() ]
article_list.sort()
PrintLog.message(u'Time: {0:7.1f}s'.format(time.time() - start_time))
@@ -641,7 +618,9 @@ def sort_key(key):
if '' == bigram_title and is_redirect:
continue
- utf8_title = title.encode('utf-8')[:MAXIMUM_TITLE_LENGTH]
+ utf8_title = title.encode('utf-8')
+ if truncate_title:
+ utf8_title = utf8_title[:MAXIMUM_TITLE_LENGTH]
offset = out_f.tell()
article_index.set_index(title, (article_number, offset, restricted, is_redirect))
@@ -697,7 +676,7 @@ def output_pfx(filename):
PrintLog.message(u'Writing: {0:s}'.format(filename))
start_time = time.time()
out_f = open(filename, 'w')
- list = '\0' + KEYPAD_KEYS
+ list = '\0' + SearchKey.all_characters()
for k1 in list:
for k2 in list:
for k3 in list:
@@ -712,146 +691,6 @@ def output_pfx(filename):
PrintLog.message(u'Time: {0:7.1f}s'.format(time.time() - start_time))
-class LanguageProcessor(object):
-
- def translate(self, text):
- PrintLog.message('Virual function called')
- sys.exit(1)
-
-
-
-class LanguageNull(LanguageProcessor):
- """no-op class"""
- def translate(self, text):
- """null translation => only strip spaces"""
- return text.strip()
-
-
-class Furigana(LanguageProcessor):
- """Convert Japanese to Romaji"""
-
- KANA_TO_ROMAN = {
-
- u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o',
- u'': 'ka', u'': 'ki', u'': 'ku', u'': 'ke', u'': 'ko',
- u'': 'ga', u'': 'gi', u'': 'gu', u'': 'ge', u'': 'go',
-
- u'': 'sa', u'': 'shi', u'': 'su', u'': 'se', u'': 'so',
- u'': 'za', u'': 'ji', u'': 'zu', u'': 'ze', u'': 'zo',
- u'': 'ta', u'': 'chi', u'': 'tsu', u'': 'te', u'': 'to',
-
- u'': 'da', u'': 'di', u'': 'du', u'': 'de', u'': 'do',
- u'': 'na', u'': 'ni', u'': 'nu', u'': 'ne', u'': 'no',
- u'': 'ha', u'': 'hi', u'': 'fu', u'': 'he', u'': 'ho',
-
- u'': 'ba', u'': 'bi', u'': 'bu', u'': 'be', u'': 'bo',
- u'': 'pa', u'': 'pi', u'': 'pu', u'': 'pe', u'': 'po',
- u'': 'ma', u'': 'mi', u'': 'mu', u'': 'me', u'': 'mo',
-
- u'': 'ya', u'': 'yu', u'': 'yo',
- u'': 'ra', u'': 'ri', u'': 'ru', u'': 're', u'': 'ro',
- u'': 'wa', u'': 'wo',
-
- u'': 'nn',
-
- u'': '-',
-
- u'ウァ': 'wha', u'ウィ': 'whi', u'ウェ': 'whe', u'ウォ': 'who',
- u'ヴァ': 'va', u'ヴィ': 'vi', u'': 'vu', u'ヴェ': 've', u'ヴォ': 'vo',
- u'チャ': 'cya', u'チィ': 'cyi', u'チュ': 'cyu', u'チェ': 'cye', u'チョ': 'cyo',
-
- u'ニャ': 'nya', u'ニィ': 'nyi', u'ニュ': 'nyu', u'ニェ': 'nye', u'ニョ': 'nyo',
- u'シャ': 'sya', u'シィ': 'syi', u'シュ': 'syu', u'シェ': 'sye', u'ショ': 'syo',
- u'キァ': 'kya', u'キィ': 'kyi', u'キュ': 'kyu', u'キェ': 'kye', u'キョ': 'kyo',
-
- u'テャ': 'tha', u'ティ': 'thi', u'テュ': 'thu', u'テェ': 'the', u'テョ': 'tho',
- u'ヒャ': 'hya', u'ヒィ': 'hyi', u'ヒュ': 'hyu', u'ヒェ': 'hye', u'ヒョ': 'hyo',
- u'ミャ': 'mya', u'ミィ': 'myi', u'ミュ': 'myu', u'ミェ': 'mye', u'ミョ': 'myo',
-
- u'リャ': 'rya', u'リィ': 'ryi', u'リュ': 'ryu', u'リェ': 'rye', u'リョ': 'ryo',
- u'ジャ': 'ja', u'ジィ': 'jyi', u'ジュ': 'ju', u'ジェ': 'je' , u'ジョ': 'jo',
- u'ギャ': 'gya', u'ギィ': 'gyi', u'ギュ': 'gyu', u'ギェ': 'gye', u'ギョ': 'gyo',
-
- u'ビャ': 'bya', u'ビィ': 'byi', u'ビュ': 'byu', u'ビェ': 'bye', u'ビョ': 'byo',
- u'ピャ': 'pya', u'ピィ': 'pyi', u'ピュ': 'pyu', u'ピェ': 'pye', u'ピョ': 'pyo',
- u'クァ': 'kha', u'クィ': 'khi', u'クゥ': 'khu', u'クェ': 'khe', u'クォ': 'kho',
-
- u'グァ': 'gha', u'グィ': 'ghi', u'グゥ': 'ghu', u'グェ': 'ghe', u'グォ': 'gho',
- u'ファ': 'fa', u'フィ': 'fi', u'フェ': 'fe', u'フォ': 'fo',
- u'フャ': 'fya', u'フュ': 'fyu', u'フョ': 'fyo',
-
- u'デァ': 'dha', u'ディ': 'dhi', u'デュ': 'dhu', u'デェ': 'dhe', u'デョ': 'dho',
- u'ツァ': 'tsa', u'ツィ': 'tsi', u'ツェ': 'tse', u'ツォ': 'tso',
- }
-
-
- def __init__(self, *args, **kw):
- super(Furigana, self).__init__(*args, **kw)
-
- import MeCab # load Japanese dictionary interface
-
- self.mecab = MeCab.Tagger('-Ochasen')
-
-
- def romanise(self, text):
- """private method for converting Japanese phonetics to Romaji"""
-
- if type(text) != unicode:
- text = unicode(text, "utf-8")
-
- result = ''
- i = 0
- duplicate = False
- last = len(text) - 1
- while i <= last:
- key = text[i:i + 2] # extract a pair of phonetics
- if not (i < last and key in self.KANA_TO_ROMAN):
- key = text[i]
-
- if key in self.KANA_TO_ROMAN:
- s = self.KANA_TO_ROMAN[key]
- i += len(key) - 1
- if duplicate:
- s = s[0] + s
- duplicate = False
- result += s
- elif u'' == key:
- duplicate = True
- else:
- result += key
- duplicate = False
- i += 1
-
- return result
-
-
- def translate(self, text):
- """take Japanese string and convert to Roman letters"""
-
- if type(text) == unicode:
- text = text.encode('utf-8')
- n = self.mecab.parseToNode(text)
-
- result = ''
- while n:
-
- if n.surface == '':
- n = n.next
- continue
-
- feature = unicode(n.feature,'utf-8').split(',')
-
- if len(feature) < 8 or feature[7] == '*':
- r = self.romanise(n.surface)
- else:
- r = self.romanise(feature[7])
-
- result += r + " "
- n = n.next
-
- return result.strip()
-
-
# run the program
if __name__ == "__main__":
main()
View
17 host-tools/offline-renderer/ArticleRenderer.py
@@ -21,6 +21,9 @@
import bucket
import PrintLog
import gd
+import littleparser
+import LanguageTranslation
+import SearchKey
verbose = False
warnings = False
@@ -1145,9 +1148,21 @@ def write_article(language_links):
# create language links
links_stream = io.BytesIO('')
+ japanese_convert = LanguageTranslation.Furigana().translate
+ translate = littleparser.LittleParser().translate
for l in language_links:
- links_stream.write(l.encode('utf-8') + '\0')
+ language, link = l.split(':', 1)
+
+ if 'ja' == language:
+ stripped = SearchKey.strip_accents(japanese_convert(link))
+ else:
+ stripped = SearchKey.strip_accents(link)
+
+ if link == stripped:
+ links_stream.write(l.encode('utf-8') + '\0')
+ else:
+ links_stream.write((language + '#' + stripped).encode('utf-8') + '\1' + link.encode('utf-8') + '\0')
links_stream.flush()
langs = links_stream.getvalue()
View
155 host-tools/offline-renderer/LanguageTranslation.py
@@ -0,0 +1,155 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# COPYRIGHT: Openmoko Inc. 2010
+# LICENSE: GPL Version 3 or later
+# DESCRIPTION: Converting Asian language titles to phonetic representation
+# AUTHORS: Sean Moss-Pultz <sean@openmoko.com>
+# Christopher Hall <hsw@openmoko.com>
+
+import os
+import sys
+import string
+
+class LanguageProcessor(object):
+
+ def translate(self, text):
+ PrintLog.message('Virual function called')
+ sys.exit(1)
+
+
+class LanguageNull(LanguageProcessor):
+ """no-op class"""
+
+ def translate(self, text):
+ """null translation => only strip spaces"""
+ return text.strip()
+
+
+class Furigana(LanguageProcessor):
+ """Convert Japanese to Romaji"""
+
+ KANA_TO_ROMAN = {
+
+ u'': 'a', u'': 'i', u'': 'u', u'': 'e', u'': 'o',
+ u'': 'ka', u'': 'ki', u'': 'ku', u'': 'ke', u'': 'ko',
+ u'': 'ga', u'': 'gi', u'': 'gu', u'': 'ge', u'': 'go',
+
+ u'': 'sa', u'': 'shi', u'': 'su', u'': 'se', u'': 'so',
+ u'': 'za', u'': 'ji', u'': 'zu', u'': 'ze', u'': 'zo',
+ u'': 'ta', u'': 'chi', u'': 'tsu', u'': 'te', u'': 'to',
+
+ u'': 'da', u'': 'di', u'': 'du', u'': 'de', u'': 'do',
+ u'': 'na', u'': 'ni', u'': 'nu', u'': 'ne', u'': 'no',
+ u'': 'ha', u'': 'hi', u'': 'fu', u'': 'he', u'': 'ho',
+
+ u'': 'ba', u'': 'bi', u'': 'bu', u'': 'be', u'': 'bo',
+ u'': 'pa', u'': 'pi', u'': 'pu', u'': 'pe', u'': 'po',
+ u'': 'ma', u'': 'mi', u'': 'mu', u'': 'me', u'': 'mo',
+
+ u'': 'ya', u'': 'yu', u'': 'yo',
+ u'': 'ra', u'': 'ri', u'': 'ru', u'': 're', u'': 'ro',
+ u'': 'wa', u'': 'wo',
+
+ u'': 'nn',
+
+ u'': '-',
+
+ u'ウァ': 'wha', u'ウィ': 'whi', u'ウェ': 'whe', u'ウォ': 'who',
+ u'ヴァ': 'va', u'ヴィ': 'vi', u'': 'vu', u'ヴェ': 've', u'ヴォ': 'vo',
+ u'チャ': 'cya', u'チィ': 'cyi', u'チュ': 'cyu', u'チェ': 'cye', u'チョ': 'cyo',
+
+ u'ニャ': 'nya', u'ニィ': 'nyi', u'ニュ': 'nyu', u'ニェ': 'nye', u'ニョ': 'nyo',
+ u'シャ': 'sya', u'シィ': 'syi', u'シュ': 'syu', u'シェ': 'sye', u'ショ': 'syo',
+ u'キァ': 'kya', u'キィ': 'kyi', u'キュ': 'kyu', u'キェ': 'kye', u'キョ': 'kyo',
+
+ u'テャ': 'tha', u'ティ': 'thi', u'テュ': 'thu', u'テェ': 'the', u'テョ': 'tho',
+ u'ヒャ': 'hya', u'ヒィ': 'hyi', u'ヒュ': 'hyu', u'ヒェ': 'hye', u'ヒョ': 'hyo',
+ u'ミャ': 'mya', u'ミィ': 'myi', u'ミュ': 'myu', u'ミェ': 'mye', u'ミョ': 'myo',
+
+ u'リャ': 'rya', u'リィ': 'ryi', u'リュ': 'ryu', u'リェ': 'rye', u'リョ': 'ryo',
+ u'ジャ': 'ja', u'ジィ': 'jyi', u'ジュ': 'ju', u'ジェ': 'je' , u'ジョ': 'jo',
+ u'ギャ': 'gya', u'ギィ': 'gyi', u'ギュ': 'gyu', u'ギェ': 'gye', u'ギョ': 'gyo',
+
+ u'ビャ': 'bya', u'ビィ': 'byi', u'ビュ': 'byu', u'ビェ': 'bye', u'ビョ': 'byo',
+ u'ピャ': 'pya', u'ピィ': 'pyi', u'ピュ': 'pyu', u'ピェ': 'pye', u'ピョ': 'pyo',
+ u'クァ': 'kha', u'クィ': 'khi', u'クゥ': 'khu', u'クェ': 'khe', u'クォ': 'kho',
+
+ u'グァ': 'gha', u'グィ': 'ghi', u'グゥ': 'ghu', u'グェ': 'ghe', u'グォ': 'gho',
+ u'ファ': 'fa', u'フィ': 'fi', u'フェ': 'fe', u'フォ': 'fo',
+ u'フャ': 'fya', u'フュ': 'fyu', u'フョ': 'fyo',
+
+ u'デァ': 'dha', u'ディ': 'dhi', u'デュ': 'dhu', u'デェ': 'dhe', u'デョ': 'dho',
+ u'ツァ': 'tsa', u'ツィ': 'tsi', u'ツェ': 'tse', u'ツォ': 'tso',
+ }
+
+
+ def __init__(self, *args, **kw):
+ super(Furigana, self).__init__(*args, **kw)
+
+ import MeCab # load Japanese dictionary interface
+
+ self.mecab = MeCab.Tagger('-Ochasen')
+
+
+ def romanise(self, text):
+ """private method for converting Japanese phonetics to Romaji"""
+
+ if type(text) != unicode:
+ text = unicode(text, "utf-8")
+
+ result = ''
+ i = 0
+ duplicate = False
+ last = len(text) - 1
+ while i <= last:
+ key = text[i:i + 2] # extract a pair of phonetics
+ if not (i < last and key in self.KANA_TO_ROMAN):
+ key = text[i]
+
+ if key in self.KANA_TO_ROMAN:
+ s = self.KANA_TO_ROMAN[key]
+ i += len(key) - 1
+ if duplicate:
+ s = s[0] + s
+ duplicate = False
+ result += s
+ elif u'' == key:
+ duplicate = True
+ else:
+ result += key
+ duplicate = False
+ i += 1
+
+ return result
+
+
+ def translate(self, text):
+ """take Japanese string and convert to Roman letters"""
+
+ result = ''
+
+ for text in text.split():
+
+ if type(text) == unicode:
+ text = text.encode('utf-8')
+ n = self.mecab.parseToNode(text)
+
+ while n:
+
+ if n.surface == '':
+ n = n.next
+ continue
+
+ feature = unicode(n.feature,'utf-8').split(',')
+
+ if len(feature) < 8 or feature[7] == '*':
+ r = self.romanise(n.surface)
+ else:
+ r = self.romanise(feature[7])
+
+ result += r
+ n = n.next
+
+ result += ' '
+
+ return result.strip()
View
10 host-tools/offline-renderer/Makefile
@@ -85,6 +85,14 @@ ifeq (YES,$(strip ${VERBOSE}))
VERBOSE_ARG = --verbose
endif
+TRUNCATE_ARG =
+ifneq (yes,$(strip ${ENABLE_LANGUAGE_LINKS}))
+ifneq (YES,$(strip ${ENABLE_LANGUAGE_LINKS}))
+TRUNCATE_ARG += --truncate-title
+endif
+endif
+
+
TARGETS = index parse render combine
.PHONY: all
@@ -118,7 +126,7 @@ index: check-dirs check-xml RedirectedTo.py
--article-counts="${COUNTS_FILE}" \
--templates="${TEMPLATE_FILE}" \
--language="${WIKI_LANGUAGE}" \
- --prefix="${PREFIX}" ${XML_FILES}
+ --prefix="${PREFIX}" ${TRUNCATE_ARG} ${XML_FILES}
HTML_FILES_COUNT := $(words $(wildcard ${WORKDIR_PATH}/*.html))
.PHONY: merge
View
167 host-tools/offline-renderer/SearchKey.py
@@ -0,0 +1,167 @@
+#! /usr/bin/env python
+# -*- coding: utf-8 -*-
+# COPYRIGHT: Openmoko Inc. 2010
+# LICENSE: GPL Version 3 or later
+# DESCRIPTION: Convert string to search key
+# AUTHORS: Sean Moss-Pultz <sean@openmoko.com>
+# Christopher Hall <hsw@openmoko.com>
+
+import os, sys
+import re
+import unicodedata
+
+# this _must_ be in ascending ASCII sequence
+KEYPAD_KEYS = """ !#$%&'()*+,-.0123456789=?@abcdefghijklmnopqrstuvwxyz"""
+
+# underscore and space
+whitespaces = re.compile(r'([\s_]+)', re.IGNORECASE)
+
+
+def make_key(text):
+ """filter out only the characters available on the keypad"""
+
+ global whitespaces
+
+ result = ''.join(c for c in strip_accents(text).strip().lower() if c in KEYPAD_KEYS)
+ return compact_spaces(result)
+
+
+def all_characters():
+ """string of all allowed characters in a search key"""
+ return KEYPAD_KEYS
+
+
+def is_valid_character(c):
+ """test if a single character is a valid search key character"""
+ return c.lower() in KEYPAD_KEYS
+
+
+def compact_spaces(text):
+ """condense runs of spaces"""
+ global whitespaces
+
+ return whitespaces.sub(' ', text).strip()
+
+
+def strip_accents(text):
+ """convert all accented [a-zA-Z] to their unaccented form"""
+
+ if type(text) == str:
+ text = unicode(text, 'utf-8')
+
+ return ''.join((c for c in unicodedata.normalize('NFD', text) if unicodedata.category(c) != 'Mn'))
+
+
+def test_keypad_keys():
+ """check that the data structure is correct"""
+
+ global KEYPAD_KEYS
+
+ error_count = 0
+ previous_character = 'NONE'
+ previous_ord = 0
+ # verify that KEYPAD_KEYS is in strictly ascending ASCII order
+ for c in KEYPAD_KEYS:
+ value = ord(c)
+ if value <= previous_ord:
+ print('error "{0!r:s}" = {1:d} <= "{2!r:s}" = {3:d}'.format(c, value, previous_character, previous_ord))
+ error_count += 1
+ previous_ord = value
+ print('total error count = {0:d}'.format(error_count))
+
+
+def test_strip_accents():
+ """test strip_accents function"""
+
+ source = u"""
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 2x ! " # $ % & ' ( ) * + , - . /
+ 3x 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ 4x @ A B C D E F G H I J K L M N O
+ 5x P Q R S T U V W X Y Z [ \ ] ^ _
+ 6x ` a b c d e f g h i j k l m n o
+ 7x p q r s t u v w x y z { | } ~
+
+ Ax   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
+ Bx ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+ Cx À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
+ Dx Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
+ Ex à á â ã ä å æ ç è é ê ë ì í î ï
+ Fx ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
+
+ Ax   Ą ĸ Ŗ ¤ Ĩ Ļ § ¨ Š Ē Ģ Ŧ ­ Ž ¯
+ Bx ° ą ˛ ŗ ´ ĩ ļ ˇ ¸ š ē ģ ŧ Ŋ ž ŋ
+ Cx Ā Á Â Ã Ä Å Æ Į Č É Ę Ë Ė Í Î Ī
+ Dx Đ Ņ Ō Ķ Ô Õ Ö × Ø Ų Ú Û Ü Ũ Ū ß
+ Ex ā á â ã ä å æ į č é ę ë ė í î ī
+ Fx đ ņ ō ķ ô õ ö ÷ ø ų ú û ü ũ ū ˙
+"""
+
+ correct = u"""
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F
+ 2x ! " # $ % & ' ( ) * + , - . /
+ 3x 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ 4x @ A B C D E F G H I J K L M N O
+ 5x P Q R S T U V W X Y Z [ \ ] ^ _
+ 6x ` a b c d e f g h i j k l m n o
+ 7x p q r s t u v w x y z { | } ~
+
+ Ax   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
+ Bx ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
+ Cx A A A A A A Æ C E E E E I I I I
+ Dx Ð N O O O O O × Ø U U U U Y Þ ß
+ Ex a a a a a a æ c e e e e i i i i
+ Fx ð n o o o o o ÷ ø u u u u y þ y
+
+ Ax   A ĸ R ¤ I L § ¨ S E G Ŧ ­ Z ¯
+ Bx ° a ˛ r ´ i l ˇ ¸ s e g ŧ Ŋ z ŋ
+ Cx A A A A A A Æ I C E E E E I I I
+ Dx Đ N O K O O O × Ø U U U U U U ß
+ Ex a a a a a a æ i c e e e e i i i
+ Fx đ n o k o o o ÷ ø u u u u u u ˙
+"""
+
+ converted = strip_accents(source)
+ if correct == converted:
+ print('Accents stripped sucessfully')
+ else:
+ print('Differences encountered')
+ print('Source:')
+ print(source)
+ print('Converted:')
+ print(converted)
+
+
+def test_make_key():
+ """test make_key function"""
+
+ source = """! " # $ % & ' ( ) * + , - . /
+ 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+ @ A B C D E F G H I J K L M N O
+ P Q R S T U V W X Y Z [ \ ] ^ _
+ ` a b c d e f g h i j k l m n o
+ p q r s t u v w x y z { | } ~
+ """
+ correct = """! # $ % & ' ( ) * + , - . 0 1 2 3 4 5 6 7 8 9 = ? @ a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z"""
+
+ converted = make_key(source)
+ if correct == converted:
+ print('Filtered sucessfully')
+ else:
+ print('Differences encountered')
+ print('Source:')
+ print(source)
+ print('Converted:')
+ print(converted)
+
+
+def main():
+ """perform tests"""
+ test_keypad_keys()
+ test_strip_accents()
+ test_make_key()
+
+
+# run the program
+if __name__ == "__main__":
+ main()
Please sign in to comment.
Something went wrong with that request. Please try again.