## Dual-mode str and bytes APIs

##### Example 4-22. ramanujan.py: compare the behavior of simple str and bytes regular expressions.

In [4]:
import re

# Digit matchers: the same \d+ pattern, compiled once per input type.
re_numbers_str = re.compile(r'\d+')      # str pattern, used on str text
re_numbers_bytes = re.compile(rb'\d+')   # bytes pattern, used on bytes text

# Word matchers: the same \w+ pattern, compiled the same two ways.
re_words_str = re.compile(r'\w+')        # str pattern, used on str text
re_words_bytes = re.compile(rb'\w+')     # bytes pattern, used on bytes text

In [7]:
# Sample sentence mixing Tamil digits (written as \u escapes), ASCII digits
# and superscript digits, kept both as str and as its UTF-8 encoded bytes.
text_str = (
    "Ramanujan saw \u0be7\u0bed\u0be8\u0bef"
    " as 1729 = 1³ + 12³ = 9³ + 10³."
)
text_bytes = text_str.encode('utf_8')

# Show the repr on its own indented line below the 'Text' label.
print('Text', repr(text_str), sep='\n ')

Text
 'Ramanujan saw ௧௭௨௯ as 1729 = 1³ + 12³ = 9³ + 10³.'


In [13]:
print('Numbers')
# The str pattern r'\d+' matches both the Tamil and the ASCII digits.
print(' str :', re_numbers_str.findall(text_str))

# The bytes pattern rb'\d+' matches only the ASCII bytes for digits,
# so the UTF-8 encoded Tamil digits are not reported.
print(' bytes:', re_numbers_bytes.findall(text_bytes))

Numbers
 str : ['௧௭௨௯', '1729', '1', '12', '9', '10']
 bytes: [b'1729', b'1', b'12', b'9', b'10']


In [15]:
print('Words')
# The str pattern r'\w+' matches the letters, the superscripts, and both the
# Tamil and the ASCII digits.
print(' str :', re_words_str.findall(text_str))

# The bytes pattern rb'\w+' matches only the ASCII bytes for letters and
# digits, so the encoded superscripts and Tamil digits are left out.
print(' bytes:', re_words_bytes.findall(text_bytes))

Words
 str : ['Ramanujan', 'saw', '௧௭௨௯', 'as', '1729', '1³', '12³', '9³', '10³']
 bytes: [b'Ramanujan', b'saw', b'as', b'1729', b'1', b'12', b'9', b'10']
