#### 7.1.1 Unicode

In [3]:
def unicode_test(value):
    """Round-trip one character through the Unicode name database.

    The official name of ``value`` is fetched with ``unicodedata.name`` and
    then turned back into a character with ``unicodedata.lookup``; the input,
    the name, and the recovered character are printed on a single line.
    """
    from unicodedata import lookup, name as name_of

    char_name = name_of(value)
    round_tripped = lookup(char_name)
    report = 'value="%s", name="%s", value2="%s"' % (value, char_name, round_tripped)
    print(report)

unicode_test('A')

value="A", name="LATIN CAPITAL LETTER A", value2="A"


In [6]:
import unicodedata

# Show the glyph via its code-point escape before looking it up by name.
print('\u2415')

# Comma-inverted spelling of the name; kept for comparison, not used below.
standard_name = 'ACKNOWLEDGE, SYMBOL FOR NEGATIVE'
# Spelling that is actually handed to unicodedata.lookup in this cell.
standard_name_python3 = 'SYMBOL FOR NEGATIVE ACKNOWLEDGE'

value = unicodedata.lookup(standard_name_python3)
print(value)

␕
␕


In [10]:
# One character written two ways: by code point (\u) and by name (\N{...}).
for glyph in ('\u2415', '\N{SYMBOL FOR NEGATIVE ACKNOWLEDGE}'):
    print(glyph)

␕
␕


In [25]:
def word_encode(word):
    """Print the length of ``word`` followed by its encoded bytes.

    Two encodings are shown, each on its own labelled line: UTF-8 and
    Python's ``unicode-escape`` codec.
    """
    print(len(word))
    # (label, codec) pairs in output order.  Narrower codecs such as 'ascii'
    # or 'latin-1' are not exercised here: they raise UnicodeEncodeError for
    # characters outside their range, e.g. the snowman passed in below.
    for label, codec in (('UTF-8:', 'utf-8'), ('UnicodeEscape:', 'unicode-escape')):
        print(label, word.encode(codec))

word_encode('\u2603')

1
UTF-8: b'\xe2\x98\x83'
UnicodeEscape: b'\\u2603'


In [35]:
place = 'caf\u00e9'

# Encode to UTF-8, then decode the same bytes two ways: faithfully as UTF-8,
# and as ASCII with the 'ignore' handler, which drops the non-ASCII bytes.
place_bytes_utf8 = place.encode('utf-8')
print(place_bytes_utf8)
for decode_args in (('utf-8',), ('ascii', 'ignore')):
    print(place_bytes_utf8.decode(*decode_args))

print('-----------')

# Encode to ASCII with the 'replace' handler; the resulting bytes are plain
# ASCII, so they decode to the same text under both codecs.
place_bytes_ascii = place.encode('ascii', 'replace')
print(place_bytes_ascii)
for codec in ('utf-8', 'ascii'):
    print(place_bytes_ascii.decode(codec))

b'caf\xc3\xa9'
café
caf
-----------
b'caf?'
caf?
caf?


#### 7.1.2 格式化

In [68]:
# Old-style (%) formatting samples as (template, argument) pairs, printed in
# order.  The surrounding bars make padding and alignment visible.
percent_demos = (
    ('|%s|', '$42#'),          # plain string
    ('|%10s|', '$42#'),        # width 10, right-aligned
    ('|%-10s|', '$42#'),       # width 10, left-aligned
    ('|%s|', 42),              # integer rendered through %s
    ('|%10s|', 42),
    ('|%3d|', 42),             # decimal integer, width 3
    ('|%8.3f|', 42),           # float, width 8, 3 decimals
    ('|%-10s|', 42),
    ('|%9.3f|', 42.1415926),
    ('|%o|', 42),              # octal
    ('|%x|', 42),              # hexadecimal
    ('|%g|', 42.141592614),    # general float format
    ('|%.2f%%|', 42),          # '%%' emits a literal percent sign
)
for template, argument in percent_demos:
    print(template % argument)

|$42#|
|      $42#|
|$42#      |
|42|
|        42|
| 42|
|  42.000|
|42        |
|   42.142|
|52|
|2a|
|42.1416|
|42.00%|


In [121]:
# Side-by-side comparison of old-style '%' formatting and str.format().
# Each group prints the '%' version first, then the equivalent format() call,
# so the two output lines of a group should be identical.

# Replacement fields: automatic, indexed, reordered, and named.
print('|%s|' % '$42#')
print('|{}{}|'.format('$4', '2#'))
print('|{0}{1}|'.format('$4', '2#'))
print('|{1}{0}|'.format('$4', '2#'))
print('|{first}{second}|'.format(first='$4', second='2#'))

# A field can index into an argument ({0[n]}) and mix positional with keyword
# arguments in the same template.
d = {'n': 42, 'f': 7.03, 's': 'string cheese'}
print('|{0[n]}{0[f]}{0[s]}^{1}-{k}|'.format(d, 'cheep', k=200))
print('|{0:s}|'.format('$42#'))

# Right-aligned in a width of 10.
print('------------------')
print('|%10s|' % '$42#')
print('|{0:>10s}|'.format('$42#'))

# Left-aligned in a width of 10.
print('------------------')
print('|%-10s|' % '$42#')
print('|{0:<10s}|'.format('$42#'))

print('------------------')
print('|%s|' % 42)
print('|{0}|'.format(42))

print('------------------')
print('|%3d|' % 42)
print('|{0:3d}|'.format(42))

# The closing bar was missing from the format() template here, which made
# this pair print different text; both lines now end with '|'.
print('------------------')
print('|%8.3f|' % 42)
print('|{0:8.3f}|'.format(42))

# The 's' presentation type needs a str, hence str(42); an int can be
# left-aligned directly with 'd'.
print('------------------')
print('|%-10s|' % 42)
print('|{0:<10s}|'.format(str(42)))
print('|{0:<10d}|'.format(42))

print('------------------')
print('|%9.3f|' % 42.1415926)
print('|{0:9.3f}|'.format(42.1415926))

# Octal.
print('------------------')
print('|%o|' % 42)
print('|{0:o}|'.format(42))

# Hexadecimal.
print('------------------')
print('|%x|' % 42)
print('|{0:x}|'.format(42))

print('------------------')
print('|%g|' % 42.141592614)
print('|{0:g}|'.format(42.141592614))

# '%%' is a literal percent sign in '%' formatting; format() needs no escape.
print('------------------')
print('|%.2f%%|' % 42)
print('|{0:.2f}%|'.format(42))

# Custom fill character ('!') with centre, left, and right alignment.
print('------------------')
print('|{0:!^20s}|'.format('$42#'))
print('|{0:!<20s}|'.format('$42#'))
print('|{0:!>20s}|'.format('$42#'))

|$42#|
|$42#|
|$42#|
|2#$4|
|$42#|
|427.03string cheese^cheep-200|
|$42#|
------------------
|      $42#|
|      $42#|
------------------
|$42#      |
|$42#      |
------------------
|42|
|42|
------------------
| 42|
| 42|
------------------
|  42.000|
|  42.000
------------------
|42        |
|42        |
|42        |
------------------
|   42.142|
|   42.142|
------------------
|52|
|52|
------------------
|2a|
|2a|
------------------
|42.1416|
|42.1416|
------------------
|42.00%|
|42.00%|
------------------
|!!!!!!!!$42#!!!!!!!!|
|$42#!!!!!!!!!!!!!!!!|
|!!!!!!!!!!!!!!!!$42#|


#### 7.1.3 使用正则表达式匹配

In [25]:
import re

source = 'String for test!String for test!String for test!'

# Module-level re.match: the pattern is tried at the start of the string.
m1 = re.match('Str', source)
print(m1)
if m1 is not None:
    # group() is the matched text; groups() is the tuple of captured
    # sub-groups (this pattern defines none).
    for shown in (m1.group(), m1.groups()):
        print(shown)
print('----------------')

# Same match, going through a pre-compiled pattern object.
p2 = re.compile('Str')
m2 = p2.match(source)
if m2 is not None:
    print(m2.group())
    
def print_re_func(pattern, source, re_func):
    """Run one ``re`` function on ``source`` and print what it returned.

    Args:
        pattern: Regular expression passed as the first argument to ``re_func``.
        source: Text passed as the second argument to ``re_func``.
        re_func: Callable taking ``(pattern, source)``; the cell uses
            ``re.match``, ``re.search``, ``re.findall`` and ``re.split``.

    The function prints ``re_func`` itself, then one result line, then a
    separator.  A falsy result prints ``none!``; a result with a ``group``
    method (a match object) prints ``group()``; any other result (for
    example a list) is printed as-is.
    """
    print('func:', re_func)
    m = re_func(pattern, source)
    if not m:
        shown = 'none!'
    elif hasattr(m, 'group'):
        # Match object: show the matched text.
        shown = m.group()
    else:
        # Not a match object; print the value itself.  An explicit check is
        # used instead of a bare ``except`` so that real errors (and
        # KeyboardInterrupt) are not silently swallowed.
        shown = m
    print('pattern:', pattern, 'result:', shown)
    print('-------------')
        
# Run the same source text through several re functions, in order.
for pattern, re_func in (
    ('tes', re.match),
    ('.*tes', re.match),
    ('tes', re.search),
    ('tes', re.findall),
    ('tes', re.split),
):
    print_re_func(pattern, source, re_func)

# re.sub swaps each occurrence of 'tes' for '?'.
m = re.sub('tes', '?', source)
print(m)

<_sre.SRE_Match object; span=(0, 3), match='Str'>
Str
()
----------------
Str
func: <function match at 0x1051f1510>
pattern: tes result: none!
-------------
func: <function match at 0x1051f1510>
pattern: .*tes result: String for test!String for test!String for tes
-------------
func: <function search at 0x1052c91e0>
pattern: tes result: tes
-------------
func: <function findall at 0x1052c9400>
pattern: tes result: ['tes', 'tes', 'tes']
-------------
func: <function split at 0x1052c9378>
pattern: tes result: ['String for ', 't!String for ', 't!String for ', 't!']
-------------
String for ?t!String for ?t!String for ?t!


In [29]:
import re, string
print(string.printable)

# Regex patterns are written as raw strings so the backslash reaches the
# regex engine unchanged; in a normal string literal '\s' and '\w' are
# invalid escape sequences that Python warns about.
print(re.findall(r'\s', string.printable))
# The subject string is NOT raw: its \u escapes must be interpreted by Python.
print(re.findall(r'\w', 'abc' + '-/*' + '\u00ea' + '\u0115')) # Unicode

0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~ 	

[' ', '\t', '\n', '\r', '\x0b', '\x0c']
['a', 'b', 'c', 'ê', 'ĕ']


#### 7.2.1 字节和字节数组

In [31]:
blist = [1, 2, 3, 255]

# Build both byte containers from the same list of integers.
the_bytes = bytes(blist)
the_byte_array = bytearray(blist)
for packed in (the_bytes, the_byte_array):
    print(packed)

# A bytearray accepts item assignment, so one element can be replaced.
the_byte_array[1] = 127
print(the_byte_array)

b'\x01\x02\x03\xff'
bytearray(b'\x01\x02\x03\xff')
bytearray(b'\x01\x7f\x03\xff')


#### 7.2.2 使用struct转换二进制数据

In [35]:
import struct
data1 = b'\x01\x02\x03\x04\x01\x02\x03\x04'
data2 = b'\x04\x03\x02\x01\x04\x03\x02\x01'
# Was len(data): no name `data` is defined in this cell, so it raised
# NameError on a fresh kernel.  data1 is the buffer being unpacked below.
print(len(data1))

# '>LL' reads two big-endian unsigned 32-bit integers.
x, y = struct.unpack('>LL', data1)
print(x, y)
# '<LL' reads two little-endian ones; data2 holds the same values with the
# bytes of each integer reversed, so the result matches the line above.
x, y = struct.unpack('<LL', data2)
print(x, y)
# Reading data1 with the other byte order gives different numbers.
x, y = struct.unpack('<LL', data1)
print(x, y)

8
16909060 16909060
16909060 16909060
67305985 67305985


#### 7.2.4 使用binascii()转换字节/字符串

In [39]:
import binascii
valid_png_header = b'\x89PNG\r\n\x1a\n'

# Bytes -> hexadecimal text (as bytes) -> back to the original bytes.
a = binascii.hexlify(valid_png_header)
b = binascii.unhexlify(a)
for shown in (a, b):
    print(shown)

b'89504e470d0a1a0a'
b'\x89PNG\r\n\x1a\n'
