# Bytes

In [1]:
# bytes() builds a bytes object from an iterable of ints (here a tuple)
bytes((1, 2, 3))

b'\x01\x02\x03'

In [2]:
# Indexing a bytes object yields an int; slicing yields bytes, even when the
# slice holds a single element.
record = b'0.03\r\n\x00'
# Index -> int. Checked with an assert because a bare `record[-1]` in the
# middle of a cell is evaluated and discarded: only the last expression of a
# cell is displayed.
assert isinstance(record[-1], int) and record[-1] == 0
record[-1:]   # slice -> single-element bytes (this is the displayed result)

b'\x00'

In [3]:
# float() accepts bytes directly; no explicit decode to str is needed
float(b'0.03')

0.03

In [4]:
# strip trailing NUL, CR and LF bytes; the rstrip() argument is a set of byte
# values to remove, so the order in which they are listed does not matter
# (the data ends with \r\n\x00, the argument lists \x00\r\n)
b'0.03\r\n\x00'.rstrip(b'\x00\r\n')

b'0.03'

In [5]:
# an octal escape may use fewer than three digits: \0 is the NUL byte
# (this works here because the next character, 'b', is not an octal digit)
b'a\0b'

b'a\x00b'

In [6]:
# ...but the short form only works if it is not followed by a digit 0-7:
# here \052 is read as ONE byte (0o52 == 42 == '*'), not as \0 followed by '52'
b'a\052b'

b'a*b'

## Representation

In [7]:
# hex() renders every byte as two lowercase hex digits, including printable
# ones such as 'L' (4c), '=' (3d) and ' ' (20)
b'\xcdL\x07=\xfc\x00\x00 '.hex()

'cd4c073dfc000020'

## Representing arbitrary bytes in strings for system interfaces
* [PEP 383 – Non-decodable Bytes in System Character Interfaces](https://peps.python.org/pep-0383/)

"non-decodable bytes >= 128 will be represented as lone surrogate codes U+DC80..U+DCFF. Bytes below 128 will produce exceptions"

"This PEP allows the possibility of “smuggling” bytes in character strings. This would be a security risk if the bytes are security-critical when interpreted as characters on a target system, such as path name separators. For this reason, the PEP rejects smuggling bytes below 128."

* [Python Discourse: Drop supporting bytes on `sys.path`](https://discuss.python.org/t/drop-supporting-bytes-on-sys-path/17225/6?u=vbrozik)

In [20]:
import os

# Create an empty file in the current directory whose name is given as raw
# bytes (E5 E6).
with open(b'\xe5\xe6', 'wb'):
    pass

# List the directory as str and keep only the entries whose first character
# is not printable; the file created above shows up as '\udce5\udce6'.
[entry for entry in os.listdir('.') if not entry[0].isprintable()]

['\udce5\udce6']

In [21]:
# clean up: delete the file with the bytes name b'\xe5\xe6' created in the
# earlier cell (the name is passed as bytes again)
os.remove(b'\xe5\xe6')

In [11]:
# a str holding lone surrogates cannot be turned into bytes with the plain
# 'utf-8' codec: this raises UnicodeEncodeError ("surrogates not allowed")
# NOTE(review): the PEP 383 error handler (errors='surrogateescape') is
# presumably the intended way to get the original bytes back — consider
# adding a cell that demonstrates it
bytes('\udce5\udce6', 'utf-8')

UnicodeEncodeError: 'utf-8' codec can't encode characters in position 0-1: surrogates not allowed

In [23]:
# the reverse direction fails as well: a plain 'utf-8' decode of b'\xe5\xe6'
# raises UnicodeDecodeError ("invalid continuation byte")
b'\xe5\xe6'.decode('utf-8')

UnicodeDecodeError: 'utf-8' codec can't decode byte 0xe5 in position 0: invalid continuation byte