# collect and summarize from Effective Python 

# 1. Pythonic Thinking

In [None]:
#follow the PEP 8 style
#https://www.python.org/dev/peps/pep-0008/

In [None]:
#str and byte
xxx=bytes()
value=xxx.decode('utf-8') #str
print(value)

xxx='test'
value=xxx.encode('utf-8') #bytes
print(value)


b'test'


In [None]:
#slicing
a=['a','c','d','e','f']
odds=a[::2]
even=a[1::2]
print(odds)
print(even)

re_odds=a[::-2]
re_even=a[-2::-2]
print(re_odds)
print(re_even)

['a', 'd', 'f']
['c', 'e']
['f', 'd', 'a']
['e', 'c']


In [None]:
matrix=[[1,2,3],[4,5,6],[7,8,9]]
filtered=[[x for x in row if x%3==0]
          for row in matrix if sum(row)>=10]
print(filtered)

[[6], [9]]


In [None]:
# enumerate and zip
color_list=['red','blue','yellow','green']
lens=[len(n) for n in color_list]
print(lens)
for index,color in enumerate(color_list,1):
  print(index,color)

for color,len in zip(lens,color_list):
  print(color,len)

[3, 4, 6, 5]
1 red
2 blue
3 yellow
4 green
3 red
4 blue
6 yellow
5 green


# 2. Functions

In [None]:
#avoid return None
def divide(a,b):
  try:
    return a/b
  except ZeroDivisionError as e:
    raise ValueError('invalid inputs') from e

x,y=0,0
try:
  result=divide(x,y)
except ValueError:
  print("invalid 0 inputs")
else:
  print(f'resualt is {result:.1f}')

invalid 0 inputs


In [None]:
numbers=[8,3,1,2,5,4,7,6]
group=[2,3,5,9]

class Sorter(object):
  def __init__(self,group):
    self.group=group
    self.found=False
  def __call__(self,x):
    if x in self.group:
      self.found=True
      return(0,x)
    return (1,x)

sorter=Sorter(group)
numbers.sort(key=sorter)
print(sorter.found is True)
print(numbers)

True
[2, 3, 5, 1, 4, 6, 7, 8]


In [None]:
#save memory
def fab(max): 
    n, a, b = 0, 0, 1 
    while n < max: 
        yield b      # 使用 yield
        # print b 
        a, b = b, a + b 
        n = n + 1
 
for n in fab(5): 
    print(n)

1
1
2
3
5


In [None]:
i=[1,2,3,4,5]
a,*b,c=i
print(a)
print(b)
print(c)

1
[2, 3, 4]
5


In [None]:
# use None and Docstrings to specify dynamic default
from datetime import datetime
def log(message,when=None):
  if when is None:
    when=datetime.now() 
  print(f'{when}:{message}')

log('hi')
log('bye')

2021-09-12 08:24:32.188268:hi
2021-09-12 08:24:32.188435:bye


# 3. Classes and Inheritance

In [None]:
from collections import defaultdict

current={'green':12,'blue':3}
increments=[('red',5),('blue',9)]
class CountMissing(object):
  def __init__(self):
    self.added=1
  def __call__(self):
    self.added+=1
    return 0
counter=CountMissing()
result=defaultdict(counter,current)
for key,amount in increments:
  result[key]+=amount
print(counter.added)

2


In [None]:
def test(f):
    print ("before ...")
    f()
    print ("after ...")
    f()
    print ("end ...")
 
@test
def func():
    print ("func was called")

before ...
func was called
after ...
func was called
end ...


In [None]:
def a(fn):
    print ('a')
    def d(st):
        print (st+'d')
    return d
 
def b(fn):
    print ('b')
    return fn
 
@a
@b
def c(st):
    print (st)
    
c('c')

b
a
cd


# 4. Metaclasses and Attributes

In [10]:
class Resistor(object):
    def __init__(self, ohms):
        self.ohms = ohms
        self.voltage = 0
        self.current = 0

class VoltageResistance(Resistor):
    def __init__(self, ohms):
        super().__init__(ohms)
        self._voltage = 0


    @property
    def voltage(self):
        return self._voltage

    @voltage.setter
    def voltage(self, voltage):
        self._voltage = voltage
        self.current = self._voltage / self.ohms

r2 = VoltageResistance(1e3)
print('Before: %5r amps' % r2.current)
r2.voltage = 10
print('After:  %5r amps' % r2.current)

class BoundedResistance(Resistor):
    def __init__(self, ohms):
        super().__init__(ohms)

    @property
    def ohms(self):
        return self._ohms

    @ohms.setter
    def ohms(self, ohms):
        if ohms <= 0:
            raise ValueError('%f ohms mush be > 0' % ohms)
        self._ohms = ohms


# Assigning an invalid resistance to the attribute raises an excpetion.


r3 = BoundedResistance(1e3)
r3.ohms=0

Before:     0 amps
After:   0.01 amps


ValueError: ignored

In [15]:
from datetime import timedelta
import datetime

class Bucket(object):
    def __init__(self, period):
        self.period_delta = timedelta(seconds=period)
        self.reset_time = datetime.datetime.now()
        self.max_quota = 0
        self.quota_consumed = 0
        
    #refactor print
    def __repr__(self):
        return ('Bucket(max_quota=%d, quota_consumed=%d)' %
                (self.max_quota, self.quota_consumed))
    @property
    def quota(self):
        return self.max_quota - self.quota_consumed
    @quota.setter
    def quota(self, amount):
        delta = self.max_quota - amount
        if amount == 0:
            '''quota being reset for a new period'''
            self.quota_consumed = 0
            self.max_quota = 0
        elif delta < 0:
            '''quota being filled for the new period'''
            assert self.quota_consumed == 0
            self.max_quota = amount
        else:
            '''quota being consumed during the period'''
            assert self.max_quota >= self.quota_consumed
            self.quota_consumed += delta
def fill(bucket, amount):
    now = datetime.datetime.now()
    if now - bucket.reset_time > bucket.period_delta:
        bucket.quota = 0
        bucket.reset_time = now
    bucket.quota += amount
def deduct(bucket, amount):
    now = datetime.datetime.now()
    if now - bucket.reset_time > bucket.period_delta:
        return False
    if bucket.quota - amount < 0:
        return False
    bucket.quota -= amount
    return True

bucket = Bucket(60)
print('Initial', bucket)
fill(bucket, 100)
print('Filled', bucket)

if deduct(bucket, 99):
    print('Had 99 quota')
else:
    print('Not enough for 99 quota')
print('Now', bucket)

if deduct(bucket, 3):
    print('Had 3 quota')
else:
    print('Not enough for 3 quota')
print('Still', bucket)

Initial Bucket(max_quota=0, quota_consumed=0)
Filled Bucket(max_quota=100, quota_consumed=0)
Had 99 quota
Now Bucket(max_quota=100, quota_consumed=99)
Not enough for 3 quota
Still Bucket(max_quota=100, quota_consumed=99)


In [20]:
import weakref

class Grade(object):
    def __init__(self):
        self._values = weakref.WeakKeyDictionary()

    def __get__(self, instance, owner):
        if instance is None:
            return self
        return self._values.get(instance, 0)

    def __set__(self, instance, value):
        if not (0 <= value <= 100):
            raise ValueError('Grade must be between 0 and 100')
        self._values[instance] = value
class Exam(object):
    math_grade = Grade()
    writing_grade = Grade()
    science_grade = Grade()


first_exam = Exam()
first_exam.writing_grade = 82
second_exam = Exam()
second_exam.writing_grade = 75
print('First ', first_exam.writing_grade, 'is right')
print('Second', second_exam.writing_grade, 'is right')

First  82 is right
Second 75 is right


In [24]:
# __getattr__ is only invoked if the attribute wasn't found the usual ways. 
# It's good for implementing a fallback for missing attributes, 
# and is probably the one of two you want.

# __getattribute__ is invoked before looking at the actual attributes on the object, 
# and so can be tricky to implement correctly. 
# You can end up in infinite recursions very easily.

class AboutAttr(object):
    def __init__(self, name):
        self.name = name

    def __getattribute__(self, item):
        try:
            return super(AboutAttr, self).__getattribute__(item)
        except KeyError:
            return 'default'
        except AttributeError as ex:
            print (ex)

    def __getattr__(self, item):
        return 'default'

at = AboutAttr('test')
print (at.name)
print (at.not_exised)


test
'AboutAttr' object has no attribute 'not_exised'
None


In [25]:
class ValidatePolygon(type):
    def __new__(meta, name, bases, class_dict):
        '''Don't validate the abstract Polygon class'''
        if bases != (object,):
            if class_dict['sides'] < 3:
                raise ValueError('Polygons need 3+ sides')
        return type.__new__(meta, name, bases, class_dict)


class Polygon(object, metaclass=ValidatePolygon):
    sides = None  # Specified by subclass

    @classmethod
    def interior_angles(cls):
        return (cls.sides - 2) * 180


class Triangle(Polygon):
    sides = 3

print('Before class')


class Line(Polygon):
    print('Before side')
    sides = 1
    print('After side')
    
print('After class')

Before class
Before side
After side


ValueError: ignored

In [27]:
class Meta(type):
    def __new__(meta, name, bases, class_dict):
        for key, value in class_dict.items():
            if isinstance(value, Field):
                value.name = key
                value.internal_name = '_' + key
        cls = type.__new__(meta, name, bases, class_dict)
        return cls

class Field(object):
    def __init__(self):
        self.name = None
        self.internal_name = None

    def __get__(self, instance, instance_type):
        if instance is None:
            return self
        return getattr(instance, self.internal_name)

    def __set__(self, instance, value):
        setattr(instance, self.internal_name, value)

class DatabaseRow(object, metaclass=Meta):
    pass
    
class BetterCustomer(DatabaseRow):
    first_name = Field()
    last_name = Field()
    prefix = Field()
    suffix = Field()


# The behavior of the new class is identical to the old one.


foo = BetterCustomer()
# print('Before: ', repr(foo.first_name), foo.__dict__)
print('Before: ', foo.__dict__)
foo.first_name = 'Euler'
print('After:  ', repr(foo.first_name), foo.__dict__)
# Before:  {}

Before:  {}
After:   'Euler' {'_first_name': 'Euler'}


# 5. concurrency and parallelism


In [33]:
import subprocess
import time
import os

proc = subprocess.Popen(['sleep', '0.0001'])
while proc.poll() is None:
    print('Working...')
    # some time-consuming work here
    total = 0
    for i in range(1000):
        total += i
    print(total)
print('Exit status', proc.poll())

Working...
499500
Working...
499500
Working...
499500
Working...
499500
Working...
499500
Working...
499500
Working...
499500
Working...
499500
Exit status 0


In [37]:
def run_openssl(data):
    env = os.environ.copy()
    env['password'] = b'\xe24U\n\xd0Q13S\x11'
    proc = subprocess.Popen(
        ['openssl', 'enc', '-des3', '-pass', 'env:password'],
        env=env,
        stdin=subprocess.PIPE,
        stdout=subprocess.PIPE
    )
    proc.stdin.write(data)
    proc.stdin.flush()   # Ensure the child gets input
    return proc
def run_md5(input_stdin):
    proc = subprocess.Popen(
        ['md5sum'],
        stdin=input_stdin,
        stdout=subprocess.PIPE
    )
    return proc
input_procs = []
hash_procs = []
for _ in range(3):
    data = os.urandom(10)
    proc = run_openssl(data)
    input_procs.append(proc)
    hash_proc = run_md5(proc.stdout)
    hash_procs.append(hash_proc)

# The I/O between the child processes will happen automatically once you get
# them started. All you need to do is wait for them to finish and print the
# final output.

for proc in input_procs:
    proc.communicate()

for proc in hash_procs:
    out, err = proc.communicate()
    print(out.strip())
    
def run_sleep(period):
    proc = subprocess.Popen(['sleep', str(period)])
    return proc

proc = run_sleep(10)
try:
    proc.communicate(timeout=0.1)
except subprocess.TimeoutExpired:
    proc.terminate()
    proc.wait()

print('Exit status', proc.poll())

b'c59793473f5e615f3db4b7797fa05029  -'
b'2cb54298f440d5329a9ac9bc783d66f3  -'
b'df99f7a3ebc98739c350dd1125cb6259  -'
Exit status -15


In [38]:
# 1. Python threads can't bytecode in parallel on multiple CPU cores because
#     of the global interpreter lock (GIL).
# 2. Python threads are still useful despite the GIL because they provide an
#     easy way to do multiple things at seemingly the same time.
# 3. Use Python threads to make multiple system calls in parallel. This allows
#     you to do blocking I/O at the same time as computation.

import time
from threading import Thread
import select
def slow_systemcall():
    select.select([], [], [], 0.1)


# Running this system call in serial requires a linearly increasing amount of
# time.

start = time.time()
for _ in range(5):
    slow_systemcall()
end = time.time()
print('Took %.3f seconds' % (end - start))

start = time.time()
threads = []
for _ in range(5):
    thread = Thread(target=slow_systemcall)
    thread.start()
    threads.append(thread)

def compute_helicopter_location(index):
    return index**2

for i in range(5):
    compute_helicopter_location(i)

for thread in threads:
    thread.join()
end = time.time()
print('Took %.3f seconds' % (end - start))

Took 0.501 seconds
Took 0.101 seconds


In [42]:
# 1. Even though Python has a global interpreter lock, you're still
#     responsible for protecting against objects without locks.
# 2. Your programs will corrupt their data structures if you allow multiple
#     threads to modify the same objects without locks.
# 3. The lock class in the threading built-in module is Python's standard
#     mutual exclusion lock implementation.
from threading import Thread
from threading import Lock

def run_threads(func, how_many, counter):
    threads = []
    for i in range(5):
        args = (i, how_many, counter)
        thread = Thread(target=func, args=args)
        threads.append(thread)
        thread.start()

    for thread in threads:
        thread.join()

def worker(sensor_index, how_many, counter):
    for _ in range(how_many):
        # Read from the sensor
        counter.increment(1)

class LockingCounter(object):
    def __init__(self):
        self.lock = Lock()
        self.count = 0

    def increment(self, offset):
        with self.lock:
            self.count += offset
class Counter(object):
    def __init__(self):
        self.count = 0

    def increment(self, offset):
        self.count += offset

counter = LockingCounter()
how_many = 10**5
run_threads(worker, how_many, counter)
print('Counter should be %d, found %d' % (5 * how_many, counter.count))

counter = Counter()
how_many = 10**5
run_threads(worker, how_many, counter)
print('Counter should be %d, found %d' % (5 * how_many, counter.count))

Counter should be 500000, found 500000
Counter should be 500000, found 330139


In [46]:
# 1. Pipelines are a great way to organize sequences of work that run
#     concurrently using multiple Python threads.
# 2. Be aware of the many problems in building concurrent pipelines: busy
#     waiting, stopping workers, and memory explosion.
# 3. The Queue class has all of the facilities you need to build robust
#     pipelines: blocking operations, buffer sizes, and joining.
from collections import deque
from threading import Thread
from threading import Lock
from time import sleep
from queue import Queue
import time

def download(item):
    # print(item)
    # print("download()")
    pass


def resize(item):
    # print("resize()")
    # print(item)
    pass


def upload(item):
    # print("upload()")
    # print(item)
    pass

class ClosableQueue(Queue):
    SENTINEL = object()

    def close(self):
        self.put(self.SENTINEL)

    def __iter__(self):
        while True:
            item = self.get()
            try:
                if item is self.SENTINEL:
                    return  # Cause the thread to exit
                yield item
            finally:
                self.task_done()
class StoppableWorker(Thread):
    def __init__(self, func, in_queue, out_queue):
        super().__init__()
        self.func = func
        self.in_queue = in_queue
        self.out_queue = out_queue
        self.polled_count = 0
        self.work_done = 0

    def run(self):
        for item in self.in_queue:
            result = self.func(item)
            self.out_queue.put(result)

# Here, I re-create the set of worker threads using the new worker class:

download_queue = ClosableQueue()
resize_queue = ClosableQueue()
upload_queue = ClosableQueue()
done_queue = ClosableQueue()

threads = [
    StoppableWorker(download, download_queue, resize_queue),
    StoppableWorker(resize, resize_queue, upload_queue),
    StoppableWorker(upload, upload_queue, done_queue),
]

# After running the worker threads like before, I also send the stop signal
# once all the input work has been injected by closing the input queue of the
# first phase.

for thread in threads:
    thread.start()

for _ in range(1000):
    download_queue.put(object())

download_queue.close()
download_queue.join()
resize_queue.close()
resize_queue.join()
upload_queue.close()
upload_queue.join()
print(done_queue.qsize(), 'item finished')

1000 item finished


In [48]:
from collections import namedtuple

def minimize():
    current = yield
    while True:
        value = yield current
        current = min(value, current)


# The code consuming the generator can run one step at a time and will output
# the minimum value seen after each input.

it = minimize()
next(it)             # Prime the generator
print(it.send(10))
print(it.send(4))
print(it.send(22))
print(it.send(-1))

10
4
4
-1


In [61]:

ALIVE = '*'
EMPTY = '-'


Query = namedtuple('Query', ('y', 'x'))

def count_neighbors(y, x):
    n_ = yield Query(y + 1, x + 0)  # North
    ne = yield Query(y + 1, x + 1)  # Northeast
    e_ = yield Query(y + 0, x + 1)  # East
    se = yield Query(y - 1, x + 1)  # Southeast
    s_ = yield Query(y - 1, x + 0)  # South
    sw = yield Query(y - 1, x - 1)  # Southwest
    w_ = yield Query(y + 0, x - 1)  # West
    nw = yield Query(y + 1, x - 1)  # Northwest

    neighbor_states = [n_, ne, e_, se, s_, sw, w_, nw]
    count = 0
    for state in neighbor_states:
        if state == ALIVE:
            count += 1
    return count


# I can drive the count_neighbors coroutine with fake data to test it. Here, I
# show how Query objects will be yielded for each neighbor. count_neighbors
# expects to receive cell states corresponding to each Query through the
# coroutine's send method. The final count is returned in the StopIteration
# exception that is raised when the generator is exhausted by the return
# statement.



# The step_cell coroutine receives its coordinates in the grid as arguments.
# It yields a Query to get the initial state of those coordinates. It runs
# count_neighbors to inspect the cells around it. It runs the game logic to
# determine what state the cell should have for the next clock tick. Finally,
# it yields a Transition object to cell the environment the cell's next state.


# def game_logic(state, neighbors):
#     return state

def game_logic(state, neighbors):
    if state == ALIVE:
        if neighbors < 2:
            return EMPTY          # Die: Too few
        elif neighbors > 3:
            return EMPTY          # Die: Too many
    else:
        if neighbors == 3:
            return ALIVE          # Regenerate
    return state


def step_cell(y, x):
    state = yield Query(y, x)
    neighbors = yield from count_neighbors(y, x)
    next_state = game_logic(state, neighbors)
    yield Transition(y, x, next_state)


# Importantly, the call to count_neighbors uses the yield from expression.
# This expression allows Python to compose generator coroutines together,
# making it easy to reuse smaller pieces of functionality and build complex
# coroutines from simpler ones. When count_neighbors is exhausted, the final
# value it returns (with the return statement) will be passed to step_cell as
# the result of the yield from expression.


# The goal of the game is to run this logic for a whole grid of cells in
# lockstep. To do this, I can further compose the step_cell coroutine into a
# simulate coroutine. This coroutine progresses the grid of cells forward by
# yielding from step_cell many times. After progressing every coordinate, it
# yields a TICK object to indicate that the current generation of cells have
# all transitioned.


TICK = object()


def simulate(height, width):
    while True:
        for y in range(height):
            for x in range(width):
                yield from step_cell(y, x)
        yield TICK


# What's impressive about simulate is that it's completely disconnected from
# the surrounding environment. I still haven't defined how the grid is
# represented in Python objects, how Query, Transition, and TICK values are
# handled on the outside, nor how the game gets its initial state. But the
# logic is clear. Each cell will transition by running step_cell. Then the
# game clock will tick. This will continue forever, as long as the simulate
# coroutine is advanced.


# This is the beauty of coroutines. They help you focus on the logic of what
# you're trying to accomplish. They decouple your code's instructions for the
# environment from the implementation that carries out your wishes. This also
# allows you to improve the implementation of following those instructions
# over time without changing the coroutines.

# Now, I want to run simulate in a real environment. To do that, I need to
# represent the state of each cell in the grid. Here, I define a class to
# contain the grid:


class Grid(object):
    def __init__(self, height, width):
        self.height = height
        self.width = width
        self.rows = []
        for _ in range(self.height):
            self.rows.append([EMPTY] * self.width)

    def __str__(self):
        str = ''
        for i in range(self.height):
            for j in range(self.width):
                str += self.query(i, j)
                # print(self.query(i, j))
            str += '\n'
            # print('\n')
        return str

# The grid allows you to get and set the value of any coordinate. Coordinates
# that are out of bounds will wrap around, making the grid act like infinite
# looping space.

    def query(self, y, x):
        return self.rows[y % self.height][x % self.width]

    def assign(self, y, x, state):
        self.rows[y % self.height][x % self.width] = state

# At last, I can define the function that interprets the values yielded from
# simulate and all of its interior coroutines. This function turns the
# instructions from the coroutines into interactions with the surrounding
# environment. It progress the whole grid of cells forward a single step and
# then returns a new grid containing the next state.


def live_a_generation(grid, sim):
    progeny = Grid(grid.height, grid.width)
    item = next(sim)
    while item is (not TICK):
        if isinstance(item, Query):
            state = grid.query(item.y, item.x)
            item = sim.send(state)
        else:
            progeny.assign(item.y, item.x, item.state)
            item.next(sim)
    return progeny


# To see this function in action, I need to create a grid and set its initial
# state. Here, I make a classic shape called a glider.

class ColumnPrinter(object):
    def __init__(self):
        self.height = 0
        self.width = 0
        self.string = []
        self.times = 0

    def append(self, str_grid):
        if self.string == []:
            self.string = str_grid.split('\n')
            self.height = len(self.string) - 1
            self.width = len(self.string[0])
        else:
            str_grid_ = str_grid.split('\n')
            height_ = len(str_grid_) - 1
            width_ = len(str_grid_[0])
            assert height_ == self.height
            assert width_ == self.width
            for i in range(self.height):
                self.string[i] += '|'
                self.string[i] += str_grid_[i]

        self.times += 1

    def __str__(self):
        # head
        head = ""
        for i in range(self.width * self.times + (self.times - 1)):
            number = int(i + (self.width+1)/2 + 1)
            if (i+1) % (self.width + 1) == 0 and i > 0:
                head += '|'
            elif number % (self.width + 1) == 0:
                head += str(int(number/self.width))
            else:
                head += ' '
        # body
        printer = head + '\n'
        for i in range(self.height):
            printer += (self.string[i] + '\n')
        return printer


grid = Grid(5, 9)
grid.assign(0, 3, ALIVE)
grid.assign(1, 4, ALIVE)
grid.assign(2, 2, ALIVE)
grid.assign(2, 3, ALIVE)
grid.assign(2, 4, ALIVE)
colums = ColumnPrinter()
sim = simulate(grid.height, grid.width)
for i in range(5):
    colums.append(str(grid))
    grid = live_a_generation(grid, sim)

print(colums)



# The best part about this approach is that I can change the game_logic
# function without having to update the code that surrounds it. I can change
# the rules or add larger spheres of influence with the existing machanics of
# Query, Transition, and TICK. This demonstrates how coroutines enable the
# separation of concerns, which is an important design principle.

# Things to remember

# 1. Coroutines provide an efficient way to run tens of thousands of functions
#    seemingly at the same time.
# 2. Within a generator, the value of the yield expression will be whatever
#    value was passed to the generator's send method from the exterior code.
# 3. Coroutines give you a powerful tool for separating the core logic of your
#    program from its interaction with the surrounding environment.
# 4. Python 2 doesn't support yield from or returning values from generators.

    1    |    2    |    3    |    4    |    5    
---*-----|---------|---------|---------|---------
----*----|---------|---------|---------|---------
--***----|---------|---------|---------|---------
---------|---------|---------|---------|---------
---------|---------|---------|---------|---------



In [65]:
# 1. Moving CPU bottlenecks to C-extension modules can be an effective way to
#    improve performance while maximizing your investment in Python code.
#    However, the cost of doing so is high and may introduce bugs.
# 2. The multiprocessing module provides powerful tools that can parallelize
#    certain types of Python computation with minimal effort.
# 3. The power of multiprocessing is best accessed through the
#    concurrent.futures built-in module and its simple ProcessPoolExecutor
#    class.
# 4. The advanced parts of the multiprocessing module should be avoided
#    because they are so complex.
import time
from concurrent.futures import ThreadPoolExecutor
from concurrent.futures import ProcessPoolExecutor

def gcd(pair):
    a, b = pair
    low = min(a, b)
    for i in range(low, 0, -1):
        if a % i == 0 and b % i == 0:
            return i


# Running this function in serial takes a linearly increasing amount of time
# because there is no parallelism.

numbers = [(1963309, 2265973), (2030677, 3814172),
           (1551645, 2229620), (2039045, 2020802)]
start = time.time()
results = list(map(gcd, numbers))
end = time.time()
print('Took %.3f seconds' % (end - start))

start = time.time()
pool = ThreadPoolExecutor(max_workers=2)
results = list(pool.map(gcd, numbers))
end = time.time()
print('Took %.3f seconds' % (end - start))
# Took 0.664 seconds

start = time.time()
pool = ProcessPoolExecutor(max_workers=2)  # the one change
results = list(pool.map(gcd, numbers))
end = time.time()
print('Took %.3f seconds' % (end - start))
# Took 0.357 seconds

Took 0.555 seconds
Took 0.615 seconds
Took 0.690 seconds


# 6. Built-in Modules

In [72]:
# 1. Decorators are Python syntax for allowing one function to modify another
#    function at runtime.
# 2. Using decorators can cause strange behaviors in tools that do
#    introspection, such as debuggers.
# 3. Use the wraps decorator from the functools built-in module when you
#    define your own decorators to avoid any issues.
from functools import wraps

def trace(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        result = func(*args, **kwargs)
        print('%s(%r, %r) -> %r' %
              (func.__name__, args, kwargs, result))
        return result
    return wrapper

@ trace
def fibonacci(n):
    """Return the n-th Fibonacci number"""
    if n in (0, 1):
        return n
    return fibonacci(n - 2) + fibonacci(n - 1)

fibonacci(5)
help(fibonacci)

fibonacci((1,), {}) -> 1
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((3,), {}) -> 2
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((1,), {}) -> 1
fibonacci((0,), {}) -> 0
fibonacci((1,), {}) -> 1
fibonacci((2,), {}) -> 1
fibonacci((3,), {}) -> 2
fibonacci((4,), {}) -> 3
fibonacci((5,), {}) -> 5
Help on function fibonacci in module __main__:

fibonacci(n)
    Return the n-th Fibonacci number



In [82]:
from threading import Lock
import logging
from contextlib import contextmanager

@contextmanager
def log_level(level, name):
    logger = logging.getLogger(name)
    old_level = logger.getEffectiveLevel()
    logger.setLevel(level)
    try:
        yield logger
    finally:
        logger.setLevel(old_level)
        
with log_level(logging.DEBUG, 'my_log') as logger:
    logger.debug('This is my message!')
    logging.debug('This will not print')


# After the with statement exits, calling debug logging methods on the Logger
# named 'my-log' will not print anything because the default logging severity
# level has been restored. Error log messages will always print.

logger = logging.getLogger('my_log')
logger.debug('Debug will not print')
logger.error('Error will print')

DEBUG:my_log:This is my message!
ERROR:my_log:Error will print


In [84]:
# 1. Avoid using the time module for translating between different time zones.
# 2. Use the datetime built-in module along with the pytz module to reliably
#    convert between times in different time zones.
# 3. Always represent time in UTC and do conversations to local time as the
#    final step before presentation.
from time import localtime, strftime
from datetime import datetime, timezone
import pytz

time_format = '%Y-%m-%d %H:%M:%S'
arrival_nyc = '2014-05-01 23:33:24'
nyc_dt_naive = datetime.strptime(arrival_nyc, time_format)
eastern = pytz.timezone('US/Eastern')
nyc_dt = eastern.localize(nyc_dt_naive)
utc_dt = pytz.utc.normalize(nyc_dt.astimezone(pytz.utc))
print(utc_dt)
# 2014-05-02 03:33:24+00:00

# Once I have a UTC datetime, I can convert it to San Francisco local time.

pacific = pytz.timezone('US/Pacific')
sf_dt = pacific.normalize(utc_dt.astimezone(pacific))
print(sf_dt)
# 2014-05-01 20:33:24-07:00

# Just as easily, I can convert it to the local time in Nepal.

nepal = pytz.timezone('Asia/Katmandu')
nepal_dt = nepal.normalize(utc_dt.astimezone(nepal))
print(nepal_dt)

2014-05-02 03:33:24+00:00
2014-05-01 20:33:24-07:00
2014-05-02 09:18:24+05:45


In [86]:
from decimal import Decimal
from decimal import ROUND_UP

rate = Decimal('0.05')
seconds = Decimal('5')
cost = rate * seconds / Decimal('60')
print(cost)
# 0.004166666666666666666666666667

# But the quantize behavior ensures that this is rounded up to one whole cent.

rounded = cost.quantize(Decimal('0.01'), rounding=ROUND_UP)
print(rounded)
# 0.01

0.004166666666666666666666666667
0.01


# 7. Collaboration

In [88]:
# 1. Write documentation for every module, class and function using
#    docstrings. Keep them up to date as your code changes.
# 2. For modules: introduce the contents of the module and any important
#    classes or functions all users should know about.
# 3. For classes: document behavior, important attributes, and subclass
#    behavior in the docstring following the class statement.
# 4. For functions and methods: document every argument, returned value,
#    raised exception, and other behaviors in the docstring following the
#    def statement.

# 1. Defining root exceptions for your modules allows API consumers to
#    insulate themselves from your API.
# 2. Catching root exceptions can help you find bugs in code that consumes an
#    API.
# 3. Catching the Python Exception base class can help you find bugs in API
#    implementations.
# 4. Intermediate root exceptions let you add more specific types of
#    exceptions in the future without breaking your API consumers.

# 1. Circular dependencies happen when two modules must call into each other
#    at import time. They can cause your program to crash at startup.
# 2. The best way to break a circular dependency is refactoring mutual
#    dependencies into a separate module at the bottom of the dependency tree.
# 3. Dynamic imports are the simplest solution for breaking a circular
#    dependency between modules while minimizing refactoring and complexity.
"""Library for test words for various linguistic patterns.
Testing how words relate to each other can be tricky sometimes! This module
provides easy ways to determine when words you've found have special
properties.
Available functions:
- palindrome: Determine if a word is a palindrome.
- check_anagram: Determine if two words are anagrams.
...
"""
# ...

class Player(object):
    """Represents a player of the game.
    Subclasses may override the 'tick' method to provide custom animations for
    the player's movement depending on their power level. etc.
    Public attributes:
    - power: Unused power-ups (float between 0 and 1).
    - coins: Coins found during the level (integer).
    """
    # ...
    pass
def find_anagrms(word, dictionary):
    """Find all anagrams for a word.
    This function only runs as fast as the test for membership in the
    'dictionary' container. It will be slow if the dictionary is a list and
    fast if it's a set.
    Args:
        word: String of the target word.
        dictionary: Container with all strings that are known to be actual
        words.
    Returns:
        List of anagrams that were found. Empty if none were found.
    """
    # ...
    pass

help(Player)
help(find_anagrms)   

Help on class Player in module __main__:

class Player(builtins.object)
 |  Represents a player of the game.
 |  Subclasses may override the 'tick' method to provide custom animations for
 |  the player's movement depending on their power level. etc.
 |  Public attributes:
 |  - power: Unused power-ups (float between 0 and 1).
 |  - coins: Coins found during the level (integer).
 |  
 |  Data descriptors defined here:
 |  
 |  __dict__
 |      dictionary for instance variables (if defined)
 |  
 |  __weakref__
 |      list of weak references to the object (if defined)

Help on function find_anagrms in module __main__:

find_anagrms(word, dictionary)
    Find all anagrams for a word.
    This function only runs as fast as the test for membership in the
    'dictionary' container. It will be slow if the dictionary is a list and
    fast if it's a set.
    Args:
        word: String of the target word.
        dictionary: Container with all strings that are known to be actual
        word

In [89]:
# 1. Virtual environment allow you to use pip to install many different
#    versions of the same package on the same machine without conflicts.
# 2. Virtual environments are created with pyvenv, enabled with source
#    bin/activate, and disabled with deactivate.
# 3. You can dump all of the requirements of an environment with pip freeze.
#    You can reproduce the environment by supplying the requirements.txt file
#    to pip install -r.
# 4. In versions of Python before 3.4, the pyvenv tool must be downloaded and
#    installed separately. The command-line tool is called virtualenv instead
#    of pyvenv.

# 8. Production

In [None]:
# 1. Programs often need to run in multiple deployment environments that each
#    have unique assumptions and configurations.
# 2. You can tailor a module's contents to different deployment environments
#    by using normal Python statements in module scope.
# 3. Module contents can be the product of any external condition, including
#    host introspection through the sys and os modules.

In [90]:
# 1. Calling print on built-in Python types will produce the human-readable
#    string version of a value, which hides type information.
# 2. Calling repr on built-in Python types will produce the printable string
#    version of a value. These repr strings could be passed to the eval
#    built-in function to get back the original value.
# 3. %s in format strings will produce human-readable strings like str.%r will
#    produce printable strings like repr.
# 4. You can define the __repr__ method to customize the printable
#    representation of a class and provide more detailed debugging
#    information.
# 5. You can reach into any object's __dict__ attribute to view its internals.
a = '\x07'
print(repr(a))
b = eval(repr(a))
assert a == b

print(repr(5))
print(repr('5'))

class OpaqueClass(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y


obj = OpaqueClass(1, 2)
print(obj)
print(repr(obj))

class BetterClass(object):
    def __init__(self, x, y):
        self.x = x
        self.y = y

    def __repr__(self):
        return 'BetterClass(%d, %d)' % (self.x, self.y)


# Now, the repr value is much more useful.

obj = BetterClass(1, 2)
print(obj)
print(repr(obj))
# BetterClass(1, 2)
# BetterClass(1, 2)

# When you don't have control over the class definition, you can reach into
# the object's instance dictionary, which is stored in the __dict__
# attribute. Here, I print out the contents of an OpaqueClass instance:

obj = OpaqueClass(4, 5)
print(obj.__dict__)

'\x07'
5
'5'
<__main__.OpaqueClass object at 0x7f61f701a250>
<__main__.OpaqueClass object at 0x7f61f701a250>
BetterClass(1, 2)
BetterClass(1, 2)
{'x': 4, 'y': 5}


In [92]:
from unittest import TestCase, main
from tempfile import TemporaryDirectory

# 1. The only way to have confidence in a Python program is to write tests.
# 2. The unittest built-in module provides most of the facilities you'll need
#    to write good tests.
# 3. You can define tests by subclassing TestCase and defining one method per
#    behavior you'd like to test. Test methods on TestCase classes must start
#    with the word test.
# 4. It's important to write both unit tests (for isolated functionality) and
#    integration tests (for modules that interact).
def to_str(data):
    if isinstance(data, str):
        return data
    elif isinstance(data, bytes):
        return data.decode('utf-8')
    else:
        raise TypeError('Must supply str or bytes, '
                        'found: %r' % data)
        
class UtilsTestCase(TestCase):
    def test_to_str_bytes(self):
        self.assertEqual('hello', to_str(b'hello'))

    def test_to_str_str(self):
        self.assertEqual('hello', to_str('hello'))

    def test_to_str_bad(self):
        self.assertRaises(TypeError, to_str, object())


class MyTest(TestCase):
    def setUp(self):
        self.test_dir = TemporaryDirectory()

    def tearDown(self):
        self.test_dir.cleanup()

    def test_to_str_bytes(self):
        self.assertEqual('hello', to_str(b'hello'))

    def test_to_str_str(self):
        self.assertEqual('hello', to_str('hello'))

    def test_to_str_bad(self):
        self.assertRaises(TypeError, to_str, object())

In [98]:
# 1. ncalls: The number of calls to the function during the profiling period.
# 2. tottime: The number of seconds spent executing the function, excluding
#    time spent executing other functions it calls.
# 3. totime percall: The average number of seconds spents in the function each
#    time it was called, executing time spent executing other functions it
#    calls. This is tottime divided by ncalls.
# 4. cumtime: The cumulative number of seconds spent executing the function,
#    including time spent in all other function it calls.
# 5. cumtime percall: The average number of seconds spent in the function each
#    time it was called, including time spent in all other functions it calls.
#    This is cumtime divided by ncalls.


from random import randint
from profile import Profile
from pstats import Stats
from bisect import bisect_left

def insert_value_bi(array, value):
    i = bisect_left(array, value)
    array.insert(i, value)


# I can run the profiler again and generate a new table of profiler
# statistics. The new function is much faster, with a cumulative time spent
# that is nearly 100 times smaller than the previous insert_value function.


def insertion_sort_bi(data):
    result = []
    for value in data:
        insert_value_bi(result, value)
    return result

max_size = 10**4
data = [randint(0, max_size) for _ in range(max_size)]
print(data)
test_bi = lambda: insertion_sort_bi(data)
profiler_bi = Profile()
profiler_bi.runcall(test_bi)
stats_bi = Stats(profiler_bi)
stats_bi.strip_dirs()
stats_bi.sort_stats('cumulative')
stats_bi.print_stats()
stats_bi.print_callers()

# 1. It's import to profile Python programs before optimizing because the
#    source of slowdowns is often obscure.
# 2. Use the cProfile module instead of the profile module because it provides
#    more accurate profiling information.
# 3. The Profile object's runcall method provides everything you need to
#    profile a tree of function calls in isolation.
# 4. The Stats object lets you select and print the subset of profiling
#    information you need to see to understand your program's performance.

[9247, 795, 3765, 6532, 5790, 2231, 6794, 7128, 7199, 3584, 5634, 4929, 9147, 1701, 2433, 6181, 9136, 3690, 6257, 235, 1657, 8658, 1800, 7074, 6324, 5565, 9912, 3491, 7711, 5425, 8444, 8686, 2661, 5833, 3924, 6234, 4595, 4736, 9490, 9970, 9921, 441, 9505, 7432, 6934, 9683, 1864, 6187, 5376, 6432, 4112, 1376, 4975, 8158, 1170, 867, 7413, 1129, 542, 4811, 3434, 7658, 2153, 9679, 5063, 9766, 9329, 7667, 5157, 5632, 9300, 9657, 2549, 1696, 4139, 2709, 122, 4192, 831, 563, 1193, 9462, 7102, 8051, 2557, 4762, 6283, 1373, 4899, 4964, 7502, 3446, 6437, 2209, 7694, 2958, 9517, 7273, 7755, 4316, 8620, 4496, 4326, 9947, 3918, 8974, 2076, 1994, 6644, 2353, 8673, 6962, 2734, 3528, 3069, 9378, 5089, 4779, 341, 1738, 2873, 8698, 3044, 7949, 7035, 5185, 298, 8029, 783, 5103, 3167, 9204, 6752, 2767, 6641, 217, 3009, 7626, 5212, 4330, 4232, 1732, 8965, 1690, 8389, 2877, 7305, 5721, 8073, 466, 679, 4412, 9322, 1649, 5040, 9654, 4374, 5659, 9485, 9068, 7232, 5652, 9430, 135, 5832, 1166, 4333, 3232, 6900, 

<pstats.Stats at 0x7f61f6fa2c50>

In [101]:
# 1. It can be difficult to understand how Python programs use and leak
#    memory.
# 2. The gc module can help you understand which objects exist, but it has no
#    information about how they were allocated.
# 3. The tracemalloc built-in module provides powerful tools for understanding
#    the source of memory usage.
# 4. tracemalloc is only available in Python 3.4 and above.
import tracemalloc

def run():
    a = []
    for i in range(100000):
        c = i**2 + 1
        a.append(10 * 230 * i)
    return a

tracemalloc.start(10)   # Save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
x = run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'lineno')
for stat in stats[:3]:
    print(stat)

<ipython-input-101-6ea6c4261695>:7: size=3539 KiB (+3539 KiB), count=100000 (+100000), average=36 B
<ipython-input-100-4f8af04bfada>:10: size=0 B (-3539 KiB), count=0 (-100000)
/usr/lib/python3.7/json/decoder.py:353: size=2552 B (-6254 B), count=28 (-91), average=91 B


In [104]:
import gc
found_objects = gc.get_objects()
print('%d objects before' % len(found_objects))
x = run()
found_objects = gc.get_objects()
print('%d objects after' % len(found_objects))
for obj in found_objects[:3]:
    print(repr(obj)[:100])

115006 objects before
115036 objects after
(('/usr/local/lib/python3.7/dist-packages/zmq/eventloop/zmqstream.py', 431), ('/usr/local/lib/python
<Traceback (<Frame filename='/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py' lineno
(('/usr/local/lib/python3.7/dist-packages/tornado/stack_context.py', 300), ('/usr/local/lib/python3.


In [103]:
import tracemalloc
tracemalloc.start(10)   # Save up to 10 stack frames

time1 = tracemalloc.take_snapshot()
x = run()
time2 = tracemalloc.take_snapshot()

stats = time2.compare_to(time1, 'traceback')
top = stats[0]
print('\n'.join(top.traceback.format()))

  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 283
    return self.dispatch_shell(stream, msg)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 233
    handler(stream, idents, msg)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/kernelbase.py", line 399
    user_expressions, allow_stdin)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/ipkernel.py", line 208
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.7/dist-packages/ipykernel/zmqshell.py", line 537
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2718
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.7/dist-packages/IPython/core/interactiveshell.py", line 2822
    if self.run_code(code, result):
  File "/usr/local/lib/python3.7/dist-packages/IPy