In [9]:
# Unpack an iterable into multiple variables (star-unpacking).
# Each record is a tag string followed by a variable number of payload fields.
records = [
    ('foo', 1, 2),
    ('bar', 'hello'),
    ('foo', 3, 4),
]
def do_foo(x, y):
    """Print the literal tag 'foo' followed by ``x`` and ``y``."""
    fields = ('foo', x, y)
    print(*fields)
def do_bar(s):
    """Print the literal tag 'bar' followed by ``s``."""
    fields = ('bar', s)
    print(*fields)
# Star-unpacking inside a for-loop: the first field of each record is the
# tag, and whatever remains is collected into the list `args`.
for tag, *args in records:
    if tag == 'bar':
        do_bar(*args)
    elif tag == 'foo':
        do_foo(*args)

# Split a list into its first element and everything after it.
items = [1, 23, 456, 789]
head, *tails = items
print('head: {}'.format(head))
print('tails {}'.format(tails))

foo 1 2
bar hello
foo 3 4
head: 1
tails [23, 456, 789]


In [2]:
# Keep only the last N elements by using a bounded queue.
from collections import deque

# maxlen caps how many elements are stored; once the deque is full,
# every newly appended element pushes out the oldest one.
q = deque(maxlen=3)
q.extend([1, 2, 3, 4])  # same as appending 1, 2, 3, 4 one after another
print(q)

deque([2, 3, 4], maxlen=3)


In [14]:
#利用堆查找最大或最小的N个元素
'''当要查找的元素个数相对比较小的时候，函数 nlargest() 和 nsmallest() 是很合适的。
如果你仅仅想查找唯一的最小或最大 (N=1) 的元素的话，那么使用 min() 和 max() 函数会更快些。
类似的，如果 N 的大小和集合大小接近的时候，通常先排序这个集合然后再使用切片操作会更快点
( sorted(items)[:N] 或者是 sorted(items)[-N:] )。
需要在正确场合使用函数 nlargest() 和 nsmallest() 才能发挥它们的优势。'''
import heapq
from operator import itemgetter

# Sample holdings: each record stores the ticker, the share count and the price.
portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65}
]

# Pick records by price. itemgetter('price') extracts the same key as
# `lambda s: s['price']`; it is the form this notebook recommends when
# sorting dictionaries by a field.
cheap = heapq.nsmallest(3, portfolio, key=itemgetter('price'))
expensive = heapq.nlargest(3, portfolio, key=itemgetter('price'))
print(cheap)
print(expensive)

[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]


In [17]:
# 利用 heapq 实现一个优先级队列
import heapq
class PriorityQueue:
    """Priority queue stored as a heapq-managed list.

    ``pop`` returns the item pushed with the largest priority; items pushed
    with equal priority are returned in the order they were pushed.
    """

    def __init__(self):
        self._index = 0   # running push counter, used as a tie-breaker
        self._queue = []  # heap of (-priority, index, item) entries

    def push(self, item, priority):
        """Add ``item`` with the given ``priority`` (larger is served sooner)."""
        # heapq is a min-heap, so the priority is negated. The index differs
        # for every push, which orders entries of equal priority and means
        # the items themselves are never compared.
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        _neg_priority, _index, item = heapq.heappop(self._queue)
        return item

class Item:
    """Simple object that holds a name and shows it in its repr."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # The one-element tuple keeps %-formatting correct even if name is a tuple.
        return 'Item(%r)' % (self.name,)

# Queue four items; 'foo' and 'grok' share priority 1.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
# Drain the queue: larger priorities come out first, and the two
# priority-1 items come out in the order they were pushed.
print(q.pop())
print(q.pop())
print(q.pop())
print(q.pop())

Item('bar')
Item('spam')
Item('foo')
Item('grok')


In [7]:
# Map each dictionary key to multiple values (hand-written version).
pairs = [('a', 1), ('a', 2), ('b', 3)]
d = {}
for k, v in pairs:
    # setdefault stores an empty list the first time a key is seen and
    # returns the list held for that key either way.
    d.setdefault(k, []).append(v)
d

{'a': [1, 2], 'b': [3]}

In [21]:
# The same grouping as a "multidict" built on defaultdict: looking up a
# missing key creates its value by calling list(), so no membership check
# is needed before appending.
from collections import defaultdict
pairs=[('a',1),('a',2),('b',3)]
dd = defaultdict(list)
for k,v in pairs:
    dd[k].append(v)
dd

defaultdict(list, {'a': [1, 2], 'b': [3]})

In [14]:
# Ordered dictionaries: OrderedDict
# Keys keep their insertion order.
# Handy when building JSON whose field order matters.
# Drawback: it maintains an additional linked list internally, so an
# OrderedDict is about twice the size of a plain dict.
# NOTE(review): sys.getsizeof reports only the container's own footprint, not
# the objects it references, and the figures depend on the Python build and on
# how the dict was filled - treat the printed numbers as a rough comparison.
from collections import OrderedDict
import sys
od = OrderedDict()
d={}
# Fill both containers with the same five entries, one assignment at a time.
od['a']=1
od['b']=0
od['c']=0
od['d']=8
od['e']=6
d['a']=1
d['b']=0
d['c']=0
d['d']=8
d['e']=6
print(sys.getsizeof(od))
print(sys.getsizeof(d))

528
240


In [16]:
# Run ordinary calculations (min / max) over the values of a dictionary.
# Either pass a lambda as the key, or use zip() to flip the pairs into
# (value, key) tuples.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}
# Lambda-based alternative, which yields only the price:
#   min_price = prices[min(prices, key=lambda k: prices[k])]
# Tuples compare element by element, so placing the value first ranks by price.
min_price = min(zip(prices.values(), prices.keys()))
max_price = max(zip(prices.values(), prices.keys()))
print('min:{0},max:{1}'.format(min_price, max_price))

min:(10.75, 'FB'),max:(612.78, 'AAPL')


In [None]:
# zip() returns an iterator, and an iterator can be consumed only once.
prices_and_names = zip(prices.values(), prices.keys())
print(min(prices_and_names))  # OK: this pass exhausts the iterator
# The second pass finds nothing left, so max() raises ValueError. Catch and
# show it so the demonstration does not abort a full run of the notebook.
try:
    print(max(prices_and_names))
except ValueError as exc:
    print('ValueError:', exc)

In [23]:
# Smallest price on its own; the matching ticker is not part of the result.
min(prices.values())

10.75

In [38]:
# Dictionary key and item views support set operations.
a = {'x': 1, 'y': 2, 'z': 3}
b = {'w': 10, 'x': 11, 'y': 2}
# Only the last expression of a cell is displayed; the two set results
# below are computed and then discarded.
a.items() | b.items()  # union of the (key, value) pairs
# a.items() & b.items()
a.keys() - b.keys()    # keys of a that are missing from b
type(a.keys())

dict_keys

In [42]:
# Remove duplicates from a sequence while keeping the original element order.
li=[1,5,2,1,99,9,1,5,10]
# set() removes the duplicates but does not keep the original order; for
# these small integers the result merely happens to display in ascending order.
set(li)
# It also only works when the sequence elements are hashable.

{1, 2, 5, 9, 10, 99}

In [45]:
def dedupe(items):
    """Return a list of the distinct elements of ``items`` in first-seen order.

    Membership is checked against a list, so elements only need to support
    ``==``; they do not have to be hashable.
    """
    unique = []
    for candidate in items:
        if candidate in unique:
            continue
        unique.append(candidate)
    return unique
# dedupe() already returns a list, so list() here only makes a shallow copy.
list(dedupe(li))

[1, 5, 2, 99, 9, 10]

In [53]:
def dedupe2(items):
    """Lazily yield each element of ``items`` the first time it appears.

    Bookkeeping uses a list, so unhashable elements are accepted. For
    hashable elements a set (``add`` instead of ``append``) would also work.
    """
    emitted = []
    for candidate in items:
        if candidate in emitted:
            continue
        yield candidate
        emitted.append(candidate)
# Materialise the generator to see the de-duplicated sequence.
list(dedupe2(li))

[1, 5, 2, 99, 9, 10]

In [55]:
# De-duplicate sequences whose elements are not hashable.
def dedupe3(items, key=None):
    """Yield the elements of ``items`` in order, skipping repeats.

    When ``key`` is given it maps each element to the hashable value that
    identifies it; otherwise the element itself is used as that value.
    """
    seen_marks = set()
    for element in items:
        if key is None:
            mark = element
        else:
            mark = key(element)
        if mark in seen_marks:
            continue
        yield element
        seen_marks.add(mark)
# Dicts are unhashable, so each record is identified by its (x, y) values.
a3 = [ {'x':1, 'y':2}, {'x':1, 'y':3}, {'x':1, 'y':2}, {'x':2, 'y':4}]
list(dedupe3(a3, key=lambda d: (d['x'],d['y'])))

[{'x': 1, 'y': 2}, {'x': 1, 'y': 3}, {'x': 2, 'y': 4}]

In [58]:
# Giving a slice a name makes the indexing code easier to read.
items = list(range(7))
a = slice(2, 4)
print(items[a])
# Assigning through the slice replaces the elements at positions 2 and 3 ...
items[a] = [6, 5]
print(items)
# ... and deleting through it removes them.
del items[a]
print(items)

[2, 3]
[0, 1, 6, 5, 4, 5, 6]
[0, 1, 4, 5, 6]


In [8]:
# Find the elements that occur most often in a sequence.
# Prefer Counter over counting by hand with a dictionary.
from collections import Counter

words = [
    'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
    'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
    'my', 'eyes', "you're", 'under',
]
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']

a = Counter(words)
b = Counter(morewords)

# The three most frequent words.
top_three = a.most_common(3)
print(top_three)

# Counters support arithmetic: adding two of them sums the counts per word.
print(a + b)

[('eyes', 8), ('the', 5), ('look', 4)]
Counter({'eyes': 9, 'the': 5, 'look': 4, 'my': 4, 'into': 3, 'not': 2, 'around': 2, "don't": 1, "you're": 1, 'under': 1, 'why': 1, 'are': 1, 'you': 1, 'looking': 1, 'in': 1})


In [12]:
# Sort a list of dictionaries by one of their keys,
# using itemgetter from the operator module.
from operator import itemgetter

rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
]

rows_by_fname = sorted(rows, key=itemgetter('fname'))
# A lambda can be used instead, but the itemgetter form above runs faster.
rows_by_uid = sorted(rows, key=lambda s: s['uid'])
print(rows_by_fname)
print('---------')
print(rows_by_uid)
# The same kind of key function also works with min().
min(rows, key=itemgetter('uid'))

[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]
---------
[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]


{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}

In [1]:
# Sorting objects that do not support native comparison
# https://docs.python.org/3.5/howto/sorting.html#sortinghowto
# `key` specifies a function to be called on each list element prior to making
# comparisons; it takes a single argument and returns the value to compare by.
# The operator module provides itemgetter(), attrgetter() and methodcaller(),
# which run faster than an equivalent lambda.
# The key function may consult outside data; it need not come from the list
# being sorted. Here each student is ranked by the grade found in newgrades.
students = ['dave', 'john', 'jane']
newgrades = {'john': 'F', 'jane':'A', 'dave': 'C'}
sorted(students, key=newgrades.__getitem__)

['jane', 'dave', 'john']

In [10]:
# Group records by one of their fields.
from collections import defaultdict

rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5800 N CLARK', 'date': '07/01/2012'},
    {'address': '5412 N CLARK', 'date': '07/02/2012'},
    {'address': '5800 E CLARK', 'date': '07/04/2012'},
    {'address': '5412 N CLARK', 'date': '07/04/2012'},
    {'address': '1060 W CLARK', 'date': '07/03/2012'},
    {'address': '4801 N CLARK', 'date': '07/02/2012'},
]

# Group by date: every date maps to the list of rows recorded on it.
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)
rows_by_date
# Benefit: the rows of any given date can be looked up directly.

defaultdict(list,
            {'07/01/2012': [{'address': '5412 N CLARK', 'date': '07/01/2012'},
              {'address': '5800 N CLARK', 'date': '07/01/2012'}],
             '07/02/2012': [{'address': '5412 N CLARK', 'date': '07/02/2012'},
              {'address': '4801 N CLARK', 'date': '07/02/2012'}],
             '07/03/2012': [{'address': '1060 W CLARK', 'date': '07/03/2012'}],
             '07/04/2012': [{'address': '5800 E CLARK', 'date': '07/04/2012'},
              {'address': '5412 N CLARK', 'date': '07/04/2012'}]})

In [12]:
# Use itertools.groupby(); the rows have to be sorted by the grouping field first.
from operator import itemgetter
from itertools import groupby
# Sort by the desired field first (in place: this reorders the shared `rows` list)
rows.sort(key=itemgetter('date'))
# Iterate in groups
# NOTE(review): the loop variable `items` rebinds the notebook-level name
# `items` that other cells assign - confirm nothing later relies on the old value.
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '5800 N CLARK', 'date': '07/01/2012'}
07/02/2012
  {'address': '5412 N CLARK', 'date': '07/02/2012'}
  {'address': '4801 N CLARK', 'date': '07/02/2012'}
07/03/2012
  {'address': '1060 W CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5800 E CLARK', 'date': '07/04/2012'}
  {'address': '5412 N CLARK', 'date': '07/04/2012'}


In [13]:
# Filter the elements of a sequence.
# Options: list comprehension | generator expression | filter() | compress().
# For compress(), first build a Boolean sequence that marks which elements
# satisfy the condition; compress() then yields the elements whose flag is True.
from itertools import compress

addresses = ['5412', '5148', '5800', '2122', '5645', '1060', '4801']
counts = [0, 3, 10, 4, 1, 7, 6]

more5 = [n > 5 for n in counts]
print(more5)
list(compress(addresses, more5))

[False, False, True, False, False, True, True]


['5800', '1060', '4801']

In [35]:
from time import time  # kept from the original cell; the timing below uses timeit
from timeit import timeit

# Extract a subset of a dictionary.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}

# Make a dictionary of all prices over 200.
# The same result can be built from a generator of tuples, but that is slower:
#   p1 = dict((key, value) for key, value in prices.items() if value > 200)
p1 = {key: value for key, value in prices.items() if value > 200}

# Make a dictionary of tech stocks.
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}

# Compare two ways of selecting the tech stocks. A single time() delta for
# work this small is mostly clock noise (and can be exactly zero, which makes
# the ratio divide by zero), so each variant is run many times with timeit.
# 10000 runs keeps the cell fast while giving a stable ratio.
t_key_intersection = timeit(
    lambda: {key: prices[key] for key in prices.keys() & tech_names},
    number=10000,
)
t_item_filter = timeit(
    lambda: {key: value for key, value in prices.items() if key in tech_names},
    number=10000,
)
p2 = {key: value for key, value in prices.items() if key in tech_names}

# Ratio > 1 means the key-intersection variant is the slower of the two.
print(t_key_intersection / t_item_filter)

1.0479616306954436
