### 基础集合类型
* list
* set
* dict
    * OrderedDict
    * defaultdict


In [12]:
# Dict-comprehension example
prices = dict(ACME=45.23, AAPL=612.78, IBM=205.55, HPQ=37.20, FB=10.75)
# Keep only the entries whose price is above 200
{ticker: price for ticker, price in prices.items() if price > 200}

{'AAPL': 612.78, 'IBM': 205.55}

### 高级类型
* queue
    * queue
    * deque
    * PriorityQueue
* heapq 堆队列（最小堆），可以用以实现 PriorityQueue


In [5]:
import heapq

class PriorityQueue:
    """Priority queue on top of ``heapq`` that pops the highest priority first.

    Each heap entry is ``(-priority, index, item)``. ``heapq`` keeps the
    smallest entry at the front, so the priority is negated; the insertion
    index breaks ties so equal priorities come out in insertion order and
    the items themselves never need to be compared.
    """

    def __init__(self):
        # Heap-ordered list of (-priority, insertion index, item) tuples.
        self._queue = []
        # Counts pushes; used as the tie-breaker inside each entry.
        self._index = 0

    def push(self, item, priority):
        """Add ``item`` with the given ``priority`` (larger pops sooner)."""
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the item with the highest priority."""
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item

In [18]:
import queue as Q
def PriorityQueue_tuple():
    """Demo of ``queue.PriorityQueue`` with ``(priority, label)`` tuples.

    Puts three tuples into the queue and prints them as they are retrieved.
    """
    pq = Q.PriorityQueue()
    for entry in ((10, 'ten'), (1, 'one'), (10 / 2, 'five')):
        pq.put(entry)
    # Drain the queue one entry at a time.
    for _ in range(pq.qsize()):
        print(pq.get())

#### 常见方法
* zip
* sorted
* filter
* slice
* enumerate


In [15]:
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75
}
# Put the price first in each pair so the tuples sort by price
sorted((price, ticker) for ticker, price in prices.items())

[(10.75, 'FB'),
 (37.2, 'HPQ'),
 (45.23, 'ACME'),
 (205.55, 'IBM'),
 (612.78, 'AAPL')]

In [18]:
# Ticker names ordered by ascending price
sorted_key = sorted(prices, key=prices.get)
sorted_key

['FB', 'HPQ', 'ACME', 'IBM', 'AAPL']

#### 其他常用包
* collections.Counter
* from operator import itemgetter
* from itertools import groupby

In [9]:
from operator import itemgetter
from itertools import groupby
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]
# One key function shared by the sort and the grouping
by_date = itemgetter('date')
# groupby only merges neighbouring records, so order the rows by date first
rows.sort(key=by_date)
# Print every date followed by the records that share it
for date, items in groupby(rows, key=by_date):
    print(date)
    for i in items:
        print(' ', i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


In [14]:
# collections.namedtuple() example
from collections import namedtuple
Subscriber = namedtuple('Subscriber', 'addr joined')
sub = Subscriber(addr='jonesy@example.com', joined='2012-10-19')
print(sub)
# Fields are readable by name; print() already ends each line with a newline
print(sub.addr)
print(sub.joined)

Subscriber(addr='jonesy@example.com', joined='2012-10-19')
jonesy@example.com
2012-10-19


### 操作字符串
* re模块下`re.split` 更加复杂的分割
    * `(,|;|\s)` 与 `(?:,|;|\s)`的区别
* startswith()/endswith() Str内置方法
* [正则搜索和匹配](https://python3-cookbook.readthedocs.io/zh_CN/latest/c02/p04_match_and_search_text.html)
    * re.match的使用和复用

In [2]:
# str.format with a named field and a format spec -> more readable code
"name is {val:s}".format(val = "123")

'name is 123'

### 魔法函数
#### 生成器与迭代器

In [17]:
# A generator expression passed directly as the function argument
nums = [1, 2, 3]
s = sum(n * n for n in nums)

#### 生成器和迭代器
* 什么是迭代器和生成器
* 如何实现迭代器和生成器
* 反向迭代
* 暴露生成器状态 -> 将生成器包装到一个类的 `__iter__` 方法中，状态量就可以通过类的实例来访问了
* 迭代器切片 -> `itertools.islice()`

In [4]:
# Advance the enumerate iterator by hand with next()
itr = enumerate([1,2,3,4])
next(itr)

(0, 1)

In [5]:
# A hand-written generator
def countdown(n):
    """Yield ``n``, ``n - 1``, ... for as long as the value stays above zero.

    A start message is printed when iteration begins and ``Done!`` is
    printed once the generator is exhausted.
    """
    print('Starting to count from', n)
    remaining = n
    while remaining > 0:
        yield remaining
        remaining -= 1
    print('Done!')

for i in countdown(2):
    print(i)

Starting to count from 2
2
1
Done!


In [12]:
# dropwhile discards leading items for as long as the predicate holds,
# then yields everything that follows
from itertools import dropwhile
tail = dropwhile(lambda value: value > 3, countdown(6))
for i in tail:
    print(i)

Starting to count from 6
3
2
1
Done!


In [3]:
# Run the queue.PriorityQueue demo; the tuples print smallest first
PriorityQueue_tuple()

(1, 'one')
(5.0, 'five')
(10, 'ten')


In [6]:
def find_max_substring(nums):
    """Return the inclusive ``[start, end]`` indices of the contiguous run with the largest sum.

    A single pass keeps a running sum that restarts right after any prefix
    whose sum drops to zero or below. Ties keep the earliest run found.

    Args:
        nums: Sequence of numbers.

    Returns:
        ``(-1, -1)`` when ``nums`` is empty (a tuple, kept for backward
        compatibility); otherwise a two-item list ``[start, end]``. When no
        element is positive the run is the first occurrence of the largest
        element, rather than always ``[0, 0]``.
    """
    if not nums:
        # Nothing to scan.
        return (-1, -1)

    best_sum = None          # largest run sum seen so far (None until the first element)
    best_span = [0, 0]
    running, start = 0, 0    # sum of the current run and the index where it began
    for index, number in enumerate(nums):
        running += number
        if best_sum is None or running > best_sum:
            best_sum = running
            best_span = [start, index]
        if running <= 0:
            # A non-positive prefix cannot help any later run: start over.
            running = 0
            start = index + 1

    return best_span

In [8]:
# Example call; the result is the inclusive [start, end] index pair of the best run
find_max_substring([1,2,3,-3,-4])

[0, 2]

### 装饰器的使用

In [2]:
def do_twice(func):
    """Decorator that calls ``func`` twice with the same arguments.

    The result of the second call is returned. The inner function is not
    wrapped with ``functools.wraps``, so the decorated callable reports the
    name ``wrapper``.
    """
    def wrapper(*args, **kwargs):
        func(*args, **kwargs)         # first call, result discarded
        return func(*args, **kwargs)  # second call supplies the return value
    return wrapper

@do_twice
def say_hi(name):
    """Print a greeting for ``name`` and return ``name``."""
    print(f"hello {name}")
    return name

#### 装饰器以函数对象作为输入，返回一个名为 wrapper 的闭包，并用它替换原函数名的绑定

In [3]:
# The decorated function runs its body twice and returns the second call's result
say_hi("asd")

hello asd
hello asd


'asd'

In [4]:
# The reported name has changed: after decoration, say_hi is bound to the
# inner `wrapper` closure that do_twice returned
print(say_hi.__name__)

wrapper


In [5]:
import functools

def do_twice(func):
    """Decorator that calls ``func`` twice and returns the second result.

    ``functools.wraps`` copies the metadata of ``func`` (its name, among
    others) onto the inner function, so the decorated callable still
    reports the original name.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        func(*args, **kwargs)         # first call, result discarded
        return func(*args, **kwargs)  # second call supplies the return value
    return wrapper

@do_twice
def say_hi(name):
    """Print a greeting for ``name`` and return ``name``."""
    print(f"hello {name}")
    return name

In [6]:
# With functools.wraps the original function's metadata (such as __name__) is
# copied onto wrapper, so the name reads say_hi again; the object returned by
# the decorator is still the wrapper closure
print(say_hi.__name__)

say_hi


In [5]:
# NOTE(review): find_valid_combo is not defined in any visible cell, so this call
# relies on hidden kernel state -- restore its definition or drop this cell
find_valid_combo("DID")

5

### 数字和日期
* 浮点数：舍入，精度和格式化
* 复数运算
* 数据溢出，大整数，无穷大
* 随机选择
#### 日期
* 日期转换
* 格式化输出

In [13]:
# round() rounds a number to a given number of decimal places
# round(value, ndigits)
print("小数点后一位舍入  {}".format(round(1.23,1)))

# ndigits of -1, -2, -3 rounds to the tens, hundreds, thousands place
print("舍入百位 %r" % round(1627730,-2))

x=1.2345
y=1.2345
# Compare round(x, 3) with format(y, '0.3f'): the first rounds the value,
# the second only formats it for display
print("input = {} => 区别舍入 {} 和格式化 {} ".format(x, round(x,3), format(y,'0.3f')))
# Neither call changes x or y themselves
print("x = {} y = {}".format(x,y))

小数点后一位舍入  1.2
舍入百位 1627700
input = 1.2345 => 区别舍入 1.234 和格式化 1.234 
x = 1.2345 y = 1.2345


* 高精度浮点数
* 格式化数字输出

In [16]:
def find_sum_subarray(A):
    """Return the sum of the minimum element over every contiguous subarray of ``A``.

    For each start index the minimum is extended one element at a time, so
    only a single running minimum is kept instead of a full table of all
    (start, length) pairs. Time is quadratic in ``len(A)``; extra memory is
    constant.

    Args:
        A: Sequence of numbers.

    Returns:
        The accumulated sum; ``0`` for an empty sequence.
    """
    size = len(A)
    total = 0
    # Subarrays of length one: each element is its own minimum.
    for value in A:
        total += value
    # Longer subarrays: grow the window to the right, updating the minimum.
    for start in range(size):
        running_min = A[start]
        for end in range(start + 1, size):
            running_min = min(running_min, A[end])
            total += running_min
    return total

left0  right3  up0  down2
[1, 2, 3]
[1, 2, 3, 4, 8]
[1, 2, 3, 4, 8, 12, 11, 10, 9, 5]


[1, 2, 3, 4, 8, 12, 11, 10, 9, 5, 6, 7]

In [67]:
# NOTE(review): `3:0::-1` has one colon too many for a slice (start:stop:step),
# which is why this cell records a SyntaxError; the intended form is presumably
# a[0][3:0:-1]. `a` is not defined in any visible cell -- confirm before fixing.
a[0][3:0::-1]

SyntaxError: invalid syntax (<ipython-input-67-3bb7a18c9b38>, line 1)

### numpy 技巧
#### array 操作技巧
* 最大最小的值和角标
* sorting!

In [None]:
# min, max, argmin, argmax, argsort

In [None]:
# reshape 

### 矩阵变换
#### 创建
* 单位阵
* 对角阵
* 零阵
#### reshape
* reshape(3,4)
* reshape 中的 -1 表示该维度的大小由其余维度自动推断