# 可迭代对象和迭代器、生成器

In [12]:
from collections.abc import Iterable, Iterator, Generator

# Iterators vs. generators
# An iterator is a class-based producer of single values; a generator is a
# function-based producer of single values. Both hand out one value per step.
# The difference: an iterator needs a class that defines __iter__ and __next__ and
# must be instantiated before use, whereas a generator is expressed as a function
# and can be called directly, which makes generators more concise to use.
# Built-ins such as zip(), enumerate(), map(), filter() and reversed() return iterators.
# range() returns an iterable object, not an iterator.
li1, li2 = range(10000), range(10)
it = zip(li1, li2)
print(next(it))
# First line: are both iterable?  Second line: are both iterators?
for abc_kind in (Iterable, Iterator):
    print(isinstance(li1, abc_kind), isinstance(it, abc_kind))

(0, 0)
True True
False True


# Numpy

In [None]:
# Numpy，Numerical Python
# Numpy方法比Python方法要快10到100倍，并且使用的内存也更少。

In [17]:
import numpy as np

# One million integers, once as an ndarray and once as a plain Python list; the
# timing cells below compare doubling each of them.
# np.arange creates the array directly instead of converting a Python range
# object element by element, which is the idiomatic (and faster) construction.
my_arr = np.arange(1000000)
my_list = list(range(1000000))

In [18]:
# Time ten passes of doubling the whole ndarray with one vectorized expression.
%time for _ in range(10): my_arr2 = my_arr * 2

Wall time: 28 ms


In [19]:
# Time ten passes of doubling the Python list with a list comprehension, for comparison.
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

Wall time: 1.13 s


# 4.1.1 ndarray/多维数组对象

In [40]:
import numpy as np

# Explicit dtype: the builtin `int` maps to NumPy's default integer type (int32 in the
# recorded output; this is platform-dependent) and the builtin `float` maps to np.float64.
arr1 = np.zeros((3, 4), dtype=int)
print(arr1, type(arr1))  # an ndarray is a multi-dimensional container of same-typed data
print(arr1.ndim)  # number of dimensions
print(arr1.shape)  # size along each dimension
print(arr1.dtype)  # integer here because dtype=int was passed; without it np.zeros defaults to float64

[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]] <class 'numpy.ndarray'>
2
(3, 4)
int32


In [30]:
np.zeros((2, 3, 4))  # shape == (blocks, rows, columns): made of 2 copies of np.zeros((3, 4))

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [31]:
np.empty((2, 3, 4))  # empty creates an array without initializing its values, so the contents are arbitrary

array([[[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]],

       [[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]]])

In [43]:
import random  # NOTE(review): not used in this cell; the random values come from np.random

int_array = np.arange(10)
calibers = np.random.rand(10)  # ten unseeded random floats; values differ on every run
print(int_array)
print(calibers)
new_array = int_array.astype(calibers.dtype)  # cast to the dtype of `calibers`
print(int_array)  # the source array is left unchanged by astype
print(new_array)  # astype always creates a new array, even if the requested dtype equals the current one

[0 1 2 3 4 5 6 7 8 9]
[0.72703733 0.43349754 0.93851466 0.2432287  0.43772368 0.5229145
 0.4435793  0.4472958  0.31190861 0.95824959]
[0 1 2 3 4 5 6 7 8 9]
[0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]


# 4.1.4 切片

In [None]:
# arr[:]不等价于arr.copy()（和python中的list不同），是arr的视图；
# numpy被设计成适合处理非常大的数组，持续复制数据则会引起很多内存问题。

In [102]:
import numpy as np

# Two equivalent ways to spell the same 2x2x3 nested list (values 1..12):
# li = [[[i + j + k for i in range(1, 4)] for j in range(0, 4, 3)] for k in range(0, 7, 6)]
li = [[[i + j * 3 + k * 6 for i in range(1, 4)] for j in range(0, 2)] for k in range(0, 2)]

# `li` has to be defined in this cell: with both definitions commented out,
# np.array(li) only worked when a leftover `li` from another cell was still in the
# kernel, and raised NameError after a restart.
arr3d = np.array(li)
arr3d = np.arange(1, 13).reshape((2, 2, 3))  # the same array, built more concisely with reshape
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [45]:
old_values = arr3d[0].copy()  # independent copy of the first 2x3 block, kept so it can be restored
arr3d[0] = 99  # the scalar is broadcast over the whole first block, modifying arr3d in place
arr3d

array([[[99, 99, 99],
        [99, 99, 99]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [46]:
arr3d[0] = old_values  # write the saved copy back into the first block
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [47]:
arr3d[0, 1]  # same as arr3d[0][1]

array([4, 5, 6])

In [53]:
slice1 = arr3d[:, 1, :2]  # integer index on axis 1 drops that axis: row 1 of every block, first two columns
slice1, slice1.shape

(array([[ 4,  5],
        [10, 11]]), (2, 2))

In [54]:
slice2 = arr3d[:, 1:2, :2]  # the slice 1:2 keeps axis 1 (with length 1), so the result has one more dimension
slice2, slice2.shape

(array([[[ 4,  5]],
 
        [[10, 11]]]), (2, 1, 2))

# 4.1.5 布尔索引

In [70]:
import numpy as np

names = np.array(["Bob", "Joe", "Will", "Bob", "Will", "Joe", "Joe"])
data = np.random.randn(7, 4)  # unseeded random 7x4 array, one row per entry of `names`; values differ on every run

# String arrays get a fixed-width Unicode dtype, e.g. "U10" means Unicode of length 10;
# here the longest name has 4 characters, hence the '<U4' in the output.
names

array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'], dtype='<U4')

In [71]:
data  # display the random array used by the boolean-indexing cells

array([[-0.71627879, -1.20651693, -0.44900824, -1.11467653],
       [ 0.64930013, -1.04715157,  0.37788995, -0.25570928],
       [-0.05397453,  0.72897994,  1.04621625,  1.54582988],
       [-0.84327586, -2.72187729,  0.68021665, -0.60791348],
       [-0.93792468, -0.33219255,  0.47343501,  0.52544135],
       [-1.10053   ,  0.06766763,  1.05487143, -0.82091682],
       [ 1.76285866, -0.70415732,  0.30885753, -0.1456462 ]])

In [72]:
names == "Joe"  # element-wise comparison produces a boolean array

array([False,  True, False, False, False,  True,  True])

In [73]:
data[names == "Joe"]  # the boolean mask must match the length of the indexed axis; too long or too short raises IndexError

array([[ 0.64930013, -1.04715157,  0.37788995, -0.25570928],
       [-1.10053   ,  0.06766763,  1.05487143, -0.82091682],
       [ 1.76285866, -0.70415732,  0.30885753, -0.1456462 ]])

In [74]:
data[names == "Joe", 1:3]  # boolean mask on the rows combined with a slice on the columns

array([[-1.04715157,  0.37788995],
       [ 0.06766763,  1.05487143],
       [-0.70415732,  0.30885753]])

In [78]:
cond = names == "Joe"
data[~cond]  # equivalent form 1: invert the mask with ~
# data[~(names == "Joe")]  # equivalent form 2
# data[names != "Joe"]  # equivalent form 3

array([[-0.71627879, -1.20651693, -0.44900824, -1.11467653],
       [-0.05397453,  0.72897994,  1.04621625,  1.54582988],
       [-0.84327586, -2.72187729,  0.68021665, -0.60791348],
       [-0.93792468, -0.33219255,  0.47343501,  0.52544135]])

In [84]:
data[~(names == "Joe") & (names == "Will")]  # the keywords and/or do not work on boolean arrays: use & and | instead, with every condition in parentheses

array([[-0.05397453,  0.72897994,  1.04621625,  1.54582988],
       [-0.93792468, -0.33219255,  0.47343501,  0.52544135]])

# 4.1.6 神奇索引

In [85]:
import numpy as np

# Start from an uninitialized 8x4 array, then fill every row with its own row index.
arr = np.empty((8, 4))

for i, row in enumerate(arr):
    row[...] = i  # `row` is a view of arr[i], so this writes into arr itself
arr

array([[0., 0., 0., 0.],
       [1., 1., 1., 1.],
       [2., 2., 2., 2.],
       [3., 3., 3., 3.],
       [4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [6., 6., 6., 6.],
       [7., 7., 7., 7.]])

In [86]:
arr[[4, 3, 0, 6]]  # fancy indexing: select rows in exactly the listed order

array([[4., 4., 4., 4.],
       [3., 3., 3., 3.],
       [0., 0., 0., 0.],
       [6., 6., 6., 6.]])

In [87]:
arr[[-4, -3, 0, -6]]  # negative indices select rows counted from the end

array([[4., 4., 4., 4.],
       [5., 5., 5., 5.],
       [0., 0., 0., 0.],
       [2., 2., 2., 2.]])

In [88]:
import numpy as np

# 4x4x4 nested list: row j of block k holds the value j + 4*k repeated four times.
li = [[[value] * 4 for value in range(4 * k, 4 * k + 4)] for k in range(4)]

arr3d = np.array(li)
arr3d

array([[[ 0,  0,  0,  0],
        [ 1,  1,  1,  1],
        [ 2,  2,  2,  2],
        [ 3,  3,  3,  3]],

       [[ 4,  4,  4,  4],
        [ 5,  5,  5,  5],
        [ 6,  6,  6,  6],
        [ 7,  7,  7,  7]],

       [[ 8,  8,  8,  8],
        [ 9,  9,  9,  9],
        [10, 10, 10, 10],
        [11, 11, 11, 11]],

       [[12, 12, 12, 12],
        [13, 13, 13, 13],
        [14, 14, 14, 14],
        [15, 15, 15, 15]]])

In [89]:
arr3d[[1, 2, 1]]  # fancy indexing on axis 0: blocks 1, 2, 1 (an index may repeat)

array([[[ 4,  4,  4,  4],
        [ 5,  5,  5,  5],
        [ 6,  6,  6,  6],
        [ 7,  7,  7,  7]],

       [[ 8,  8,  8,  8],
        [ 9,  9,  9,  9],
        [10, 10, 10, 10],
        [11, 11, 11, 11]],

       [[ 4,  4,  4,  4],
        [ 5,  5,  5,  5],
        [ 6,  6,  6,  6],
        [ 7,  7,  7,  7]]])

In [97]:
arr3d[1:][[1, 2, 1]]  # the index list applies to the sliced array arr3d[1:], i.e. blocks 2, 3, 2 of arr3d
# arr3d[1:2][[1, 2, 1]]  # IndexError: index 1 is out of bounds for axis 0 with size 1

array([[[ 8,  8,  8,  8],
        [ 9,  9,  9,  9],
        [10, 10, 10, 10],
        [11, 11, 11, 11]],

       [[12, 12, 12, 12],
        [13, 13, 13, 13],
        [14, 14, 14, 14],
        [15, 15, 15, 15]],

       [[ 8,  8,  8,  8],
        [ 9,  9,  9,  9],
        [10, 10, 10, 10],
        [11, 11, 11, 11]]])

In [92]:
arr3d[1][[1, 2, 1]]  # rows 1, 2, 1 of block 1

array([[5, 5, 5, 5],
       [6, 6, 6, 6],
       [5, 5, 5, 5]])

In [103]:
import numpy as np

arr = np.arange(32).reshape((8, 4))  # the integers 0..31 as 8 rows x 4 columns
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [105]:
arr[[1, 5, 7, 2], [0, 3, 1, 2]]  # the two lists are paired element-wise: picks (1, 0), (5, 3), (7, 1), (2, 2) into a 1-D result

array([ 4, 23, 29, 10])

In [108]:
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]  # select rows 1, 5, 7, 2 first, then reorder the columns of that selection
# arr[[1, 5, 7, 2]][[0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

# 4.1.7 数组转置、换轴

In [116]:
import numpy as np

arr = np.arange(12).reshape(3, 4)  # the integers 0..11 as a 3x4 matrix
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [117]:
arr.T  # transpose; same as arr.transpose()

array([[ 0,  4,  8],
       [ 1,  5,  9],
       [ 2,  6, 10],
       [ 3,  7, 11]])

In [119]:
np.dot(arr, arr.T)  # matrix product of arr (3x4) with its transpose (4x3), giving a 3x3 result

array([[ 14,  38,  62],
       [ 38, 126, 214],
       [ 62, 214, 366]])

In [120]:
arr = np.arange(16).reshape((2, 2, 4))  # the integers 0..15 as 2 blocks x 2 rows x 4 columns
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [123]:
arr.transpose()  # same as arr.transpose((2, 1, 0)): reverses the order of the three axes, swapping the block and column axes
                 # shape (2, 2, 4) => (4, 2, 2)

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [124]:
arr.transpose((1, 0, 2))  # shape (2, 2, 4) => (2, 2, 4); the block and row axes are swapped

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [127]:
arr.swapaxes(1, 0)  # swap axis 1 and axis 0; swapaxes takes exactly two axis arguments

array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])

In [None]:
# arr.T、arr.transpose((1, 0, 2))、arr.swapaxes(1, 0)返回的都是数组的视图，没有对数组进行复制，修改会反馈到数组本身

# 单斜杠结尾

In [None]:
# In a raw string every backslash is kept literally, so str1 contains doubled backslashes.
str1 = r'C:\\Users\\Administrator\\Anaconda3\\lib\\site-packages\\numpy\\__init__.py'
# Collapse every doubled backslash into a single one.
str2 = "\\".join(str1.split(r"\\"))
# str2 = str1.replace(r"\\", r"\")
# A string literal ending in a single backslash is illegal (even a raw one). This mistake is
# especially easy to make when building paths for os.path.join. On Python 3, prefer the
# pathlib library over os.path.
for text in (str1, str2):
    print(text)

# A.1 ndarray对象内幕

In [134]:
import numpy as np

arr = np.ones((3, 4, 5))
arr.strides  # strides: number of bytes to step to reach the next element along each axis
# Each float element here occupies 8 bytes.
# Axis 0: from element 1 to element 21 is 1*5*4 = 20 elements, i.e. 20 * 8 = 160 bytes;
# Axis 1: from element 1 to element 6 is 1*5 = 5 elements, i.e. 5 * 8 = 40 bytes;
# Axis 2: from element 1 to element 2 is 1 element, i.e. 1 * 8 = 8 bytes.

(160, 40, 8)

In [145]:
# Pin the dtype to int32 (4 bytes per element) so that the strides are (80, 20, 4)
# everywhere; NumPy's default integer size depends on the platform and NumPy version,
# so without an explicit dtype the strides may come out as (160, 40, 8) instead.
arr = np.arange(60, dtype=np.int32).reshape((3, 4, -1))  # -1 lets reshape infer the last dimension: 60 / 3 / 4 == 5
arr, arr.strides  # strides in bytes per axis: 20 * 4, 5 * 4, 1 * 4

(array([[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9],
         [10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],
 
        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29],
         [30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],
 
        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49],
         [50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]]), (80, 20, 4))

In [142]:
# arr[:, :, [4, 1, 2, 3]]
arr[:, :, ::-1]  # blocks and rows keep their order; only the last (column) axis is reversed

array([[[ 4,  3,  2,  1,  0],
        [ 9,  8,  7,  6,  5],
        [14, 13, 12, 11, 10],
        [19, 18, 17, 16, 15]],

       [[24, 23, 22, 21, 20],
        [29, 28, 27, 26, 25],
        [34, 33, 32, 31, 30],
        [39, 38, 37, 36, 35]],

       [[44, 43, 42, 41, 40],
        [49, 48, 47, 46, 45],
        [54, 53, 52, 51, 50],
        [59, 58, 57, 56, 55]]])

In [143]:
arr[:, ::-1]  # reverse the order of the rows inside every block

array([[[15, 16, 17, 18, 19],
        [10, 11, 12, 13, 14],
        [ 5,  6,  7,  8,  9],
        [ 0,  1,  2,  3,  4]],

       [[35, 36, 37, 38, 39],
        [30, 31, 32, 33, 34],
        [25, 26, 27, 28, 29],
        [20, 21, 22, 23, 24]],

       [[55, 56, 57, 58, 59],
        [50, 51, 52, 53, 54],
        [45, 46, 47, 48, 49],
        [40, 41, 42, 43, 44]]])

In [144]:
arr[::-1]  # reverse the order of the blocks

array([[[40, 41, 42, 43, 44],
        [45, 46, 47, 48, 49],
        [50, 51, 52, 53, 54],
        [55, 56, 57, 58, 59]],

       [[20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29],
        [30, 31, 32, 33, 34],
        [35, 36, 37, 38, 39]],

       [[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9],
        [10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]]])

# A.2.1 重塑数组

In [147]:
import numpy as np

arr = np.arange(15).reshape(3, -1)  # -1 lets reshape infer the column count: 15 / 3 == 5
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [148]:
# Flattening to one dimension
arr.ravel()  # returns a view of the data where possible (not displayed: only the cell's last expression is shown)
arr.flatten()  # always returns a copy of the data

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

# A.2.4 tile和repeat

In [1]:
import numpy as np

arr = np.arange(3)
arr.repeat(3)  # repeat every element three times

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [2]:
arr.repeat([2, 3, 4])  # per-element repeat counts: 0 twice, 1 three times, 2 four times

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [5]:
arr = np.random.randn(2, 2)  # unseeded random 2x2 array; values differ on every run
arr

array([[0.22769693, 2.14671262],
       [2.42941769, 0.06936072]])

In [8]:
arr.repeat([2, 3], axis=0)  # along axis 0: the first row is repeated twice, the second row three times

array([[0.22769693, 2.14671262],
       [0.22769693, 2.14671262],
       [2.42941769, 0.06936072],
       [2.42941769, 0.06936072],
       [2.42941769, 0.06936072]])

In [10]:
arr = np.random.randn(2, 2)  # unseeded random 2x2 array; values differ on every run
arr

array([[-0.97195523,  1.13747127],
       [ 0.65205781, -0.01146126]])

In [11]:
np.tile(arr, [2, 3])  # lay the whole array out like tiles: 2 copies along axis 0, 3 copies along axis 1

array([[-0.97195523,  1.13747127, -0.97195523,  1.13747127, -0.97195523,
         1.13747127],
       [ 0.65205781, -0.01146126,  0.65205781, -0.01146126,  0.65205781,
        -0.01146126],
       [-0.97195523,  1.13747127, -0.97195523,  1.13747127, -0.97195523,
         1.13747127],
       [ 0.65205781, -0.01146126,  0.65205781, -0.01146126,  0.65205781,
        -0.01146126]])

# take、put

In [12]:
import numpy as np

In [13]:
arr = np.arange(10) * 100  # 0, 100, ..., 900
inds = [7, 1, 2, 6]

arr.take(inds)  # same as arr[inds] (fancy indexing)

array([700, 100, 200, 600])

In [15]:
arr.put(inds, [40, 41, 42, 43])  # modifies arr in place: position inds[k] receives the k-th value; alternative with one value: arr.put(inds, 39)
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [16]:
arr = np.random.randn(2, 4)  # unseeded random 2x4 array; values differ on every run
arr

array([[ 0.72578708,  0.44506336, -0.24456831, -0.26862911],
       [-1.50038793, -0.50346738, -0.14528943,  1.19407286]])

In [17]:
inds = [2, 0, 2, 1]
arr.take(inds, axis=1)  # pick columns 2, 0, 2, 1 from every row; each index must be within range for that axis

array([[-0.24456831,  0.72578708, -0.24456831,  0.44506336],
       [-0.14528943, -1.50038793, -0.14528943, -0.50346738]])