**NumPy处理大型数组的效率远高于Python内建列表**

In [1]:
import numpy as np

# The same one million integers in two containers -- a plain Python list and
# a NumPy array -- so the timing cells compare them on identical data.
my_list = list(range(1_000_000))
my_arr = np.arange(1_000_000)

In [8]:
# Time ten repetitions of doubling every element with a single array expression.
%time for _ in range(10): my_arr2 = my_arr * 2

CPU times: user 26.3 ms, sys: 16.4 ms, total: 42.7 ms
Wall time: 41.9 ms


In [9]:
# Time the same ten repetitions on the Python list, doubling element by element in a comprehension.
%time for _ in range(10): my_list2 = [x * 2 for x in my_list]

CPU times: user 736 ms, sys: 201 ms, total: 937 ms
Wall time: 963 ms


# 多维数组对象 ndarray

In [19]:
import numpy as np

# Draw a 2x3 array of samples from the standard normal distribution.
data = np.random.standard_normal(size=(2, 3))
data

array([[ 0.54000743,  1.1118009 ,  0.85228074],
       [-1.37988643,  2.14382986,  1.22862503]])

In [20]:
# Size of the array along each axis, as a tuple.
data.shape

(2, 3)

In [21]:
# Data type shared by every element of the array.
data.dtype

dtype('float64')

## **arange是Python内建函数range的数组版**

In [24]:
# A bare range is lazy: it displays as a range object, not as its values.
range(15)

range(0, 15)

In [26]:
# Wrapping the range in list() materialises the fifteen integers.
list(range(15))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]

In [25]:
# NumPy counterpart of range(15): the same integers, returned as an ndarray.
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## **数组的切片是原数组的视图，所以“切片”并不是复制出来的，任何对视图的修改都会反映到原数组上**

In [2]:
# Base array 0..9 used to demonstrate views versus copies.
arr = np.arange(10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [5]:
# Basic slicing returns a view onto arr (elements 4, 5, 6); no data is copied.
arr_slice = arr[4:7]
arr_slice

array([4, 5, 6])

In [8]:
# Write through the view: arr_slice[1] refers to the same element as arr[5].
arr_slice[1] = 123

In [14]:
# The view shows the assigned value.
arr_slice

array([  4, 123,   6])

In [9]:
# The original array changed too: arr[5] is now 123.
arr

array([  0,   1,   2,   3,   4, 123,   6,   7,   8,   9])

**如果想要一份数组切片的拷贝而不是一份视图的话，就必须显式地复制这个数组**

In [10]:
# .copy() allocates an independent array, detached from arr.
arr_copy = arr[4:7].copy()
arr_copy

array([  4, 123,   6])

In [11]:
# Modify only the copy.
arr_copy[1] = 987

In [13]:
# The copy holds the assigned value.
arr_copy

array([  4, 987,   6])

In [12]:
# arr is untouched by the write to the copy: arr[5] is still 123, not 987.
arr

array([  0,   1,   2,   3,   4, 123,   6,   7,   8,   9])

## 数组的切片和索引

In [1]:
# 3x3 matrix holding 1..9 in row-major order.
arr2d = np.arange(1, 10).reshape(3, 3)

In [2]:
# Display the 3x3 matrix.
arr2d

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

**如果索引和切片混用，就可以得到低维度的切片**

In [3]:
# Mixing a slice with an integer index: rows 0-1, column 2. The integer
# index drops that axis, so the result is one-dimensional.
arr2d[:2,2]

array([3, 6])

In [4]:
# Only one axis remains after the integer index.
arr2d[:2,2].shape

(2,)

In [5]:
# Slices only: rows 0-1, columns 2-2. Both axes are kept, so the result
# stays two-dimensional.
arr2d[:2,2:3]

array([[3],
       [6]])

In [6]:
# Both axes survive; the column axis has length 1.
arr2d[:2,2:3].shape

(2, 1)

## 神奇索引
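- 当为每个轴都传入一个整数数组时（如 `arr[[1, 5, 7, 2], [0, 3, 1, 2]]`），神奇索引的结果总是一维的；只传入一个整数数组时（如 `arr[[1, 5, 7, 2]]`），选出的是整行，结果仍是二维的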
- 神奇索引和切片不同，它总是将数据复制到一个新的数组中


In [7]:
# 8x4 matrix holding 0..31 in row-major order.
arr = np.arange(32).reshape(8, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

In [15]:
# Fancy indexing with one integer list per axis pairs the indices up
# element-wise: it picks elements (1, 0), (5, 3), (7, 1) and (2, 2).
arr[[1, 5, 7, 2], [0, 3, 1, 2]]

array([ 4, 23, 29, 10])

In [12]:
# A single integer list selects whole rows, in the order given.
arr[[1, 5, 7, 2]]

array([[ 4,  5,  6,  7],
       [20, 21, 22, 23],
       [28, 29, 30, 31],
       [ 8,  9, 10, 11]])

In [14]:
# Two fancy-indexing steps chained: first pick rows 1, 5, 7, 2, then take
# columns 0, 3, 1, 2 of that result. Unlike passing both lists in a single
# index (which pairs them element-wise), this selects a rectangular
# row/column subset.
arr[[1, 5, 7, 2]][:, [0, 3, 1, 2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

## 数组的转置和换轴
- T属性，transpose()和swapaxes()返回的都是原数据的视图，而没有对数据进行复制。修改视图会修改原数据

### T属性

In [17]:
# Fresh 3x5 matrix holding 0..14 for the T-attribute demonstration.
arr = np.arange(15).reshape(3, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [18]:
# Transpose via the T attribute: shape (3, 5) becomes (5, 3).
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [27]:
# T is a view, so writing to position [0, 1] of the transpose writes arr[1, 0].
arr.T[0, 1] = 50

In [28]:
# arr[1, 0] is now 50.
arr

array([[ 0,  1,  2,  3,  4],
       [50,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

### transpose()方法

In [31]:
# Fresh 3x5 matrix holding 0..14 for the transpose() demonstration.
arr = np.arange(15).reshape(3, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [32]:
# transpose(1, 0) gives the new axis order; for a 2-D array this equals arr.T.
arr.transpose(1,0)

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [33]:
# Writing to [0, 2] of the transposed view writes arr[2, 0].
arr.transpose(1, 0)[0, 2] = 100

In [34]:
# arr[2, 0] is now 100.
arr

array([[  0,   1,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [100,  11,  12,  13,  14]])

### swapaxes()方法

In [35]:
# Fresh 3x5 matrix holding 0..14 for the swapaxes() demonstration.
arr = np.arange(15).reshape(3, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [36]:
# swapaxes(0, 1) exchanges the two axes; for a 2-D array this is again the transpose.
arr.swapaxes(0,1)

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [37]:
# Writing to [1, 1] of the swapped view writes arr[1, 1].
arr.swapaxes(0, 1)[1, 1] = 60

In [38]:
# arr[1, 1] is now 60.
arr

array([[ 0,  1,  2,  3,  4],
       [ 5, 60,  7,  8,  9],
       [10, 11, 12, 13, 14]])

# 通用函数

In [47]:
# Ten samples from the standard normal distribution (a mix of signs is expected).
arr = np.random.standard_normal(size=10)
arr

array([ 2.35352296, -0.28923918,  0.34484462, -2.31361388, -0.5587532 ,
       -1.35216686,  0.19194967, -0.15974791,  0.72535182, -0.62118493])

In [53]:
# Element-wise square root; negative inputs have no real root and come back as nan.
np.sqrt(arr)

array([1.5341196 ,        nan, 0.58723472,        nan,        nan,
              nan, 0.43812061,        nan, 0.85167589,        nan])

In [54]:
# arr is unchanged: np.sqrt(arr) returned a new array.
arr

array([ 2.35352296, -0.28923918,  0.34484462, -2.31361388, -0.5587532 ,
       -1.35216686,  0.19194967, -0.15974791,  0.72535182, -0.62118493])

In [52]:
# Ufuncs accept an optional `out` argument: the location the result is stored in.
# Passing arr itself writes the square roots back into arr, in place.
np.sqrt(arr, out=arr)

array([1.5341196 ,        nan, 0.58723472,        nan,        nan,
              nan, 0.43812061,        nan, 0.85167589,        nan])

In [51]:
# arr now holds the square roots written by the in-place call.
arr

array([1.5341196 ,        nan, 0.58723472,        nan,        nan,
              nan, 0.43812061,        nan, 0.85167589,        nan])

# 使用数组进行面向数组编程

In [62]:
# Five integer points from -10 up to (but excluding) -5; the step defaults to 1.
points_1 = np.arange(-10, -5)
points_1

array([-10,  -9,  -8,  -7,  -6])

In [64]:
# Four integer points 0..3; start and step default to 0 and 1.
points_2 = np.arange(4)
points_2

array([0, 1, 2, 3])

## meshgrid(indexing='xy') 笛卡尔索引（默认）

In [81]:
# Build coordinate matrices for the Cartesian product of the two point sets:
# xs holds the first coordinate of every pair, ys the second.
# indexing='xy' is the default; it is spelled out here for clarity.
xs, ys = np.meshgrid(points_1, points_2, indexing='xy')
xs  # shape (4, 5)

array([[-10,  -9,  -8,  -7,  -6],
       [-10,  -9,  -8,  -7,  -6],
       [-10,  -9,  -8,  -7,  -6],
       [-10,  -9,  -8,  -7,  -6]])

In [80]:
# Second coordinate of each pair: every row repeats one value of points_2.
ys  # shape (4,5)

array([[0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3]])

## meshgrid(indexing='ij') matrix indexing

In [83]:
# Same grid with matrix ('ij') indexing: the first input varies along axis 0.
xs_2, ys_2 = np.meshgrid(points_1, points_2, indexing='ij')

In [84]:
# Every row repeats one value of points_1.
xs_2  # shape (5,4)

array([[-10, -10, -10, -10],
       [ -9,  -9,  -9,  -9],
       [ -8,  -8,  -8,  -8],
       [ -7,  -7,  -7,  -7],
       [ -6,  -6,  -6,  -6]])

In [85]:
# Every row is a copy of points_2.
ys_2  # shape (5,4)

array([[0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3],
       [0, 1, 2, 3]])

## 将条件逻辑作为数组操作 
**np.where(condition, x, y)**
- 会生成新的数组
- where的第二个和第三个参数x,y并不需要是数组，它们可以是标量

In [87]:
# 4x4 matrix of samples from the standard normal distribution.
arr = np.random.standard_normal(size=(4, 4))
arr

array([[ 0.03778451, -0.50445874,  0.23271541, -0.32424875],
       [-0.69529623,  1.1013967 , -0.65204392, -0.90047203],
       [ 2.24631944, -0.23436736,  1.13555187, -0.8394797 ],
       [-1.24935032,  0.16731078,  0.88514611, -0.90534109]])

In [89]:
# The second argument is a scalar: positive entries become 2, the rest keep
# their value from arr.
np.where(arr > 0, 2, arr)

array([[ 2.        , -0.50445874,  2.        , -0.32424875],
       [-0.69529623,  2.        , -0.65204392, -0.90047203],
       [ 2.        , -0.23436736,  2.        , -0.8394797 ],
       [-1.24935032,  2.        ,  2.        , -0.90534109]])

## 数学和统计方法

In [91]:
# 3x4 matrix holding 0..11 in row-major order.
arr = np.arange(12).reshape(3, 4)
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [95]:
# Sum down each column (axis 0 is collapsed).
arr.sum(axis=0)

array([12, 15, 18, 21])

In [96]:
# Mean of each row (axis 1 is collapsed).
arr.mean(axis=1)

array([1.5, 5.5, 9.5])

In [97]:
# Maximum of each row.
arr.max(axis=1)

array([ 3,  7, 11])

## 布尔值数组的方法

布尔值会被强制为1(True)和0(False)。因此，sum()通常可以用于计算布尔值数组中的True个数

In [98]:
# One hundred samples from the standard normal distribution.
arr = np.random.standard_normal(size=100)
arr

array([ 0.47763325,  0.02338774,  1.56760857,  0.05163515,  1.21014224,
        0.42732683,  0.04366304, -1.73803583,  0.41301762,  0.64958057,
        0.24919848, -0.83308999, -0.01418481, -0.65826761, -2.64534985,
        1.54961657, -0.07186953,  0.11791718, -0.12158699,  0.21566191,
        0.9364842 , -0.58619117, -1.83899546,  1.99466492,  1.36570137,
        0.44108963,  0.53336202, -0.52715695, -1.27065108, -0.87321314,
        0.04821331,  0.05579315, -0.2893762 ,  0.8671984 ,  0.09556753,
       -0.40143751, -0.21838279, -0.44065695, -1.13478097, -0.29597314,
       -0.1120933 , -0.54077478,  0.73651855,  0.44184604,  1.42973472,
        1.8473794 ,  0.66133546, -0.33179439,  0.11037665,  0.11415863,
        0.19950886, -0.65962284,  0.87015938,  0.46404488,  0.08609615,
        0.56351674,  1.18071134,  0.04281816,  2.69881434, -1.47590998,
        0.73324778,  0.90224076, -1.06204575,  0.20553571, -0.36430705,
        0.68161316,  0.6270793 ,  0.24035295, -1.18635347, -0.31

In [100]:
# True counts as 1 and False as 0, so summing the mask counts the matches.
(arr > 0).sum() # number of positive values

53