In [1]:
import numpy as np

## numpy的数组类型为 ndarray
>ndarray.ndim：数组的维数，也称为rank

>ndarray.shape：数组各维的大小，tuple 类型；对一个 n 行 m 列的矩阵来说，shape 为 (n, m)

>ndarray.size：元素的总数。 

>ndarray.dtype：每个元素的类型，可以是 numpy.int32, numpy.int16, and numpy.float64 等

>ndarray.itemsize：每个元素占用的字节数

>ndarray.data：指向数据内存 

数组：不需使用循环即可对数据进行批量运算（矢量化）

## 广播
不同形状的数组之间的算术运算的执行方式

如果两个数组的后缘维度（从末尾开始算的维度）的轴长度相等或其中一方的长度为1，则认为是广播兼容的；广播会在缺失或长度为1的维度上进行
> 让所有输入数组都向其中维数最多的数组看齐，shape属性中不足的部分在前面加1补齐

> 输出数组的shape属性是输入数组shape属性在各个轴上的最大值

> 输入数组的某个轴长度为1或与输出数组对应轴长度相同，方能使用；否则出错

> 输入数组的某个轴长度为1时，沿着此轴运算时都用此轴的第一组值

In [2]:
# Build a 1-D row of shape (3,) and a column of shape (3, 1) from the same
# values; the mismatched shapes set up the broadcasting example that follows.
arr1 = np.arange(3)
arr2 = np.arange(3)[:, np.newaxis]

In [3]:
# Shapes (3,) and (3, 1) are broadcast against each other, giving a (3, 3) result.
arr1 + arr2

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [4]:
# Draw a 4x3 matrix of standard-normal samples; no seed is set, so the values
# differ on every run.
arr = np.random.standard_normal(size=(4, 3))
arr

array([[ 6.08106902e-01, -1.05163460e+00, -2.57124795e-01],
       [ 1.56868650e-03,  6.49659164e-01, -1.35735575e-01],
       [ 1.96223858e+00, -6.31500026e-01,  1.20095734e+00],
       [-3.10905561e-01,  8.70237398e-01,  9.04969105e-01]])

In [5]:
# Subtract each column's mean; the (3,) mean vector is broadcast over the 4 rows.
arr - arr.mean(axis=0)

array([[ 0.04285475, -1.01082509, -0.68539131],
       [-0.56368347,  0.69046868, -0.56400209],
       [ 1.39698643, -0.59069051,  0.77269082],
       [-0.87615771,  0.91104692,  0.47670259]])

## 数组重塑
> reshape(a, newshape)
> #作为参数形状的其中一维可以是-1，表示该维度的大小由数据本身推断而来 

> ravel([order])：多维数据转化为一维数组

In [10]:
# -1 asks NumPy to infer the second dimension (15 / 5 = 3 columns).
np.arange(15).reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [15]:
# 3x4 matrix holding 0..11 in row-major order, reused by the ravel examples.
x = np.arange(12).reshape((3, 4))
x

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [13]:
# Flatten row by row (C order is the default).
x.ravel(order="C")

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [14]:
# Choose row-major ("C") or column-major ("F", Fortran) traversal; here "F"
# walks down each column first.
x.ravel(order="F")

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

## 数组的合并
> concatenate((a1, a2, ...), axis=0)

> hstack() 和 vstack()

> r_和c_：辅助堆叠类(堆叠到row和column)

In [16]:
# Two 2x3 blocks, stacked one above the other (axis 0) into a 4x3 array.
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate((arr1, arr2), axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [17]:
# Join the same two blocks side by side (axis 1), giving a 2x6 array.
np.concatenate((arr1, arr2), axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [18]:
# Vertical stack: for these 2-D inputs the result matches concatenation along axis 0.
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [19]:
# np.r_ stacks the arrays along the first axis (rows) using index syntax.
np.r_[arr1,arr2]

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

## 数组的拆分 
> split(ary, indices_or_sections, axis=0)
> #indices_or_sections :数字或者1维数组（数字：等分；一维数组：指定分割处）

> 便捷化函数：hsplit, vsplit, dsplit (分别沿轴1，轴0，轴2)

In [23]:
# An integer section count splits the 9-element array into 3 equal parts.
arr = np.arange(9)
np.split(arr, indices_or_sections=3)

[array([0, 1, 2]), array([3, 4, 5]), array([6, 7, 8])]

In [24]:
# A list of indices marks the cut points: [:2], [2:4], [4:6], [6:].
np.split(arr, indices_or_sections=[2, 4, 6])

[array([0, 1]), array([2, 3]), array([4, 5]), array([6, 7, 8])]

## 元素的重复操作 
> repeat(a, repeats, axis=None)：axis默认扁平化

> tile(A,reps) ：沿指定轴向堆叠数组副本，重复填充A reps次

repeat是以元素为单位，tile是以轴为单位，填充的结果不同

In [26]:
# 2x2 matrix of standard-normal samples used by the repeat/tile examples;
# unseeded, so the values change between runs.
arr = np.random.standard_normal(size=(2, 2))
arr

array([[ 0.5065706 , -0.99079773],
       [-0.86968458, -0.04902275]])

In [27]:
# With no axis the array is flattened and every element is repeated twice.
np.repeat(arr, 2)

array([ 0.5065706 ,  0.5065706 , -0.99079773, -0.99079773, -0.86968458,
       -0.86968458, -0.04902275, -0.04902275])

In [29]:
# Repeat each column twice along axis 1; the row count is unchanged.
np.repeat(arr, 2, axis=1)

array([[ 0.5065706 ,  0.5065706 , -0.99079773, -0.99079773],
       [-0.86968458, -0.86968458, -0.04902275, -0.04902275]])

In [30]:
# Per-column repeat counts: first column once, second column twice.
np.repeat(arr, [1, 2], axis=1)

array([[ 0.5065706 , -0.99079773, -0.99079773],
       [-0.86968458, -0.04902275, -0.04902275]])

In [32]:
# Per-column repeat counts: first column three times, second column twice.
np.repeat(arr, [3, 2], axis=1)

array([[ 0.5065706 ,  0.5065706 ,  0.5065706 , -0.99079773, -0.99079773],
       [-0.86968458, -0.86968458, -0.86968458, -0.04902275, -0.04902275]])

In [33]:
# A scalar reps lays two copies of the whole array side by side (2x4 result).
np.tile(arr, reps=2)

array([[ 0.5065706 , -0.99079773,  0.5065706 , -0.99079773],
       [-0.86968458, -0.04902275, -0.86968458, -0.04902275]])

In [34]:
# reps=(3, 2): three copies down the rows and two across the columns (6x4 result).
np.tile(arr, reps=(3, 2))

array([[ 0.5065706 , -0.99079773,  0.5065706 , -0.99079773],
       [-0.86968458, -0.04902275, -0.86968458, -0.04902275],
       [ 0.5065706 , -0.99079773,  0.5065706 , -0.99079773],
       [-0.86968458, -0.04902275, -0.86968458, -0.04902275],
       [ 0.5065706 , -0.99079773,  0.5065706 , -0.99079773],
       [-0.86968458, -0.04902275, -0.86968458, -0.04902275]])

## 花式索引
> take(a, indices, axis=None, out=None, mode='raise')

> put(a, ind, v, mode='raise')

In [35]:
# Integers 0..9, used as the target of the take/put examples below.
arr = np.arange(0, 10)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [36]:
# take() gathers the elements at the listed positions, in that order.
inds = [7, 1, 2, 6]
np.take(arr, inds)

array([7, 1, 2, 6])

In [39]:
# put() writes in place: position inds[k] receives the k-th new value.
np.put(arr, inds, [40, 41, 42, 43])
arr

array([ 0, 41, 42,  3,  4,  5, 43, 40,  8,  9])

In [40]:
# 2x4 matrix of standard-normal samples for the axis-wise take example;
# unseeded, so the values change between runs.
arr = np.random.standard_normal(size=(2, 4))
arr

array([[ 2.51398407,  0.75538833,  0.43603253, -0.12974856],
       [ 1.55713167,  0.37763493, -0.42138493, -2.23665321]])

In [41]:
# With axis=1 the indices select (and may repeat) columns.
inds = [2, 0, 2, 1]
np.take(arr, inds, axis=1)

array([[ 0.43603253,  2.51398407,  0.43603253,  0.75538833],
       [-0.42138493,  1.55713167, -0.42138493,  0.37763493]])

## 排序
> sort(a, axis=-1, kind='quicksort', order=None) 

> argsort(a, axis=-1, kind='quicksort', order=None)

> lexsort(keys, axis=-1)：可以一次性对多个键数组进行间接排序，优先级为从后往前

In [47]:
# argsort returns the positions that would sort `values` in ascending order.
values = np.array([5, 0, 1, 3, 2])
indexer = np.argsort(values)
indexer

array([1, 2, 4, 3, 0], dtype=int64)

In [48]:
# 3x5 matrix of standard-normal samples for the column-reordering example;
# unseeded, so the values change between runs.
arr = np.random.standard_normal(size=(3, 5))
arr

array([[ 1.76118708, -1.0425258 ,  0.37776571,  0.25553718,  0.25966405],
       [ 0.51337156, -1.04630387, -1.50008894,  0.78717084, -0.4164357 ],
       [ 0.08228841,  0.38559032,  0.13490722, -1.37889964, -1.01403675]])

In [49]:
# Reorder all columns so that the first row ends up sorted in ascending order.
col_order = np.argsort(arr[0])
arr[:, col_order]

array([[-1.0425258 ,  0.25553718,  0.25966405,  0.37776571,  1.76118708],
       [-1.04630387,  0.78717084, -0.4164357 , -1.50008894,  0.51337156],
       [ 0.38559032, -1.37889964, -1.01403675,  0.13490722,  0.08228841]])