# 基本操作

In [1]:
import numpy as np


创建一个`numpy`数组

In [3]:
a = np.array([1, 2, 3])
print(a)


[1 2 3]


创建多维数组

In [5]:
a = np.array([[1, 2, 3],
             [4, 5, 6],
             [7, 8, 9]])
print(a)
print(a.ndim)


[[1 2 3]
 [4 5 6]
 [7 8 9]]
2


新增数据

In [7]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
print(np.concatenate([a, b]))


[1 2 3 4 5 6]


新增数据（多维）

In [8]:
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# 首先将a和b转换为二维数组
a = np.expand_dims(a, axis=0)
b = b[np.newaxis, :]

print('after expand dims: ', a)
print('after expand dims: ', b)

# 然后沿着行方向进行拼接
c = np.concatenate([a, b], axis=0)
print(c)


after expand dims:  [[1 2 3]]
after expand dims:  [[4 5 6]]
[[1 2 3]
 [4 5 6]]


在不同维度上新增数据

In [10]:
print('concatenate along axis 0: ', np.concatenate([c, c], axis=0))
print('concatenate along axis 1: ', np.concatenate([c, c], axis=1))


concatenate along axis 0:  [[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]
concatenate along axis 1:  [[1 2 3 1 2 3]
 [4 5 6 4 5 6]]


横向合并和纵向合并

In [11]:
a = np.array([
    [1, 2],
    [3, 4]
])
b = np.array([
    [5, 6],
    [7, 8]
])
print('horizontal stack: ', np.hstack([a, b]))
print('vertical stack: ', np.vstack([a, b]))


horizontal stack:  [[1 2 5 6]
 [3 4 7 8]]
vertical stack:  [[1 2]
 [3 4]
 [5 6]
 [7 8]]


显示数组的尺寸及形状

In [14]:
print('a: ', a)
print('size of a: ', a.size)
print('shape of a: ', a.shape)


a:  [[1 2]
 [3 4]]
size of a:  4
shape of a:  (2, 2)


访问数据

In [18]:
a = np.array([1, 2, 3])
print('a[0]: ', a[0])
print('a[1]: ', a[1])

print('a[[0, 1]]: ', a[[0, 1]])
print('a[[0, 1, 0]]: ', a[[0, 1, 0]])


a[0]:  1
a[1]:  2
a[[0, 1]]:  [1 2]
a[[0, 1, 0]]:  [1 2 1]


In [19]:
b = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
print('b[0, 0]: ', b[0, 0])
print('b[0, 1]: ', b[0, 1])


b[0, 0]:  1
b[0, 1]:  2


使用切片

In [20]:
a = np.array([1, 2, 3])
print('a[0:2]: ', a[0:2])
print('a[:2]: ', a[:2])
print('a[-2:]: ', a[-2:])


a[0:2]:  [1 2]
a[:2]:  [1 2]
a[-2:]:  [2 3]


In [21]:
b = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
print('b[0:2, 0:2]: ', b[0:2, 0:2])
print('b[:2, :2]: ', b[:2, :2])


b[0:2, 0:2]:  [[1 2]
 [4 5]]
b[:2, :2]:  [[1 2]
 [4 5]]


条件筛选

In [22]:
a = np.array([1, 2, 3, 4, 5, 6])
print('a>3: ', a[a > 3])


a>3:  [4 5 6]


筛选并替换

In [24]:
np.where(a > 3, -1, a)

a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
np.where(a > 3, a, b)


array([7, 8, 9, 4, 5, 6])

基础计算

In [27]:
a = np.array([1, 2, 3, 4, 5, 6])
for i, _ in enumerate(a):
    a[i] += 3
print(a)


[4 5 6 7 8 9]


或者

In [28]:
print(list(map(lambda x: x + 3, a)))


[7, 8, 9, 10, 11, 12]


numpy 写法

In [29]:
print(a + 3)


[ 7  8  9 10 11 12]


点乘

In [30]:
a = np.array([1, 2, 3, 4, 5, 6])
a = a.reshape(2, 3)
b = np.array([7, 8, 9, 10, 11, 12])
b = b.reshape(3, 2)

print(a.dot(b))
print(np.dot(a, b))


[[ 58  64]
 [139 154]]
[[ 58  64]
 [139 154]]


# 数据分析

最大最小值

In [32]:
a = np.array([1, 2, 3, 4, 5, 6])

print('max: ', np.max(a))
print('max: ', a.max())

print('min: ', np.min(a))
print('min: ', a.min())


max:  6
max:  6
min:  1
min:  1


求和

In [33]:
print(a.sum())


21


累乘

In [34]:
print(a.prod())


720


非零数

In [35]:
print(np.count_nonzero(a))
print(a.size)


6
6


平均数、中位数

In [37]:
a = np.array([1, 2, 3, 4, 5, 100])
print('mean: ', np.mean(a))
print('median: ', np.median(a))


mean:  19.166666666666668
median:  3.5


标准差

In [38]:
print('std: ', np.std(a))


std:  36.17281053805775


取得最大值、最小值的索引

In [39]:
a = np.array([1, 2, 3, 4, 5, 1, 2, 5])
print('argmin: ', np.argmin(a))
print('argmax: ', np.argmax(a))


argmin:  0
argmax:  4


`floor`和`ceil`

In [41]:
a = np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])
print('floor: ', np.floor(a))
print('ceil: ', np.ceil(a))


floor:  [1. 2. 3. 4. 5. 6.]
ceil:  [2. 3. 4. 5. 6. 7.]


裁减

In [42]:
a = np.array([1, 2, 3, 4, 5, 6])
print('clip: ', np.clip(a, 2, 4))


clip:  [2 2 3 4 4 4]


# 数据转换

增加维度

In [44]:
a = np.array([1, 2, 3, 4, 5, 6])
a_2d = a[np.newaxis, :]
print('a shape: ', a.shape)
print('a_2d: ', a_2d)
print('a_2d shape: ', a_2d.shape)


a shape:  (6,)
a_2d:  [[1 2 3 4 5 6]]
a_2d shape:  (1, 6)


In [45]:
a = np.array([1, 2, 3, 4, 5, 6])
a_none = a[:, None]
a_expand = np.expand_dims(a, axis=1)

print('a none shape: ', a_none.shape)
print('a_none: ', a_none)

print('a expand shape: ', a_expand.shape)
print('a expand: ', a_expand)


a none shape:  (6, 1)
a_none:  [[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
a expand shape:  (6, 1)
a expand:  [[1]
 [2]
 [3]
 [4]
 [5]
 [6]]


减少维度

In [46]:
a = np.array([1, 2, 3, 4, 5, 6])
a_expand = np.expand_dims(a, axis=1)
print(a_expand)

a_squeeze = np.squeeze(a_expand)
a_squeeze_axis = np.squeeze(a_expand, axis=1)
print(a_squeeze)
print(a_squeeze_axis)


[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
[1 2 3 4 5 6]
[1 2 3 4 5 6]


改变维度

In [47]:
a = np.array([1, 2, 3, 4, 5, 6])
a1 = a.reshape([2, 3])
a2 = a.reshape([3, 1, 2])

print('a1 shape: ', a1.shape)
print('a1: ', a1)

print('a2 shape: ', a2.shape)
print('a2: ', a2)


a1 shape:  (2, 3)
a1:  [[1 2 3]
 [4 5 6]]
a2 shape:  (3, 1, 2)
a2:  [[[1 2]]

 [[3 4]]

 [[5 6]]]


平展

In [48]:
print('ravel a1: ', a1.ravel())
print('flatten a2: ', a2.flatten())


ravel a1:  [1 2 3 4 5 6]
flatten a2:  [1 2 3 4 5 6]


矩阵转置

In [49]:
a = np.array([1, 2, 3, 4, 5, 6])
a = a.reshape([2, 3])
aT1 = a.T
aT2 = a.transpose()

print('aT1: ', aT1)
print('aT2: ', aT2)


aT1:  [[1 4]
 [2 5]
 [3 6]]
aT2:  [[1 4]
 [2 5]
 [3 6]]


合并

In [51]:
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
print(np.column_stack((a, b)))


[1 2 3 4 5 6]
[[ 1  7]
 [ 2  8]
 [ 3  9]
 [ 4 10]
 [ 5 11]
 [ 6 12]]


In [52]:
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
print(np.row_stack((a, b)))


[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


`vstack`和`hstack`需要先保证维度正确，才可以进行合并

In [54]:
a = np.array([1, 2, 3, 4, 5, 6])[:, None]
b = np.array([7, 8, 9, 10, 11, 12])[:, None]
print(np.hstack((a, b)))

a = np.array([1, 2, 3, 4, 5, 6])[None, :]
b = np.array([7, 8, 9, 10, 11, 12])[None, :]
print(np.vstack((a, b)))


[[ 1  7]
 [ 2  8]
 [ 3  9]
 [ 4 10]
 [ 5 11]
 [ 6 12]]
[[1 2 3 4 5 6]]
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


In [55]:
a = np.array([
    [1, 2],
    [3, 4]
])
b = np.array([
    [5, 6],
    [7, 8]
])

print(np.concatenate([a, b], axis=0))
print(np.concatenate([a, b], axis=1))


[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


拆解

In [56]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [7, 8, 9, 10, 11, 12],
              [13, 14, 15, 16, 17, 18],
              [19, 20, 21, 22, 23, 24]])
print('vsplit a: ', np.vsplit(a, 2))
print('vsplit a: ', np.vsplit(a, [2, 3]))


vsplit a:  [array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24]])]
vsplit a:  [array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18]]), array([[19, 20, 21, 22, 23, 24]])]


In [57]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [7, 8, 9, 10, 11, 12],
              [13, 14, 15, 16, 17, 18],
              [19, 20, 21, 22, 23, 24]])
print('hsplit a: ', np.hsplit(a, 2))
print('hsplit a: ', np.hsplit(a, [2, 3]))


hsplit a:  [array([[ 1,  2,  3],
       [ 7,  8,  9],
       [13, 14, 15],
       [19, 20, 21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]
hsplit a:  [array([[ 1,  2],
       [ 7,  8],
       [13, 14],
       [19, 20]]), array([[ 3],
       [ 9],
       [15],
       [21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]


In [58]:
a = np.array([[1, 2, 3, 4, 5, 6],
              [7, 8, 9, 10, 11, 12],
              [13, 14, 15, 16, 17, 18],
              [19, 20, 21, 22, 23, 24]])
print('split a: ', np.split(a, 2, axis=0))
print('split a: ', np.split(a, [2, 3], axis=1))


split a:  [array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24]])]
split a:  [array([[ 1,  2],
       [ 7,  8],
       [13, 14],
       [19, 20]]), array([[ 3],
       [ 9],
       [15],
       [21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]


# 数据处理

使用`unique`函数，可以去除重复的元素

In [59]:
a = np.array([1, 2, 3, 3, 4, 2, 1])
print(a)
print('unique: ', np.unique(a))


[1 2 3 3 4 2 1]
unique:  [1 2 3 4]


使用`isnan`函数，可以判断是否为`NaN`

使用`argwhere`函数，可以找到满足条件的元素的索引

# 数据读取

从文件中加载数据

In [62]:
data = np.loadtxt('data.csv', delimiter=',', skiprows=1, dtype=np.int64)
print(data)


[[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]


从字符串中加载数据

In [64]:
raw_string = '1, 2, 3, 4, 5, 6'
data = np.fromstring(raw_string, dtype=np.int64, sep=',')
print(data)


[1 2 3 4 5 6]


保存数据到文件

In [67]:
a = np.array([1, 2, 3, 4, 5, 6])
print('save txt: ', np.savetxt('data.txt', a, fmt='%d', delimiter=','))

a = np.loadtxt('data.txt', dtype=np.int64, delimiter=',')
print('load txt: ', a)

save txt:  None
load txt:  [1 2 3 4 5 6]


二进制形式保存

In [68]:
a = np.array([1, 2, 3, 4, 5, 6])
print('save: ', np.save('data.npy', a))

a = np.load('data.npy')
print('load: ', a)

save:  None
load:  [1 2 3 4 5 6]


一个文件保存多个数组

In [69]:
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
print('savez: ', np.savez('data.npz', a=a, b=b))

npzfile = np.load('data.npz')
print('npzfile: ', npzfile.files)
print('npzfile a: ', npzfile['a'])
print('npzfile b: ', npzfile['b'])

savez:  None
npzfile:  ['a', 'b']
npzfile a:  [1 2 3 4 5 6]
npzfile b:  [ 7  8  9 10 11 12]


保存并做数据压缩

In [70]:
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
print('savez_compressed: ', np.savez_compressed('data.npz', a=a, b=b))

npzfile = np.load('data.npz')
print('npzfile: ', npzfile.files)
print('npzfile a: ', npzfile['a'])
print('npzfile b: ', npzfile['b'])

savez_compressed:  None
npzfile:  ['a', 'b']
npzfile a:  [1 2 3 4 5 6]
npzfile b:  [ 7  8  9 10 11 12]


# 数据生成

生成全0、全1数组

In [73]:
z = np.zeros(10)
print(z)

z = np.zeros([2, 3])
print(z)

z = np.ones([2, 3, 4])
print(z)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]]


使用某个数值填充数组

In [74]:
a = np.full([2, 3], 5)
print(a)

[[5 5 5]
 [5 5 5]]


使用现有形状

In [76]:
a = np.zeros([2, 3])
b = np.zeros_like(a)

print(a)
print(b)

c = np.ones_like(a)
print(c)

c = np.full_like(a, 5)
print(c)

[[0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[5. 5. 5.]
 [5. 5. 5.]]
