# 基本操作

In [142]:
import numpy as np


创建一个`numpy`数组

In [143]:
# Build a one-dimensional array from a plain Python list and display it.
values = [1, 2, 3]
a = np.array(values)
print(a)


[1 2 3]


创建多维数组

In [144]:
# A nested list becomes a two-dimensional array; `ndim` is the number of axes.
rows = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
a = np.array(rows)
print(a)
print(a.ndim)


[[1 2 3]
 [4 5 6]
 [7 8 9]]
2


新增数据

In [145]:
# Joining two 1-D arrays end to end yields one longer 1-D array.
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
joined = np.concatenate((a, b))
print(joined)


[1 2 3 4 5 6]


新增数据（多维）

In [146]:
# Promote both 1-D arrays to shape (1, 3), using two equivalent spellings:
# np.expand_dims and indexing with np.newaxis.
a = np.expand_dims(np.array([1, 2, 3]), axis=0)
b = np.array([4, 5, 6])[np.newaxis, :]

for expanded in (a, b):
    print('after expand dims: ', expanded)

# Stack the two single-row arrays along the row axis.
c = np.concatenate((a, b), axis=0)
print(c)


after expand dims:  [[1 2 3]]
after expand dims:  [[4 5 6]]
[[1 2 3]
 [4 5 6]]


在不同维度上新增数据

In [147]:
# Concatenate `c` (defined in the previous cell) with itself along each axis in turn.
for label, axis in (('concatenate along axis 0: ', 0),
                    ('concatenate along axis 1: ', 1)):
    print(label, np.concatenate([c, c], axis=axis))


concatenate along axis 0:  [[1 2 3]
 [4 5 6]
 [1 2 3]
 [4 5 6]]
concatenate along axis 1:  [[1 2 3 1 2 3]
 [4 5 6 4 5 6]]


横向合并和纵向合并

In [148]:
# hstack joins arrays side by side (more columns); vstack joins them top to bottom (more rows).
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

side_by_side = np.hstack((a, b))
top_to_bottom = np.vstack((a, b))
print('horizontal stack: ', side_by_side)
print('vertical stack: ', top_to_bottom)


horizontal stack:  [[1 2 5 6]
 [3 4 7 8]]
vertical stack:  [[1 2]
 [3 4]
 [5 6]
 [7 8]]


显示数组的尺寸及形状

In [149]:
# Inspect `a` from the previous cell: `size` is the element count, `shape` the per-axis lengths.
print('a: ', a)
for label, value in (('size of a: ', a.size), ('shape of a: ', a.shape)):
    print(label, value)


a:  [[1 2]
 [3 4]]
size of a:  4
shape of a:  (2, 2)


访问数据

In [150]:
a = np.array([1, 2, 3])

# A single integer selects one element.
for label, position in (('a[0]: ', 0), ('a[1]: ', 1)):
    print(label, a[position])

# A list of integers selects several elements at once; repeats are allowed.
for label, positions in (('a[[0, 1]]: ', [0, 1]), ('a[[0, 1, 0]]: ', [0, 1, 0])):
    print(label, a[positions])


a[0]:  1
a[1]:  2
a[[0, 1]]:  [1 2]
a[[0, 1, 0]]:  [1 2 1]


In [151]:
# In a 2-D array, `b[row, col]` addresses a single element.
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
for label, col in (('b[0, 0]: ', 0), ('b[0, 1]: ', 1)):
    print(label, b[0, col])


b[0, 0]:  1
b[0, 1]:  2


使用切片

In [152]:
# Slices follow Python list rules: the start is inclusive, the stop exclusive,
# and negative indices count from the end.
a = np.array([1, 2, 3])
explicit_start = a[0:2]
implicit_start = a[:2]
last_two = a[-2:]
print('a[0:2]: ', explicit_start)
print('a[:2]: ', implicit_start)
print('a[-2:]: ', last_two)


a[0:2]:  [1 2]
a[:2]:  [1 2]
a[-2:]:  [2 3]


In [153]:
# One slice per axis: both spellings select the top-left 2x2 sub-array.
b = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])
explicit_corner = b[0:2, 0:2]
implicit_corner = b[:2, :2]
print('b[0:2, 0:2]: ', explicit_corner)
print('b[:2, :2]: ', implicit_corner)


b[0:2, 0:2]:  [[1 2]
 [4 5]]
b[:2, :2]:  [[1 2]
 [4 5]]


条件筛选

In [154]:
# Indexing with a boolean mask keeps only the elements where the mask is True.
a = np.array([1, 2, 3, 4, 5, 6])
greater_than_three = a > 3
print('a>3: ', a[greater_than_three])


a>3:  [4 5 6]


筛选并替换

In [155]:
# Replace every element greater than 3 with -1 and keep the others unchanged.
# NOTE(review): this result is neither assigned nor printed, and only the last
# expression of a cell is echoed, so nothing is displayed for this line.
# It also reads `a` as left behind by the previous cell.
np.where(a > 3, -1, a)

a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
# Element-wise choice: take the value from `a` where a > 3, otherwise from `b`.
np.where(a > 3, a, b)


array([7, 8, 9, 4, 5, 6])

基础计算

In [156]:
# Naive approach: visit every position and add 3 to it, writing back in place.
a = np.array([1, 2, 3, 4, 5, 6])
for idx in range(len(a)):
    a[idx] = a[idx] + 3
print(a)


[4 5 6 7 8 9]


或者

In [157]:
# Pure-Python alternative: build a new list with 3 added to every element of `a`
# (`a` is the array already modified in place by the previous cell).
print([x + 3 for x in a])


[7, 8, 9, 10, 11, 12]


numpy 写法

In [158]:
# Vectorised form: the scalar is broadcast to every element; `a` itself is not changed.
shifted = a + 3
print(shifted)


[ 7  8  9 10 11 12]


点乘（对二维数组，`dot` 计算的是矩阵乘法）

In [159]:
# For 2-D operands `dot` is the matrix product: (2, 3) @ (3, 2) -> (2, 2).
a = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3)
b = np.array([7, 8, 9, 10, 11, 12]).reshape(3, 2)

# The method form and the function form are interchangeable.
via_method = a.dot(b)
via_function = np.dot(a, b)
print(via_method)
print(via_function)


[[ 58  64]
 [139 154]]
[[ 58  64]
 [139 154]]


# 数据分析

最大最小值

In [160]:
# Each reduction is available both as a module-level function and as an array method.
a = np.array([1, 2, 3, 4, 5, 6])

largest_by_function, largest_by_method = np.max(a), a.max()
smallest_by_function, smallest_by_method = np.min(a), a.min()

print('max: ', largest_by_function)
print('max: ', largest_by_method)
print('min: ', smallest_by_function)
print('min: ', smallest_by_method)


max:  6
max:  6
min:  1
min:  1


求和

In [161]:
# Sum of all elements of `a` (defined in the previous cell).
total = a.sum()
print(total)


21


求积（所有元素的乘积）

In [162]:
# Product of all elements of `a`.
product = a.prod()
print(product)


720


非零元素的个数

In [163]:
# Number of non-zero entries, followed by the total number of entries for comparison.
nonzero_count = np.count_nonzero(a)
element_count = a.size
print(nonzero_count)
print(element_count)


6
6


平均数、中位数

In [164]:
# The outlier (100) pulls the mean far more than it pulls the median.
a = np.array([1, 2, 3, 4, 5, 100])
average = np.mean(a)
middle = np.median(a)
print('mean: ', average)
print('median: ', middle)


mean:  19.166666666666668
median:  3.5


标准差

In [165]:
# Standard deviation of `a` from the previous cell.
spread = np.std(a)
print('std: ', spread)


std:  36.17281053805775


取得最大值、最小值的索引

In [166]:
# argmin/argmax give the index of the extreme value; on ties the first occurrence is reported.
a = np.array([1, 2, 3, 4, 5, 1, 2, 5])
lowest_at = np.argmin(a)
highest_at = np.argmax(a)
print('argmin: ', lowest_at)
print('argmax: ', highest_at)


argmin:  0
argmax:  4


`floor`和`ceil`

In [167]:
# Element-wise rounding down (floor) and up (ceil); the results stay floating point.
a = np.array([1.1, 2.2, 3.3, 4.4, 5.5, 6.6])
rounded_down = np.floor(a)
rounded_up = np.ceil(a)
print('floor: ', rounded_down)
print('ceil: ', rounded_up)


floor:  [1. 2. 3. 4. 5. 6.]
ceil:  [2. 3. 4. 5. 6. 7.]


裁剪（把数值限制在给定区间内）

In [168]:
# Values below 2 are raised to 2 and values above 4 are lowered to 4.
a = np.array([1, 2, 3, 4, 5, 6])
clipped = a.clip(2, 4)
print('clip: ', clipped)


clip:  [2 2 3 4 4 4]


# 数据转换

增加维度

In [169]:
# Insert a new leading axis: shape (6,) becomes (1, 6). `None` is the same object as np.newaxis.
a = np.array([1, 2, 3, 4, 5, 6])
a_2d = a[None, :]
print('a shape: ', a.shape)
print('a_2d: ', a_2d)
print('a_2d shape: ', a_2d.shape)


a shape:  (6,)
a_2d:  [[1 2 3 4 5 6]]
a_2d shape:  (1, 6)


In [170]:
# Two equivalent ways to append a trailing axis: shape (6,) becomes (6, 1).
a = np.array([1, 2, 3, 4, 5, 6])
a_none = a[:, np.newaxis]
a_expand = np.expand_dims(a, 1)

print('a none shape: ', a_none.shape)
print('a_none: ', a_none)

print('a expand shape: ', a_expand.shape)
print('a expand: ', a_expand)


a none shape:  (6, 1)
a_none:  [[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
a expand shape:  (6, 1)
a expand:  [[1]
 [2]
 [3]
 [4]
 [5]
 [6]]


减少维度

In [171]:
# squeeze removes length-1 axes: all of them by default, or only the one named by `axis`.
a = np.array([1, 2, 3, 4, 5, 6])
a_expand = np.expand_dims(a, 1)
print(a_expand)

a_squeeze = a_expand.squeeze()
a_squeeze_axis = a_expand.squeeze(axis=1)
print(a_squeeze)
print(a_squeeze_axis)


[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
[1 2 3 4 5 6]
[1 2 3 4 5 6]


改变维度

In [172]:
# reshape rearranges the same six elements into a new shape; the element count must match.
a = np.array([1, 2, 3, 4, 5, 6])
a1 = a.reshape(2, 3)
a2 = a.reshape(3, 1, 2)

print('a1 shape: ', a1.shape)
print('a1: ', a1)

print('a2 shape: ', a2.shape)
print('a2: ', a2)


a1 shape:  (2, 3)
a1:  [[1 2 3]
 [4 5 6]]
a2 shape:  (3, 1, 2)
a2:  [[[1 2]]

 [[3 4]]

 [[5 6]]]


平展

In [173]:
# Collapse `a1` and `a2` (from the previous cell) back to one dimension.
flat_a1 = a1.ravel()
flat_a2 = a2.flatten()
print('ravel a1: ', flat_a1)
print('flatten a2: ', flat_a2)


ravel a1:  [1 2 3 4 5 6]
flatten a2:  [1 2 3 4 5 6]


矩阵转置

In [174]:
# `.T` and `.transpose()` both swap the axes of a 2-D array: (2, 3) -> (3, 2).
a = np.array([1, 2, 3, 4, 5, 6]).reshape(2, 3)
aT1 = a.T
aT2 = a.transpose()

for label, transposed in (('aT1: ', aT1), ('aT2: ', aT2)):
    print(label, transposed)


aT1:  [[1 4]
 [2 5]
 [3 6]]
aT2:  [[1 4]
 [2 5]
 [3 6]]


合并

In [175]:
# column_stack turns each 1-D input into one column of the 2-D result.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
as_columns = np.column_stack([a, b])
print(as_columns)


[[ 1  7]
 [ 2  8]
 [ 3  9]
 [ 4 10]
 [ 5 11]
 [ 6 12]]


In [176]:
# Stack each 1-D input as one row of the 2-D result.
# np.row_stack was only an alias of np.vstack and is deprecated since NumPy 2.0,
# so np.vstack is called directly; the result is identical.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
print(np.vstack((a, b)))


[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


`vstack`和`hstack`需要先保证维度正确，才可以进行合并

In [177]:
# Column vectors of shape (6, 1) joined side by side give shape (6, 2).
a = np.array([1, 2, 3, 4, 5, 6])[:, np.newaxis]
b = np.array([7, 8, 9, 10, 11, 12])[:, np.newaxis]
print(np.hstack([a, b]))

# Row vectors of shape (1, 6) joined top to bottom give shape (2, 6).
a = np.array([1, 2, 3, 4, 5, 6])[np.newaxis, :]
b = np.array([7, 8, 9, 10, 11, 12])[np.newaxis, :]
print(np.vstack([a, b]))


[[ 1  7]
 [ 2  8]
 [ 3  9]
 [ 4 10]
 [ 5 11]
 [ 6 12]]
[[ 1  2  3  4  5  6]
 [ 7  8  9 10 11 12]]


In [178]:
# concatenate covers both stacking directions through its `axis` argument:
# axis=0 appends rows, axis=1 appends columns.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])

for axis in (0, 1):
    print(np.concatenate((a, b), axis=axis))


[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


拆解

In [179]:
# 4x6 array holding the integers 1..24 in row-major order.
a = np.arange(1, 25).reshape(4, 6)

# An integer asks for that many equal parts; a list gives the row indices to cut at.
equal_parts = np.vsplit(a, 2)
cut_at_rows = np.vsplit(a, [2, 3])
print('vsplit a: ', equal_parts)
print('vsplit a: ', cut_at_rows)


vsplit a:  [array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24]])]
vsplit a:  [array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18]]), array([[19, 20, 21, 22, 23, 24]])]


In [180]:
# 4x6 array holding the integers 1..24 in row-major order.
a = np.arange(1, 25).reshape(4, 6)

# Same idea as vsplit, but the cuts run between columns.
equal_parts = np.hsplit(a, 2)
cut_at_columns = np.hsplit(a, [2, 3])
print('hsplit a: ', equal_parts)
print('hsplit a: ', cut_at_columns)


hsplit a:  [array([[ 1,  2,  3],
       [ 7,  8,  9],
       [13, 14, 15],
       [19, 20, 21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]
hsplit a:  [array([[ 1,  2],
       [ 7,  8],
       [13, 14],
       [19, 20]]), array([[ 3],
       [ 9],
       [15],
       [21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]


In [181]:
# 4x6 array holding the integers 1..24 in row-major order.
a = np.arange(1, 25).reshape(4, 6)

# np.split is the general form: `axis` selects the direction of the cuts.
row_halves = np.split(a, 2, axis=0)
column_pieces = np.split(a, [2, 3], axis=1)
print('split a: ', row_halves)
print('split a: ', column_pieces)


split a:  [array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]]), array([[13, 14, 15, 16, 17, 18],
       [19, 20, 21, 22, 23, 24]])]
split a:  [array([[ 1,  2],
       [ 7,  8],
       [13, 14],
       [19, 20]]), array([[ 3],
       [ 9],
       [15],
       [21]]), array([[ 4,  5,  6],
       [10, 11, 12],
       [16, 17, 18],
       [22, 23, 24]])]


# 数据处理

使用`unique`函数，可以去除重复的元素

In [182]:
# np.unique drops duplicates and returns the remaining values sorted.
a = np.array([1, 2, 3, 3, 4, 2, 1])
print(a)
distinct = np.unique(a)
print('unique: ', distinct)


[1 2 3 3 4 2 1]
unique:  [1 2 3 4]


使用`isnan`函数，可以逐元素判断是否为`NaN`，例如 `np.isnan(np.array([1.0, np.nan]))` 的结果是 `[False, True]`

使用`argwhere`函数，可以找到满足条件的元素的索引，例如 `np.argwhere(np.array([1, 5, 2, 6]) > 3)` 的结果是 `[[1], [3]]`

# 数据读取

从文件中加载数据

In [183]:
# Read a comma-separated text file as 64-bit integers, skipping its first line.
# NOTE(review): 'data.csv' is not created anywhere in the visible notebook, so it
# presumably has to exist next to the notebook before this cell is run.
data = np.loadtxt(
    'data.csv',
    dtype=np.int64,
    delimiter=',',
    skiprows=1,
)
print(data)


[[20131    10    67]
 [20132    11    88]
 [20133    12    98]
 [20134     8   100]
 [20135     9    75]
 [20136    12    78]]


从字符串中加载数据

In [184]:
# Parse a separator-delimited string of numbers into a 1-D integer array.
raw_string = '1, 2, 3, 4, 5, 6'
data = np.fromstring(raw_string, sep=',', dtype=np.int64)
print(data)


[1 2 3 4 5 6]


保存数据到文件

In [185]:
# Write the array to a text file, then read it back.
a = np.array([1, 2, 3, 4, 5, 6])
# savetxt returns None; the print only makes that visible.
save_result = np.savetxt('data.txt', a, fmt='%d', delimiter=',')
print('save txt: ', save_result)

a = np.loadtxt('data.txt', delimiter=',', dtype=np.int64)
print('load txt: ', a)

save txt:  None
load txt:  [1 2 3 4 5 6]


二进制形式保存

In [186]:
# Store one array in NumPy's binary .npy format, then load it again.
a = np.array([1, 2, 3, 4, 5, 6])
# np.save returns None; the print only makes that visible.
save_result = np.save('data.npy', a)
print('save: ', save_result)

a = np.load('data.npy')
print('load: ', a)

save:  None
load:  [1 2 3 4 5 6]


一个文件保存多个数组

In [187]:
# Store several named arrays in one .npz archive, then read them back by name.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
# np.savez returns None; the print only makes that visible.
print('savez: ', np.savez('data.npz', a=a, b=b))

# np.load on an .npz file returns an NpzFile that keeps the archive open.
# Using it as a context manager closes the file handle when the block ends,
# so the file can be safely overwritten afterwards.
with np.load('data.npz') as npzfile:
    print('npzfile: ', npzfile.files)
    print('npzfile a: ', npzfile['a'])
    print('npzfile b: ', npzfile['b'])

savez:  None
npzfile:  ['a', 'b']
npzfile a:  [1 2 3 4 5 6]
npzfile b:  [ 7  8  9 10 11 12]


保存并做数据压缩

In [188]:
# Same as np.savez, but the archive members are compressed.
a = np.array([1, 2, 3, 4, 5, 6])
b = np.array([7, 8, 9, 10, 11, 12])
# np.savez_compressed returns None; the print only makes that visible.
print('savez_compressed: ', np.savez_compressed('data.npz', a=a, b=b))

# Loading is identical for compressed archives. The context manager closes the
# underlying file handle instead of leaving it open for the rest of the session.
with np.load('data.npz') as npzfile:
    print('npzfile: ', npzfile.files)
    print('npzfile a: ', npzfile['a'])
    print('npzfile b: ', npzfile['b'])

savez_compressed:  None
npzfile:  ['a', 'b']
npzfile a:  [1 2 3 4 5 6]
npzfile b:  [ 7  8  9 10 11 12]


# 数据生成

生成全0、全1数组

In [189]:
# The shape may be a single length or a sequence with one entry per axis.
z = np.zeros(10)
print(z)

z = np.zeros((2, 3))
print(z)

z = np.ones((2, 3, 4))
print(z)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
[[0. 0. 0.]
 [0. 0. 0.]]
[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]]


使用某个数值填充数组

In [190]:
# Create a 2x3 array in which every element is the given fill value.
fill_value = 5
a = np.full((2, 3), fill_value)
print(a)

[[5 5 5]
 [5 5 5]]


使用现有形状

In [191]:
# The *_like constructors copy shape and dtype from an existing array.
a = np.zeros((2, 3))
b = np.zeros_like(a)

for template_or_copy in (a, b):
    print(template_or_copy)

c = np.ones_like(a)
print(c)

# The fill value is converted to the template's float dtype.
c = np.full_like(a, 5)
print(c)

[[0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[1. 1. 1.]
 [1. 1. 1.]]
[[5. 5. 5.]
 [5. 5. 5.]]


# 创建规则数据

In [192]:
# np.arange accepts the same (stop) and (start, stop, step) arguments as the
# built-in range, but returns an array.
for bounds in ((5,), (3, 10, 2)):
    print('python range:', list(range(*bounds)))
    print('numpy arange:', np.arange(*bounds))

python range: [0, 1, 2, 3, 4]
numpy arange: [0 1 2 3 4]
python range: [3, 5, 7, 9]
numpy arange: [3 5 7 9]


某一个区间均匀取点

In [193]:
# Five evenly spaced samples over [-1, 1], both ends included.
samples = np.linspace(-1, 1, num=5)
print('linspace:', samples)

linspace: [-1.  -0.5  0.   0.5  1. ]


不包含终点（`endpoint=False`）

In [194]:
# With endpoint=False the stop value is excluded, so the five samples split
# [-1, 1) into steps of 0.4. The label now names the interval that is actually
# sampled (it previously said [1, 5]).
print('5 points in [-1, 1), endpoint excluded:', np.linspace(-1, 1, 5, endpoint=False))

5 segments between [1, 5]: [-1.  -0.6 -0.2  0.2  0.6]


单位矩阵

In [195]:
# 5x5 square matrix with ones on the main diagonal and zeros elsewhere.
identity_matrix = np.identity(5)
print('identity:', identity_matrix)

identity: [[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]


In [196]:
# Unlike identity, eye may be rectangular: 5 rows by 8 columns here.
rectangular_eye = np.eye(N=5, M=8)
print('eye:', rectangular_eye)

eye: [[1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0.]]


创建一个多维数组，但不初始化（其中的值是内存中的任意残留数据），后续需要手动赋值

In [197]:
# np.empty only allocates memory; the printed values are whatever was there before.
uninitialised = np.empty((3, 4))
print('empty:', uninitialised)

empty: [[4.9e-324 9.9e-324 1.5e-323 2.0e-323]
 [2.5e-323 3.0e-323 3.5e-323 4.0e-323]
 [4.4e-323 4.9e-323 5.4e-323 5.9e-323]]


下面比较一下使用`ones`和`empty`之间的速度

In [198]:
import time

# Compare the cost of allocating-and-filling (ones) with allocating only (empty).
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short intervals; time.time() is wall-clock time and can jump or be too coarse.
t0 = time.perf_counter()

for _ in range(1000):
    np.ones([100, 100])

t1 = time.perf_counter()

for _ in range(1000):
    np.empty([100, 100])

t2 = time.perf_counter()

print('ones time:', t1 - t0)
print('empty time:', t2 - t1)

ones time: 0.010580062866210938
empty time: 0.0018999576568603516


可见，`empty`比`ones`速度快很多，因为它只分配内存而不写入初始值

# 多种随机数生成

In [199]:
# Uniform samples from [0, 1); rand takes the dimensions as separate arguments.
n_rows, n_cols = 3, 2
rand_array = np.random.rand(n_rows, n_cols)
print(rand_array)

[[0.75099059 0.7101086 ]
 [0.38127606 0.86026798]
 [0.51390346 0.67507971]]


类似还有

In [200]:
# Same distribution as rand, but the shape is passed as one sequence.
rand_array = np.random.random(size=(3, 2))
print(rand_array)

[[0.07446241 0.56968371]
 [0.90688058 0.31068651]
 [0.29436479 0.72242953]]


按照标准正态分布生成随机数

In [201]:
# Samples from the standard normal distribution (mean 0, standard deviation 1).
n_rows, n_cols = 3, 2
rand_array = np.random.randn(n_rows, n_cols)
print(rand_array)

[[ 0.97846248  0.8182055 ]
 [-0.76411177  0.17451279]
 [ 0.73207405 -0.92477919]]


随机整数生成

In [202]:
# Ten random integers drawn from [-3, 6): the lower bound is inclusive, the upper exclusive.
rand_array = np.random.randint(-3, 6, 10)
print(rand_array)

[-3  2 -1 -3  1  0  4  5  5  0]


## 对已有的数据进行随机操作

In [203]:
# Sample pool shared by the following cells.
data = np.array([1, 2, 3, 4, 5])

选择其中一个

In [204]:
# Draw a single element of `data` at random.
picked = np.random.choice(data)
print(picked)

3


选择其中两个（默认有放回，结果可能重复）

In [205]:
# Draw two elements; sampling is with replacement by default, so repeats can occur.
picked = np.random.choice(data, size=2)
print(picked)

[4 2]


不放回地选择其中多个

In [206]:
# replace=False draws without replacement, so the two elements are distinct.
picked = np.random.choice(data, size=2, replace=False)
print(picked)

[5 2]


带权重地选择

In [207]:
# One probability per element of `data`; they sum to 1, and the zero weight
# means the last element is never drawn.
weights = [0.1, 0.2, 0.3, 0.4, 0]
picked = np.random.choice(data, size=3, p=weights)
print(picked)

[4 4 4]


将数组重新排序（洗牌）

In [208]:
# shuffle reorders `data` in place; the printed array is the same object, now reordered.
np.random.shuffle(data)
print(data)

[4 3 1 5 2]


类似的，还有`permutation`，它既可以随机生成乱序数据，也可以将已有的数据打乱，但是它不会改变原始数组，而是生成一个新的

In [209]:
# Given an integer, permutation shuffles the integers 0..n-1; given an array,
# it returns a shuffled copy.
shuffled_range = np.random.permutation(5)
print('new data:', shuffled_range)
shuffled_copy = np.random.permutation(data)
print('change old data:', shuffled_copy)

new data: [2 1 0 4 3]
change old data: [1 5 2 4 3]


## 随机分布

In [210]:
# Ten samples from a normal distribution with mean 1 and standard deviation 0.2.
normal_samples = np.random.normal(loc=1, scale=0.2, size=10)
print('normal:', normal_samples)
# Ten samples drawn uniformly from [-1, 1).
uniform_samples = np.random.uniform(low=-1, high=1, size=10)
print('uniform:', uniform_samples)

normal: [1.02853853 1.23106209 1.09242596 1.14687032 1.05806646 1.15177791
 1.0854448  1.24052037 0.7936627  0.84683202]
uniform: [ 0.36005432  0.25916522 -0.16956019  0.04887242 -0.24441833  0.00508265
  0.94055797 -0.79882178 -0.98276472  0.18448744]


设置随机数种子

In [211]:
# Fix the seed of NumPy's global random generator so later draws are reproducible.
np.random.seed(seed=1)