Numpy

what: 科学计算库

why: 
- python list 在大数据速度慢，内存高，不能做向量、矩阵运算（循环多
- 快速、节省空间，提供数组化算术运算和高级别的广播功能
- 不需要循环计算

In [1]:
# ndaarray 多维数组对象
import numpy as np

# 列表转多维数组
lst1 = [1, 2, 3, 4, 5]
nd1 = np.array(lst1)
print(nd1)
print(type(nd1))
print(nd1.shape)

# 嵌套列表转多维数组
lst2 = [[1, 2, 3], [4, 5, 6]]
nd2 = np.array(lst2)
print(nd2)
print(type(nd2))
print(nd2.shape)

[1 2 3 4 5]
<class 'numpy.ndarray'>
(5,)
[[1 2 3]
 [4 5 6]]
<class 'numpy.ndarray'>
(2, 3)


In [2]:
# random模块创建数组
print('生成形状为(2, 3)，值在(0,1)范围内的随机数组')
print(np.random.randn(2, 3), end='\n\n')

print('生成形状为(2, 3)，值在[0,1)范围，均匀分布的随机数组')
print(np.random.uniform(low=0, high=1, size=(2, 3)), end='\n\n')

print('生成形状为(2, 3)，值在[1,10)范围内的随机整数数组')
print(np.random.randint(low=1, high=10, size=(2, 3)), end='\n\n')

print('生成满足正态分布的随机数组')
print(np.random.normal(loc=0, scale=1, size=(3, 3)), end='\n\n')

生成形状为(2, 3)，值在(0,1)范围内的随机数组
[[ 0.390876   -0.06850796  0.32995192]
 [-0.68603685 -0.47848706  1.8489352 ]]

生成形状为(2, 3)，值在[0,1)范围，均匀分布的随机数组
[[0.12021552 0.86779324 0.70521451]
 [0.47690158 0.04490878 0.49965357]]

生成形状为(2, 3)，值在[1,10)范围内的随机整数数组
[[5 1 9]
 [8 3 7]]

生成满足正态分布的随机数组
[[-0.57584743  0.95763932 -0.88149806]
 [ 1.8568515   1.08830715  0.23593656]
 [-1.4730332   0.77789399  1.58512488]]



算术运算

In [None]:
# 逐元素运算 （哈达玛积）

A = np.array([[1, 2], [-1, 4]])
B = np.array([[2, 0], [3, 4]])

print(A * B)
print(np.multiply(A, B))

[[ 2  0]
 [-3 16]]
[[ 2  0]
 [-3 16]]


In [4]:
# 点积
X1 = np.array([[1, 2], [3, 4]])
X2 = np.array([[5, 6, 7], [8, 9, 10]])

print(X1.dot(X2))
print(np.dot(X1, X2))

[[21 24 27]
 [47 54 61]]
[[21 24 27]
 [47 54 61]]


In [5]:
# 矩阵乘法
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6, 7], [8, 9, 10]])

print(A @ B)
print(np.matmul(A, B))


[[21 24 27]
 [47 54 61]]
[[21 24 27]
 [47 54 61]]


In [6]:
# 数组变形

A = np.array([[1, 2, 3], [4, 5, 6]])
print(A.reshape(3, 2))
print(A.reshape(2, 3))
print(A.reshape(-1, 1))
print(A.reshape(1, -1))



[[1 2]
 [3 4]
 [5 6]]
[[1 2 3]
 [4 5 6]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]]
[[1 2 3 4 5 6]]


In [9]:
# flatten 展平
print('展平')
A = np.array([[1, 2, 3], [4, 5, 6]])
print(A.flatten())

# 转置
print('转置')
A = np.array([[1, 2, 3], [4, 5, 6]])
print(A.T)

# squeeze 压缩
print('压缩')
A = np.arange(6).reshape(1, 2, 3)
print(A.squeeze())
print(A.squeeze().shape)

# transpose 转置
print('transpose转置')
A = np.array([[1, 2, 3], [4, 5, 6]])
print(A.transpose(1, 0))
print(A.transpose(1, 0).shape)


展平
[1 2 3 4 5 6]
转置
[[1 4]
 [2 5]
 [3 6]]
压缩
[[0 1 2]
 [3 4 5]]
(2, 3)
transpose转置
[[1 4]
 [2 5]
 [3 6]]
(3, 2)


In [10]:
# 合并数组
A = np.array([[1, 2], [3, 4]])
B = np.array([[5, 6], [7, 8]])

print(np.concatenate((A, B), axis=0))
print(np.concatenate((A, B), axis=1))


[[1 2]
 [3 4]
 [5 6]
 [7 8]]
[[1 2 5 6]
 [3 4 7 8]]


In [11]:
# 批处理

data_train = np.random.randn(1000, 2, 3)
print(data_train.shape)

np.random.shuffle(data_train)

batch_size = 100

for i in range(0, len(data_train), batch_size):
    X_batch = data_train[i:i+batch_size]
    y_batch = data_train[i:i+batch_size]
    print(X_batch.shape)
    print(y_batch.shape)

(1000, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)
(100, 2, 3)


In [12]:
# 通用函数

A = np.array([[1, 2, 3], [4, 5, 6]])

print(np.sum(A)) # 求和
print(np.mean(A)) # 求平均值
print(np.max(A)) # 求最大值
print(np.min(A)) # 求最小值
print(np.std(A)) # 求标准差
print(np.var(A)) # 求方差

# 广播
A = np.array([[1, 2, 3], [4, 5, 6]])
B = np.array([1, 2, 3])

print(A + B)


21
3.5
6
1
1.707825127659933
2.9166666666666665
[[2 4 6]
 [5 7 9]]
