# NumPy

* ndarray 多维数组

In [1]:
import numpy as np

# Build a random array with the requested dimensions (2 rows, 3 columns),
# then show both the values and the Python type of the result.
data = np.random.rand(2, 3)
print(data, type(data), sep='\n')

[[ 0.10902339  0.54266862  0.18187775]
 [ 0.83086593  0.1864485   0.23520957]]
<class 'numpy.ndarray'>


* ndim, shape 和 dtype 属性

In [2]:
# Report the basic attributes of `data`: number of dimensions (ndim),
# size along each dimension (shape) and element type (dtype).
for label, value in (('维度个数', data.ndim),
                     ('各维度大小: ', data.shape),
                     ('数据类型: ', data.dtype)):
    print(label, value)

维度个数 2
各维度大小:  (2, 3)
数据类型:  float64


* 创建ndarray

In [3]:
# Convert a Python list into an ndarray.
# In Python 3, range() returns a lazy range object rather than a list, so it is
# materialized with list() to make the input match what this cell demonstrates.
l = list(range(10))
data = np.array(l)
print(data)
print(data.shape)
print(data.ndim)

[0 1 2 3 4 5 6 7 8 9]
(10,)
1


In [4]:
# Convert a nested sequence (a list holding two ranges) into a 2-D ndarray:
# each inner sequence becomes one row.
l2 = [range(10) for _ in range(2)]
data = np.array(l2)
print(data, data.shape, sep='\n')

[[0 1 2 3 4 5 6 7 8 9]
 [0 1 2 3 4 5 6 7 8 9]]
(2, 10)


In [5]:
# np.zeros, np.ones and np.empty: create arrays of a given shape

# np.zeros: every element is 0.0
zeros_arr = np.zeros(shape=(3, 4))

# np.ones: every element is 1.0
ones_arr = np.ones(shape=(2, 3))

# np.empty: the elements are not initialized, so the printed values are arbitrary
empty_arr = np.empty(shape=(3, 3))

# np.empty with an explicit element type
empty_int_arr = np.empty(shape=(3, 3), dtype=int)

# Print the four arrays with a dashed separator line between them
print(zeros_arr, ones_arr, empty_arr, empty_int_arr, sep='\n-------------\n')

[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
-------------
[[ 1.  1.  1.]
 [ 1.  1.  1.]]
-------------
[[  4.94065646e-324   9.88131292e-324   1.48219694e-323]
 [  1.97626258e-323   2.47032823e-323   2.96439388e-323]
 [  3.45845952e-323   3.95252517e-323   4.44659081e-323]]
-------------
[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [6]:
# np.arange(): build an array of consecutive integers, here 0 through 9
print(np.arange(0, 10))

[0 1 2 3 4 5 6 7 8 9]


* ndarray数据类型

In [7]:
# Create an array with an explicitly requested element type (float64)
zeros_float_arr = np.zeros(shape=(3, 4), dtype=np.float64)
print(zeros_float_arr, zeros_float_arr.dtype, sep='\n')

# astype converts the elements to another data type (here int32)
zeros_int_arr = zeros_float_arr.astype(np.int32)
print(zeros_int_arr, zeros_int_arr.dtype, sep='\n')

[[ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]
 [ 0.  0.  0.  0.]]
float64
[[0 0 0 0]
 [0 0 0 0]
 [0 0 0 0]]
int32


* 矢量化 (vectorization)

In [8]:
# Array-with-array arithmetic: operators work element by element, no loops needed
arr = np.array([[1, 2, 3], [4, 5, 6]])

# Element-wise product (not a matrix product)
print("元素相乘：", arr * arr, sep='\n')

# Element-wise sum
print("矩阵相加：", arr + arr, sep='\n')

元素相乘：
[[ 1  4  9]
 [16 25 36]]
矩阵相加：
[[ 2  4  6]
 [ 8 10 12]]


In [9]:
# Array-with-scalar arithmetic: the scalar is combined with every element.
# Float scalars are used, so both results are floating-point arrays.
print(1.0 / arr, 2.0 * arr, sep='\n')

[[ 1.          0.5         0.33333333]
 [ 0.25        0.2         0.16666667]]
[[  2.   4.   6.]
 [  8.  10.  12.]]


* 索引与切片

In [10]:
# One-dimensional array: slicing uses the same start:stop syntax as Python lists
arr1 = np.arange(10)

# Show the full array, then the elements at indices 2, 3 and 4 (stop index 5 is excluded)
print(arr1, arr1[2:5], sep='\n')

[0 1 2 3 4 5 6 7 8 9]
[2 3 4]


In [11]:
# Multi-dimensional array: the integers 0..11 laid out as 3 rows by 4 columns
arr2 = np.reshape(np.arange(12), (3, 4))
print(arr2)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [12]:
# A single index selects a whole row (the row at index 1)
print(arr2[1])

# Row slice and column slice together: rows 0-1, columns 2 onward
print(arr2[:2, 2:])

# A bare ':' keeps every row; columns 1 and 2 are selected
print(arr2[:, 1:3])

[4 5 6 7]
[[2 3]
 [6 7]]
[[ 1  2]
 [ 5  6]
 [ 9 10]]


In [13]:
# Conditional (boolean) indexing

# Select the entries of data_arr whose corresponding year is 2005 or later
data_arr = np.random.rand(3, 3)
print(data_arr)

year_arr = np.array([[2000, 2001, 2000],
                     [2005, 2002, 2009],
                     [2001, 2003, 2010]])

# Comparing the array with a scalar gives a boolean mask of the same shape;
# indexing data_arr with that mask returns the selected elements as a 1-D array.
is_year_2005_or_later = year_arr >= 2005
filtered_arr = data_arr[is_year_2005_or_later]
print(filtered_arr)

[[ 0.22629326  0.52993859  0.36597228]
 [ 0.2286688   0.25178955  0.85513878]
 [ 0.25505027  0.63624895  0.90562835]]
[ 0.2286688   0.85513878  0.90562835]


In [14]:
# Combine several conditions: keep entries whose year is no later than 2005
# AND is an even number. Boolean masks are combined element-wise with '&'.
not_after_2005 = year_arr <= 2005
is_even_year = year_arr % 2 == 0
filtered_arr = data_arr[not_after_2005 & is_even_year]
print(filtered_arr)

[ 0.22629326  0.36597228  0.25178955]


* 转置

In [15]:
# Transpose a 2x3 array: rows become columns, giving a 3x2 array
arr = np.random.rand(2, 3)
print(arr, arr.T, sep='\n')

[[ 0.85552326  0.68728048  0.26984755]
 [ 0.26596775  0.99504862  0.89734115]]
[[ 0.85552326  0.26596775]
 [ 0.68728048  0.99504862]
 [ 0.26984755  0.89734115]]


In [16]:
# Transpose a 3-D array by giving the new order of its axes.
# Order (1, 0, 2) swaps the first two axes, so 2x3x4 becomes 3x2x4.
arr3d = np.random.rand(2, 3, 4)
print(arr3d, np.transpose(arr3d, (1, 0, 2)), sep='\n----------------------\n')

[[[ 0.84009694  0.42597787  0.49171572  0.2054864 ]
  [ 0.3239181   0.96883187  0.6668831   0.340976  ]
  [ 0.76281724  0.95786586  0.15241726  0.33309573]]

 [[ 0.7577724   0.5628459   0.7139561   0.66797148]
  [ 0.95285231  0.68565026  0.4834988   0.0948594 ]
  [ 0.15935472  0.77640512  0.91335056  0.59868325]]]
----------------------
[[[ 0.84009694  0.42597787  0.49171572  0.2054864 ]
  [ 0.7577724   0.5628459   0.7139561   0.66797148]]

 [[ 0.3239181   0.96883187  0.6668831   0.340976  ]
  [ 0.95285231  0.68565026  0.4834988   0.0948594 ]]

 [[ 0.76281724  0.95786586  0.15241726  0.33309573]
  [ 0.15935472  0.77640512  0.91335056  0.59868325]]]


* 通用函数

In [17]:
# Universal functions operate on every element of the array
arr = np.random.randn(2, 3)

print(
    arr,
    np.ceil(arr),    # round each element up
    np.floor(arr),   # round each element down
    np.rint(arr),    # round each element to the nearest integer
    np.isnan(arr),   # True where an element is NaN
    sep='\n',
)

[[-1.01835745 -0.51314444 -0.24728602]
 [-0.43934979 -0.33467943  1.48863882]]
[[-1. -0. -0.]
 [-0. -0.  2.]]
[[-2. -1. -1.]
 [-1. -1.  1.]]
[[-1. -1. -0.]
 [-0. -0.  1.]]
[[False False False]
 [False False False]]


* np.where

In [18]:
arr = np.random.randn(3, 4)
print(arr)

# Element-wise conditional: 1 where the element is positive, -1 everywhere else.
# Kept as the cell's last expression so the notebook displays the resulting array.
np.where(arr > 0, 1, -1)

[[-0.82854348  0.56950389  1.07614183 -0.0776851 ]
 [ 0.59204165  1.00040079  0.04982638  0.08904182]
 [ 1.80784253  0.46760312 -0.42034164  0.52075475]]


array([[-1,  1,  1, -1],
       [ 1,  1,  1,  1],
       [ 1,  1, -1,  1]])

*  常用的统计方法

In [19]:
# 5x2 array holding the integers 0..9
arr = np.arange(10).reshape((5, 2))
print(arr)

# Sum over every element
print(arr.sum())
# axis=0 sums down the rows: one total per column
print(arr.sum(axis=0))
# axis=1 sums across the columns: one total per row
print(arr.sum(axis=1))

[[0 1]
 [2 3]
 [4 5]
 [6 7]
 [8 9]]
45
[20 25]
[ 1  5  9 13 17]


* np.all 和 np.any

In [20]:
arr = np.random.randn(2, 3)
print(arr)

# any: True if at least one element satisfies the condition
print((arr > 0).any())
# all: True only if every element satisfies the condition
print((arr > 0).all())

[[-1.65605921  0.06656008 -0.49038012]
 [ 1.01382473  0.10583154 -1.787527  ]]
True
False


* np.unique

In [21]:
# np.unique collects the distinct values found anywhere in the array
arr = np.array([[1, 2, 1],
                [2, 3, 4]])
print(arr, np.unique(arr), sep='\n')

[[1 2 1]
 [2 3 4]]
[1 2 3 4]
