https://mp.weixin.qq.com/s/hx3UE6rezm1VTmM59L442w

In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

### Basic

In [2]:
import numpy as np

print(np.__version__)
a = np.array([1,2,3,4,5,6])
b = np.array([7,8,9,10,11,12])
print(a, a.dtype)
print(b, b.dtype)

1.19.2
[1 2 3 4 5 6] int32
[ 7  8  9 10 11 12] int32


In [2]:
B = np.array((5,6,7.8,3))
B.dtype

dtype('float64')

In [3]:
a[0]

1

In [4]:
a[0] = 7.4
a

array([7, 2, 3, 4, 5, 6])

In [5]:
# The elements of a NumPy array can be modified, but the array's dtype is fixed:
# the 7.4 assigned above was stored as 7 so that it fits the existing integer dtype.
a.dtype


dtype('int32')

In [6]:
a = np.array([[1,2,3], [4,5,6]])
print(a.ndim)
print(a[0])
print(a[1])
print(a[0][1])
print(a[1][2])

2
[1 2 3]
[4 5 6]
2
6


In [7]:
b = np.array([[[1,2,3], [4,5,6]], [[4,5,6], [1,2,3]]])
print(b.ndim)
print(b.shape)
print(b.shape[0], b.shape[1], b.shape[2])

3
(2, 2, 3)
2 2 3


In [8]:
A = np.arange(10)
A

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [9]:
A[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

### 常量

np.nan、np.NaN、np.NAN 都表示空值（NaN）。
**但是 NaN 与任何值都不相等，包括它自身**

In [7]:
import numpy as np

print(np.nan == np.nan)    #False
print(np.nan != np.nan)    #True
print(np.nan != np.NAN)    #True

False
True
True


`np.isnan(x, *args, **kwargs)`: Test element-wise for NaN and return the result as a boolean array.

In [10]:
import numpy as np
x = np.array([1,1,8,np.nan, 10])
print(x)
y = np.isnan(x)
print(y)
z = np.count_nonzero(y)
print(z)

[ 1.  1.  8. nan 10.]
[False False False  True False]
1


numpy.inf 表示正无穷大
numpy.pi 表示圆周率pi
numpy.e 表示自然常数e

### Numpy基本数据类型

|类型       |备注  |说明   |
|----------|----|------|
|bool_=bool8|8位|布尔类型|
|int8 = byte|8位|整型|
|int16=short|16位|整型|
|int32=intc|32位|整型|
|int_=int64=long=int0=intp|64位|整型|
|uint8=ubyte|8位|无符号整数|
|uint16=ushort|16位|无符号整数|
|uint32=uintc|32位|无符号整数|
|uint64=uintp=uint0=uint|64位|无符号整数|
|float16=half|16位|浮点型|
|float32=single|32位|浮点型|
|float_=float64=double|64位|浮点型|
|str_=unicode_=str0=unicode|   |Unicode字符串|
|datetime64    |  |日期时间类型|
|timedelta64   |   | 表示二个时间之间的间隔|


**每个内建类型都有一个唯一定义它的字符代码**

|字符   |对应类型  |备注  |
|------|---------|-----|
|b      |boolean |'b1'|
|i     |signed integer|'i1','i2','i4', 'i8'|
|u     |unsigned integer|'u1', 'u2', 'u4', 'u8'|
|f     |floating-point| 'f2', 'f4', 'f8'|
|c     |complex floating-point|         |
|m     |timedelta64   |表示两个时间的间隔  |
|M     |datetime64    |日期时间类型       |
|O     |object        |                 |
|S     |(byte-)string |S3表示长度为3的字符串|
|U     |Unicode       |Unicode字符串     |
|V     |Void          |                 |

In [19]:
import numpy as np

a = np.dtype('b1')
print(a.type)    # <class 'numpy.bool_'>
print(a.itemsize)    #1

a = np.dtype('i1')
print(a.type)    # <class 'numpy.int8'>
print(a.itemsize)    #1
a = np.dtype('i2')
print(a.type)    # <class 'numpy.int16'>
print(a.itemsize)    #2
a = np.dtype('i4')
print(a.type)    # <class 'numpy.int32'>
print(a.itemsize)    #4
a = np.dtype('i8')
print(a.type)    # <class 'numpy.int64'>
print(a.itemsize)    #8

a = np.dtype('u1')
print(a.type)    # <class 'numpy.uint8'>
print(a.itemsize)    #1
a = np.dtype('u2')
print(a.type)    # <class 'numpy.uint16'>
print(a.itemsize)    #2
a = np.dtype('u4')
print(a.type)    # <class 'numpy.uint32'>
print(a.itemsize)    #4
a = np.dtype('u8')
print(a.type)    # <class 'numpy.uint64'>
print(a.itemsize)    #8

a = np.dtype('f2')
print(a.type)    # <class 'numpy.float16'>
print(a.itemsize)    #2
a = np.dtype('f4')
print(a.type)    # <class 'numpy.float32'>
print(a.itemsize)    #4
a = np.dtype('f8')
print(a.type)    # <class 'numpy.float64'>
print(a.itemsize)    #8

a = np.dtype('S')
print(a.type)    # <class 'numpy.bytes_'>
print(a.itemsize)    #0
a = np.dtype('S3')
print(a.type)    # <class 'numpy.bytes_'>
print(a.itemsize)    #3

a = np.dtype('U3')
print(a.type)    # <class 'numpy.str_'>
print(a.itemsize)    #12


<class 'numpy.bool_'>
1
<class 'numpy.int8'>
1
<class 'numpy.int16'>
2
<class 'numpy.int32'>
4
<class 'numpy.int64'>
8
<class 'numpy.uint8'>
1
<class 'numpy.uint16'>
2
<class 'numpy.uint32'>
4
<class 'numpy.uint64'>
8
<class 'numpy.float16'>
2
<class 'numpy.float32'>
4
<class 'numpy.float64'>
8
<class 'numpy.bytes_'>
0
<class 'numpy.bytes_'>
3
<class 'numpy.str_'>
12


In [24]:
class iinfo:
    """Bare-bones sketch of the interface exposed by ``np.iinfo``.

    Nothing is implemented here; the class only outlines the names involved.
    The statements that follow in this cell call ``np.iinfo`` itself, where
    ``min`` and ``max`` are read as plain attributes rather than called.
    """

    def __init__(self, int_type):
        """Accept an integer type; this sketch ignores the argument."""

    def min(self):
        """Placeholder for the smallest representable value; returns ``None``."""
        return None

    def max(self):
        """Placeholder for the largest representable value; returns ``None``."""
        return None

import numpy as np

ii16 = np.iinfo(np.int16)
print(ii16.min)    #-32768
print(ii16.max)    #32767

ii32 = np.iinfo(np.int32)
print(ii32.min)    #-2147483648
print(ii32.max)    #2147483647

-32768
32767
-2147483648
2147483647


In [26]:
class finfo:
    """Bare-bones sketch of the interface exposed by ``np.finfo``.

    Nothing is implemented here; the class only outlines the names involved.
    The statements that follow in this cell call ``np.finfo`` itself, where
    ``min`` and ``max`` are read as plain attributes rather than called.
    """

    def __init__(self, int_type):
        """Accept a type object; this sketch ignores the argument.

        NOTE(review): the parameter is called ``int_type`` although the cell
        passes floating-point types to ``np.finfo``; the name is kept so the
        signature stays unchanged.
        """

    def min(self):
        """Placeholder for the smallest representable value; returns ``None``."""
        return None

    def max(self):
        """Placeholder for the largest representable value; returns ``None``."""
        return None

import numpy as np

ff16 = np.finfo(np.float16)
print(ff16.bits)
print(ff16.min)    #-65500
print(ff16.max)    #65500
print(ff16.eps)

ff32 = np.finfo(np.float32)
print(ff32.bits)   #32
print(ff32.min)    #-3.4028235e+38
print(ff32.max)    #3.4028235e+38
print(ff32.eps)    #1.1920929e-07

16
-65500.0
65500.0
0.000977
32
-3.4028235e+38
3.4028235e+38
1.1920929e-07


### INDEX ARRAY

In [9]:
arr_rand = np.array([8,8,3,7,7,0,4,2,5,2])
b = arr_rand > 4

# Boolean mask: True where the element is greater than 4, False otherwise
b 

# Locate the indices of the elements greater than 5.
# With a single argument np.where returns a tuple holding one index array.
index_gt5 = np.where(arr_rand > 5)
print(f'Positions where value > 5: {index_gt5}')

# Use those indices to fetch the matching values.
# Because index_gt5 is a 1-tuple, take() receives a nested index and the
# result is 2-D with shape (1, 4), as the displayed output shows.
arr_rand.take(index_gt5)

# np.where also accepts two more arguments x and y:
# the result holds x where the condition is met and y elsewhere
np.where(arr_rand > 5, "gt5", 'le5')

# np.argmax / np.argmin give the positions of the largest and smallest values
print("Position of max value: ", np.argmax(arr_rand))
print("Position of min value: ", np.argmin(arr_rand))

# np.max / np.min give the largest and smallest values themselves
print("Max value: ", np.max(arr_rand))
print("Min value: ", np.min(arr_rand))

array([ True,  True, False,  True,  True, False, False, False,  True,
       False])

Positions where value > 5: (array([0, 1, 3, 4], dtype=int64),)


array([[8, 8, 7, 7]])

array(['gt5', 'gt5', 'le5', 'gt5', 'gt5', 'le5', 'le5', 'le5', 'le5',
       'le5'], dtype='<U3')

Position of max value:  0
Position of min value:  5
Max value:  8
Min value:  0


In [11]:
A = np.random.permutation(np.arange(10))
A

array([3, 9, 5, 6, 2, 1, 7, 4, 0, 8])

In [12]:
A[[4,3,8]]

array([2, 6, 0])

In [13]:
A[[True, True, False, True, False, False,True, False, True, True]]


array([3, 9, 6, 7, 0, 8])

In [14]:
A[A<4]

array([3, 2, 1, 0])

In [17]:
A[(A>4)]

array([9, 5, 6, 7, 8])

In [None]:
B = np.random.rand(1000000)
%timeit np.sum(B)

In [None]:
%timeit B.sum()

In [None]:
%timeit sum(B)


In [None]:
%%timeit
s = 0
for x in B:
    s += x

**astype() to convert to another type**

In [19]:
# Build a byte-string array and parse it into floats with astype().
# np.bytes_ is the canonical name of the byte-string scalar type; the
# np.string_ alias is no longer available in NumPy 2.x.
numeric_strings = np.array(['1.2', '-9.6', '42'], dtype=np.bytes_)
numeric_strings.astype(float)

array([ 1.2, -9.6, 42. ])

In [20]:
arr = np.array([3.7, -1.2, -2.6, 0.5, 12.9, 10.1])
arr.astype(np.int32)

array([ 3, -1, -2,  0, 12, 10])

In [21]:
arr = np.array([1, 2, 3, 4, 5])
arr.dtype

dtype('int32')

In [22]:
# Convert the integer array to 64-bit floats.
# np.float64 is spelled out because the np.float alias (a synonym for the
# built-in float) was deprecated in NumPy 1.20 and removed in 1.24.
float_arr = arr.astype(np.float64)
float_arr.dtype

dtype('float64')

In [23]:
float_arr

array([1., 2., 3., 4., 5.])

In [24]:
int_array = np.arange(10)
int_array

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [25]:
calibers = np.array([.22, .270, .357, .380, .44, .50], dtype=np.float64)
int_array.astype(calibers.dtype)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [26]:
empty_uint32 = np.empty(8, dtype='u4')
empty_uint32

array([4391013, 7143535, 5439580, 6684769, 4391013, 7143535, 7471184,
       7209065], dtype=uint32)

### Numpy array vs. Python list

In [14]:
# list
a = [2,5,8]
a= [q * 2 for q in a]
print(a)
print(type(a))

[4, 10, 16]
<class 'list'>


In [10]:
# numpy array
a = np.array([2,5,8])
a= a * 2
print(a)
print(type(a))
print(a.dtype)

[ 4 10 16]
<class 'numpy.ndarray'>
int32


In [11]:
# list
a = [1,2,3]
b = [4,5,6]
l = [p + q for p,q in zip(a,b)]
l

[5, 7, 9]

In [16]:
# numpy array

a = np.array([1,2,3])
b = np.array([4,5,6])
l = a+b
print(l)
print(type(l))

[5 7 9]
<class 'numpy.ndarray'>


In [5]:
# convert list to numpy ndarray
import numpy as np

d_array = np.array([1,2,3,4,5])
type(d_array)
d_array.shape
d_array.ndim

d_array2 = np.array([1,2,3,4,5]).reshape(5,1)
type(d_array2)
d_array2.shape
d_array2.ndim


numpy.ndarray

(5,)

1

numpy.ndarray

(5, 1)

2

### 向量初始化

为了创建Numpy数组，一种方法是转换Python列表。**Numpy数组类型**可以直接从列表元素类型**推导**得到。

Numpy数组不能像Python列表一样增长。数组的末端没有留下任何便于快速附加元素的空间。
因此，常见的作法是要么先使用Python列表，准备好之后转化为numpy数组。
要么是使用np.zeros or np.empty预先留下必要的空间。

In [24]:
a = np.array([1., 2., 3.])
print(a.dtype)
print(a.shape)

float64
(3,)


In [27]:
b = np.zeros(10, float)
print(b.dtype)
print(b.shape)
print(b)

float64
(10,)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [28]:
c = np.zeros_like(b)
print(c.dtype)
print(c.shape)
print(c)

float64
(10,)
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]


In [29]:
c = np.ones(6)
print(c)
print(c.shape)
print(c.dtype)

[1. 1. 1. 1. 1. 1.]
(6,)
float64


In [31]:
d = np.empty_like(c)
print(d)
print(d.shape)
print(d.dtype)


[1. 1. 1. 1. 1. 1.]
(6,)
float64


#### Numpy中有两个函数能用单调序列执行数组初始化

np.arange(*stop*)

np.arange(*start*, *stop*)

np.arange(*start*, *stop*, *step*)



In [34]:
a = np.arange(1,4,1)
print(a)
print(type(a))

[1 2 3]
<class 'numpy.ndarray'>


In [36]:
# 类型转换
b = np.arange(1,6,1).astype(float)
print(b)
print(type(b))
print(b.dtype)

[1. 2. 3. 4. 5.]
<class 'numpy.ndarray'>
float64


arange并不擅长处理浮点数

这时，用linspace

In [37]:
a = np.linspace(0.,1.,10)  # the last argument is the number of points, not of intervals: 10 points -> spacing 1/9
print(a)
print(type(a))

[0.         0.11111111 0.22222222 0.33333333 0.44444444 0.55555556
 0.66666667 0.77777778 0.88888889 1.        ]
<class 'numpy.ndarray'>


In [39]:
b = np.linspace(0.,1.,11)  # the last argument is the number of points, not of intervals: 11 points -> spacing 0.1
print(b)
print(type(b))

[0.  0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9 1. ]
<class 'numpy.ndarray'>


### 一维数组、 行向量、列向量

In [3]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [8]:
import numpy as np

a = np.random.randn(5)    #生成了一个被称为a的一维数组。在执行逻辑回归和神经网络时，不需要使用这些一维数组
type(a)
a
a.shape
a.T
np.dot(a,a.T)

numpy.ndarray

array([ 1.17542511, -0.4430374 , -0.33184927,  0.98419191, -0.01057279])

(5,)

array([ 1.17542511, -0.4430374 , -0.33184927,  0.98419191, -0.01057279])

2.6567757506859353

In [15]:
a = np.random.randn(5,1)    #产生一个(5,1)向量或者列向量
a.shape
a 
a.T
np.dot(a.T, a)
np.dot(a, a.T)

(5, 1)

array([[-0.15622021],
       [-1.23260494],
       [ 0.55642602],
       [-0.35611513],
       [-1.04028741]])

array([[-0.15622021, -1.23260494,  0.55642602, -0.35611513, -1.04028741]])

array([[3.06234551]])

array([[ 0.02440475,  0.19255781, -0.08692499,  0.05563238,  0.16251392],
       [ 0.19255781,  1.51931495, -0.68585347,  0.43894927,  1.28226341],
       [-0.08692499, -0.68585347,  0.30960992, -0.19815173, -0.57884299],
       [ 0.05563238,  0.43894927, -0.19815173,  0.12681799,  0.37046209],
       [ 0.16251392,  1.28226341, -0.57884299,  0.37046209,  1.0821979 ]])

In [17]:
a = np.random.randn(1,5)  #产生一个(1,5)向量或者行向量
a.shape
a 
a.T
np.dot(a.T, a)
np.dot(a, a.T)

(1, 5)

array([[ 1.1128907 ,  1.67924002, -0.4937158 ,  0.53172569,  0.86530246]])

array([[ 1.1128907 ],
       [ 1.67924002],
       [-0.4937158 ],
       [ 0.53172569],
       [ 0.86530246]])

array([[ 1.23852572,  1.86881061, -0.54945173,  0.59175257,  0.96298706],
       [ 1.86881061,  2.81984704, -0.82906733,  0.89289505,  1.45305052],
       [-0.54945173, -0.82906733,  0.24375529, -0.26252137, -0.4272135 ],
       [ 0.59175257,  0.89289505, -0.26252137,  0.28273221,  0.46010354],
       [ 0.96298706,  1.45305052, -0.4272135 ,  0.46010354,  0.74874835]])

array([[5.3336086]])

In [22]:
a = np.random.randn(6)
a
assert a.shape == (6,)
b = a.reshape(6,1)
b
assert b.shape == (6,1)

array([-0.35479979,  0.01725271,  0.11744238,  0.23953191, -1.59198007,
        0.15055281])

array([[-0.35479979],
       [ 0.01725271],
       [ 0.11744238],
       [ 0.23953191],
       [-1.59198007],
       [ 0.15055281]])

### Numpy Broadcasting


In [37]:
import numpy as np

A = np.array([[56.0, 0.0, 4.4, 68.0],
             [1.2, 104.0, 52.0, 8.0],
             [1.8, 135.0, 99.0, 0.9]])
print(A) 

[[ 56.    0.    4.4  68. ]
 [  1.2 104.   52.    8. ]
 [  1.8 135.   99.    0.9]]


In [38]:
# axis selects the direction along which the reduction runs: axis 0 is the
# vertical one (the sum is taken down each column), axis 1 the horizontal one
# (across each row). The result here is a 1-D array of column totals.
cal = A.sum(axis = 0)
# The totals have shape (4,), not (1, 4); the assert below checks exactly that.
print(cal)
cal.shape
assert cal.shape != (1,4)

# Reshape the totals to an explicit (1, 4) row and let broadcasting divide every
# row of A by it: each entry becomes its percentage of its column total.
percentage = 100 * A / cal.reshape(1,4)
percentage

[ 59.  239.  155.4  76.9]


(4,)

array([[94.91525424,  0.        ,  2.83140283, 88.42652796],
       [ 2.03389831, 43.51464435, 33.46203346, 10.40312094],
       [ 3.05084746, 56.48535565, 63.70656371,  1.17035111]])

##### Numpy广播机制

如果两个数组的后缘维度的轴长度相符或其中一方的轴长度为1，则认为它们是广播兼容的。广播会在缺失维度和轴长度为1的维度上进行。


### From and to CSV

导入数据的标准方法是使用np.genfromtxt函数，它可以从web URLs导入数据，处理缺失值，多种分隔符，处理不规则的列数等功能。一个不太通用的版本是用np.loadtxt函数导入数据，它假设数据集无缺失值.

In [11]:
# turn off scientific representation 
np.set_printoptions(suppress=True)

# import data from url csv file
path = "./data/Auto.csv"

data = np.genfromtxt(path, delimiter=',', skip_header= 1, filling_values=-999, dtype = 'float')
data[:5]

array([[  18. ,    8. ,  307. ,  130. , 3504. ,   12. ,   70. ,    1. ,
        -999. ],
       [  15. ,    8. ,  350. ,  165. , 3693. ,   11.5,   70. ,    1. ,
        -999. ],
       [  18. ,    8. ,  318. ,  150. , 3436. ,   11. ,   70. ,    1. ,
        -999. ],
       [  16. ,    8. ,  304. ,  150. , 3433. ,   12. ,   70. ,    1. ,
        -999. ],
       [  17. ,    8. ,  302. ,  140. , 3449. ,   10.5,   70. ,    1. ,
        -999. ]])

In [14]:
# data2 = np.genfromtxt(path, delimiter=',', skip_header= 1, filling_values=-999, dtype = 'object')
data2 = np.genfromtxt(path, delimiter=',', skip_header= 1, filling_values=-999, dtype = None)
data2[:6]

  data2 = np.genfromtxt(path, delimiter=',', skip_header= 1, filling_values=-999, dtype = None)


array([(18., 8, 307., 130, 3504, 12. , 70, 1, b'"chevrolet chevelle malibu"'),
       (15., 8, 350., 165, 3693, 11.5, 70, 1, b'"buick skylark 320"'),
       (18., 8, 318., 150, 3436, 11. , 70, 1, b'"plymouth satellite"'),
       (16., 8, 304., 150, 3433, 12. , 70, 1, b'"amc rebel sst"'),
       (17., 8, 302., 140, 3449, 10.5, 70, 1, b'"ford torino"'),
       (15., 8, 429., 198, 4341, 10. , 70, 1, b'"ford galaxie 500"')],
      dtype=[('f0', '<f8'), ('f1', '<i4'), ('f2', '<f8'), ('f3', '<i4'), ('f4', '<i4'), ('f5', '<f8'), ('f6', '<i4'), ('f7', '<i4'), ('f8', 'S38')])

In [16]:
np.savetxt("out.csv", data, delimiter=',')

### 保存和加载numpy数据

Numpy提供了.npy和.npz文件类型来实现。如果保存一个ndarray数据，使用np.save保存为.npy文件；若保存多个ndarray数据，使用np.savez保存为.npz文件。加载numpy数据，则统一用np.load函数.

In [28]:
# arr_rand is the integer array created in the "INDEX ARRAY" section.
arr_rand2 = arr_rand*2
# One array -> .npy file; several arrays -> .npz archive.
np.save('./data/myarray.npy', arr_rand)
np.savez('./data/myarray.npz', arr_rand, arr_rand2)
a = np.load('./data/myarray.npy')
a 
# Arrays passed positionally to savez are stored under the automatic
# names 'arr_0', 'arr_1', ... (see b.files).
b = np.load('./data/myarray.npz')
b.files
b['arr_0']
b['arr_1']

# Save again with keyword arguments so each array is stored under a chosen name.
# This writes to the same .npz path as above.
# NOTE(review): the value bound to c is not used anywhere in the visible cells.
c = np.savez('./data/myarray.npz', arr_m = arr_rand, arr_n= arr_rand2)
d = np.load('./data/myarray.npz')
d['arr_m']
d['arr_n']

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

['arr_0', 'arr_1']

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

array([16, 16,  6, 14, 14,  0,  8,  4, 10,  4])

array([8, 8, 3, 7, 7, 0, 4, 2, 5, 2])

array([16, 16,  6, 14, 14,  0,  8,  4, 10,  4])

### 按行或列拼接数组

In [33]:
a = np.ones([4,4])
b = np.zeros([4,4])
a
b

# 行拼接数组
np.concatenate([a,b], axis = 0)
np.vstack([a,b])
np.r_[a,b]

# 列拼接数组
np.concatenate([a,b], axis = 1)
np.hstack([a,b])
np.c_[a,b]

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

array([[1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.]])

array([[1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.]])

array([[1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.],
       [1., 1., 1., 1., 0., 0., 0., 0.]])

### 按列对数组进行排序

np.sort，np.argsort和np.lexsort

In [34]:
arr = np.random.randint(1,6, size=[8,4])
arr

array([[3, 4, 5, 2],
       [1, 5, 5, 1],
       [4, 4, 2, 2],
       [3, 5, 4, 5],
       [4, 2, 5, 2],
       [3, 5, 1, 1],
       [4, 1, 1, 2],
       [2, 5, 2, 1]])

In [35]:
# np.sort orders the values of arr column by column here:
# axis = 0 sorts down each column, axis = 1 would sort along each row
np.sort(arr, axis = 0)

array([[1, 1, 1, 1],
       [2, 2, 1, 1],
       [3, 4, 2, 1],
       [3, 4, 2, 2],
       [3, 5, 4, 2],
       [4, 5, 5, 2],
       [4, 5, 5, 2],
       [4, 5, 5, 5]])

In [42]:
# np.sort treats every column as independent and sorts each one separately,
# which breaks up the original rows.
# np.argsort lets us reorder whole rows instead.

# Sort the first column and keep the resulting index order
sorted_index_1stcol = arr[:,0].argsort()
# Reorder the rows by that index: rows stay intact, first column ascending
arr[sorted_index_1stcol]
# Reverse the argsort index to get descending order
arr[sorted_index_1stcol[::-1]]
# To sort by several columns use np.lexsort().
# It takes a tuple of keys, each key being one column of the array;
# the further right a key is in the tuple, the higher its priority.

# Compare the first column first; ties are broken by the second column
lexsorted_index = np.lexsort((arr[:,1], arr[:,0]))
lexsorted_index

array([[1, 5, 5, 1],
       [2, 5, 2, 1],
       [3, 4, 5, 2],
       [3, 5, 4, 5],
       [3, 5, 1, 1],
       [4, 4, 2, 2],
       [4, 2, 5, 2],
       [4, 1, 1, 2]])

array([[4, 1, 1, 2],
       [4, 2, 5, 2],
       [4, 4, 2, 2],
       [3, 5, 1, 1],
       [3, 5, 4, 5],
       [3, 4, 5, 2],
       [2, 5, 2, 1],
       [1, 5, 5, 1]])

array([1, 7, 0, 3, 5, 6, 4, 2], dtype=int64)

### 用Numpy处理时间
np.datetime64创建日期对象，精确到纳秒

In [56]:
# Create a datetime64 object from a date-time string
date64 = np.datetime64('2021-04-10 19:33:33')
date64

# Keep only the date part by converting to day ('D') resolution
dt64 = np.datetime64(date64, 'D')
dt64

# Build time deltas in different units
tenminutes = np.timedelta64(10, 'm')
tenseconds = np.timedelta64(10, 's')
tennanoseconds = np.timedelta64(10,'ns')

# Adding a bare integer advances dt64 by that many of its own units (days here);
# adding a finer-grained timedelta gives a result at that finer resolution.
print('Add 10 days: ', dt64 + 10)
print('Add 10 minutes: ', dt64 + tenminutes)
print('Add 10 seconds: ', dt64 + tenseconds)
print('Add 10 nanoseconds: ', dt64 + tennanoseconds)

# convert dt64 to string
np.datetime_as_string(dt64)

# check whether a business day
np.is_busday(dt64)

# Use a week mask in which Saturday and Sunday count as business days
dt64 = np.datetime64('2021-04-11')
np.is_busday(dt64, weekmask = 'Sat Sun')

# Find the date a few business days later; this raises an error
# when the starting date is not itself a business day
dt64 = np.datetime64('2021-04-12')
t = np.busday_offset(dt64,3)
t 

# Passing roll='forward' or roll='backward' avoids that error:
# 'forward' first moves a non-business start date to the nearest business day
# later in time, 'backward' to the nearest one earlier in time;
# the offset is then applied from there (compare the two printed results).
dt64 = np.datetime64('2021-04-10')
print("Add 2 business days, rolling forward to nearest biz day: ", np.busday_offset(dt64, 2, roll='forward'))
print("Add 2 business days, rolling backward to nearest biz day: ", np.busday_offset(dt64, 2, roll='backward'))


numpy.datetime64('2021-04-10T19:33:33')

numpy.datetime64('2021-04-10')

Add 10 days:  2021-04-20
Add 10 minutes:  2021-04-10T00:10
Add 10 seconds:  2021-04-10T00:00:10
Add 10 nanoseconds:  2021-04-10T00:00:00.000000010


'2021-04-10'

False

True

numpy.datetime64('2021-04-15')

Add 2 business days, rolling forward to nearest biz day:  2021-04-14
Add 2 business days, rolling backward to nearest biz day:  2021-04-13


#### 创建时间日期序列

In [57]:
dates = np.arange(np.datetime64('2021-04-10'), np.datetime64('2021-04-30'))
dates
np.is_busday(dates)

array(['2021-04-10', '2021-04-11', '2021-04-12', '2021-04-13',
       '2021-04-14', '2021-04-15', '2021-04-16', '2021-04-17',
       '2021-04-18', '2021-04-19', '2021-04-20', '2021-04-21',
       '2021-04-22', '2021-04-23', '2021-04-24', '2021-04-25',
       '2021-04-26', '2021-04-27', '2021-04-28', '2021-04-29'],
      dtype='datetime64[D]')

array([False, False,  True,  True,  True,  True,  True, False, False,
        True,  True,  True,  True,  True, False, False,  True,  True,
        True,  True])

#### 将numpy.datetime64对象转化为Python的datetime对象（精度为天时得到的是datetime.date）

In [61]:
import datetime

type(dt64)
dt = dt64.tolist()
type(dt)
dt
#获取datetime对象的年月日很方便
dt.year
dt.month
dt.day
dt.weekday()

numpy.datetime64

datetime.date

datetime.date(2021, 4, 10)

2021

4

10

5

### 高阶函数

#### 标量函数向量化

In [63]:
# Define a scalar function

def foo(x):
    """Square odd inputs and halve everything else.

    Args:
        x: A single number. Sequences are not supported because ``%`` is
            applied to ``x`` directly.

    Returns:
        ``x ** 2`` when ``x % 2 == 1``, otherwise ``x / 2`` (true division,
        so an even integer gives a float).
    """
    if x % 2 == 1:
        return x ** 2
    else:
        return x / 2

# On a scalar
foo(10)
foo(11)

# On a list the scalar function fails. The error is the point of the demo, so it
# is caught and printed instead of aborting a "Restart & Run All".
try:
    foo([10,11,12])
except TypeError as exc:
    print(f"TypeError: {exc}")

5.0

121

TypeError: unsupported operand type(s) for %: 'list' and 'int'

In [65]:
# Vectorize the scalar function so it is applied element by element;
# otypes = [float] makes the output a float array
foo_v = np.vectorize(foo, otypes = [float])

foo_v([10,11,12])
foo_v([[10,11,12], [1,2,3]])

array([  5., 121.,   6.])

array([[  5., 121.,   6.],
       [  1.,   1.,   9.]])

#### apply_along_axis函数


In [66]:
np.random.seed(100)
arr_x = np.random.randint(1,10, size=[4,10])
arr_x

array([[9, 9, 4, 8, 8, 1, 5, 3, 6, 3],
       [3, 3, 2, 1, 9, 5, 1, 7, 3, 5],
       [2, 6, 4, 5, 5, 4, 8, 2, 2, 8],
       [8, 1, 3, 4, 3, 6, 9, 2, 1, 8]])

In [67]:
np.apply_along_axis(np.max, 0, arr=arr_x)

array([9, 9, 4, 8, 9, 6, 9, 7, 6, 8])

In [68]:
np.apply_along_axis(np.max, 1, arr=arr_x)

array([9, 9, 8, 9])