### NumPy is the fundamental package for scientific computing with Python. It contains among other things:

- a powerful N-dimensional array object
- sophisticated (broadcasting) functions
- tools for integrating C/C++ and Fortran code
- useful linear algebra, Fourier transform, and random number capabilities

Besides its obvious scientific uses, NumPy can also be used as an efficient multi-dimensional container of generic data. Arbitrary data-types can be defined. This allows NumPy to seamlessly and speedily integrate with a wide variety of databases.


In [2]:
import math
import time
import timeit

import numpy as np

### 1 [多维数组ndarray](https://docs.scipy.org/doc/numpy-dev/reference/arrays.ndarray.html)

In [None]:
# Build a 3x3 floating-point array from a list of row tuples.
x = [(1,2,3),(4,5,6),(7,8,9)]
# np.float was only an alias of the builtin float and has been removed from
# recent NumPy releases; np.float64 names the same dtype explicitly.
a = np.array(x,dtype=np.float64)
a

In [None]:
a.dtype #a的数据类型

In [None]:
a.ndim #返回数组的维数

In [None]:
a.shape #a.shape返回a中各维对应的元素个数

In [None]:
a.shape = (9,1) #变更各维元素个数
a

In [None]:
len(a),a.size #len()返回a第一维对应的元素个数，a.size返回a中的元素个数

In [None]:
a.tolist() #ndarray.tolist()返回一个ndarray对应的列表

In [None]:
a.shape = (3,3)
a.flatten() #扁平化返回一个对应的1维数组，a本身不发生变化

In [None]:
a.flat[8]  # view the array as if flattened to 1-D, then read the element at flat index 8

#### 索引：
- ndarray索引方法: [idx0,idx1,...]
- 利用序列的索引方法: [idx0][idx1]...

In [None]:
b = np.arange(16).reshape((2,2,4)) #定义一个3维数组
b

In [None]:
b[0,0,0]==b[0][0][0]

In [None]:
b[0,0,0]

#### 切片：

In [None]:
b[:][0][0]  # b[:] is a full-slice view of b (basic slicing does not copy); the first [0] picks sub-array 0, the second [0] picks its row 0

In [None]:
b[:,0,0] #针对第1维所有的数组，分别取索引为0的数组中的索引为0的元素

In [None]:
b[[0,1],[0,1],[0,1]] #取[0,0,0]和[1,1,1]两个元素

In [None]:
b[[0,1],[0,1],0] #取[0,0,0]和[1,1,0]两个元素

也可以用布尔值作为索引

In [None]:
b>0

In [None]:
b[b>5] #返回1维数组

In [None]:
b[b>5].shape

#### [常用的ndarray创建方法](https://docs.scipy.org/doc/numpy-dev/reference/routines.array-creation.html)

- np.arange(start,stop,step) 不包含stop

In [None]:
np.arange(0.1,10,0.5) #np.arange()中的参数可以为浮点数

In [None]:
np.arange(0.1,-1,0.5)

- np.linspace(start,stop,number) 包含stop

In [None]:
np.linspace(0,9,10)

- np.logspace(start,stop,number)

In [None]:
np.logspace(0,9,10)

In [None]:
np.logspace(0,9,10) == 10**np.linspace(0,9,10)

- np.zeros(n),np.zeros_like()

In [None]:
np.zeros(10)

In [None]:
np.zeros_like(a) #返回与a有相同结构的ndarray，里面的元素为0

- np.ones(n),np.ones_like()

In [None]:
np.ones(10)

In [None]:
np.ones_like(a) #返回与a有相同结构的ndarray，里面的元素为1

#### 自定义结构数组  
>在C语言中我们可以通过struct关键字定义结构类型，结构中的字段占据连续的内存空间，每个结构体占用的内存大小都相同，因此可以很容易地定义结构数组。和C语言一样，在NumPy中也很容易对这种结构数组进行操作。只要NumPy中的结构定义和C语言中的定义相同，NumPy就可以很方便地读取C语言的结构数组的二进制数据，转换为NumPy的结构数组。

In [None]:
p_type = np.dtype({'names':['name','age','weight'],
                   'formats':['S32','i','f']})

In [None]:
x = np.array([('xiao fang',18,60.0),('lu lu',20,66.5)],dtype=p_type)
x

x[0]是一个结构元素，它和数组x共享内存数据，因此可以通过修改它的字段，改变原始数组中的对应字段

In [None]:
x[1].dtype

In [None]:
x.dtype

In [None]:
x[0]['name'] = 'Li fang' #修改其中的元素
x

## 2 [universal function](https://docs.scipy.org/doc/numpy-dev/reference/ufuncs.html)

numpy内置的许多ufunc函数都是由C或者Fortran编程实现的，因此它们的计算速度非常快。
- 包含常用的数学函数，针对ndarray进行操作。
- 包含statistics模块，可完成基本的统计推断。
- Financial functions
...

In [None]:
d = np.arange(1,21).reshape((2,2,5))
d

In [None]:
np.exp(d) #等价于np.e**d

In [None]:
np.sqrt(d)

In [None]:
np.sin(d)

#### 比较math标准库中的函数和numpy库中的ufunc函数的计算效率

In [None]:
# Baseline for the benchmark: apply math.sqrt to one million floats with a
# plain Python loop, overwriting the list in place, and report the elapsed time.
x = [i*0.001 for i in range(1000000)]
# timeit.default_timer replaces time.clock, which no longer exists in
# current Python 3 releases.
start = timeit.default_timer()
for i,t in enumerate(x):
    x[i] = math.sqrt(t)

# The label says sqrt because that is the function actually being timed.
print("math.sqrt耗时:%.4fs" % (timeit.default_timer() - start))

In [None]:
# Vectorised counterpart of the math.sqrt loop: the same one million inputs,
# built directly as an ndarray instead of going through a Python list.
x = np.arange(1000000) * 0.001
# timeit.default_timer replaces time.clock, which no longer exists in
# current Python 3 releases.
start = timeit.default_timer()
# out=x writes the square roots back into x, so no second array is allocated.
np.sqrt(x, out=x)
# The label says sqrt because that is the function actually being timed.
print("numpy.sqrt耗时:%.4fs" % (timeit.default_timer() - start))

#### [frompyfunc()定义ufunc](https://docs.scipy.org/doc/numpy-dev/reference/generated/numpy.frompyfunc.html#numpy.frompyfunc)

In [None]:
def f(x,y):
    """Return ``y - x`` when ``x < y``, otherwise ``x - y``."""
    return y - x if x < y else x - y

In [None]:
ufunc_f = np.frompyfunc(f,2,1)  # wrap f as a universal function that takes 2 inputs and returns 1 output

In [None]:
x = np.arange(-1,1,0.00001)
y = np.arange(1,-1,-0.00001)

In [None]:
# Time the plain-Python approach: call f once per element pair of x and y.
# timeit.default_timer replaces time.clock, which no longer exists in
# current Python 3 releases.
t1 = timeit.default_timer()
for i,j in zip(x,y):
    f(i,j)
print(u'f耗时:%.4fs' % (timeit.default_timer() - t1))

In [None]:
# Time a single call of the frompyfunc-wrapped function on the whole arrays.
# timeit.default_timer replaces time.clock, which no longer exists in
# current Python 3 releases.
t1 = timeit.default_timer()
ufunc_f(x,y)
print(u'ufunc_f耗时:%.4fs' % (timeit.default_timer() - t1))

##### [vectorize()向量化函数](https://docs.scipy.org/doc/numpy-dev/reference/generated/numpy.vectorize.html#numpy.vectorize)

In [None]:
v_f = np.vectorize(f)

In [None]:
# Time a single call of the np.vectorize-wrapped function on the whole arrays.
# timeit.default_timer replaces time.clock, which no longer exists in
# current Python 3 releases.
t1 = timeit.default_timer()
v_f(x,y)
print(u'v_f耗时:%.4fs' % (timeit.default_timer() - t1))

#### [Logic functions](https://docs.scipy.org/doc/numpy-dev/reference/routines.logic.html)

In [None]:
a = np.zeros(10)
a[5:9] = 1,np.nan,np.inf,-np.inf

In [None]:
np.all(a)  # True only if every element of a evaluates to True (i.e. is non-zero)

In [None]:
np.any(a) #判断a中是否存在至少1个等价于true的元素

In [None]:
np.isnan(a) #判断a中的元素是否为空值

In [None]:
np.isfinite(a) #是否有限

In [None]:
np.isinf(a) #是否无限

In [None]:
np.isneginf(a)  # element-wise test for negative infinity

In [None]:
np.isposinf(a)  # element-wise test for positive infinity

#### [数学运算](https://docs.scipy.org/doc/numpy-dev/reference/routines.math.html)

In [None]:
a = np.arange(10).reshape((2,5))
b = np.arange(-9,1).reshape((2,5))

- 支持python内建运算符

In [None]:
a + b

In [None]:
abs(b)

In [None]:
a - b

In [None]:
a*b

In [None]:
a**2

- numpy运算符

In [None]:
np.add(a,b)

In [None]:
np.subtract(a,b)

In [None]:
np.multiply(a,b)

In [None]:
np.negative(b) == a

In [None]:
np.maximum(a,b) #分别取a,b对应位置的最大值

In [None]:
np.minimum(a,b) #分别取a,b对应位置的最小值

In [None]:
np.diff(a,n=2,axis=1)  # second-order (n=2) discrete difference along axis 1; without arguments np.diff uses n=1 along the last axis

In [None]:
# np.float was only an alias of the builtin float and has been removed from
# recent NumPy releases; np.float64 names the same dtype explicitly.
x = np.array([1, 2, 4, 7, 11, 16], dtype=np.float64)
np.gradient(x)  # numerical gradient of the samples in x

- nan

In [None]:
a = np.array([1,2,3,np.nan,5])

In [None]:
np.nansum(a)

In [None]:
np.nanprod(a)

#### [排序](https://docs.scipy.org/doc/numpy-dev/reference/routines.sort.html)

In [None]:
a = np.random.randint(20,size=(4,5))
a

In [None]:
np.sort(a)  # returns a sorted copy; with no axis given it sorts along the last axis (axis=-1)

In [None]:
np.sort(a,axis=0)  # sort along axis 0, i.e. each column of the 2-D array is sorted independently

In [None]:
np.sort(a,axis=None)  #flatten

In [None]:
a.sort(axis=0)
a

### 3. [随机数生成](https://docs.scipy.org/doc/numpy-dev/reference/routines.random.html)

In [None]:
np.random.rand(3,2) #(0,1)均匀分布

In [None]:
np.random.randn(3,2) #mean=0,std=1的正态分布

In [None]:
np.random.randint(low=10,high=100,size=(10,2))  # uniformly distributed random integers in [low, high): low is inclusive, high is exclusive

In [None]:
a_list = ['pooh', 'rabbit', 'piglet', 'Christopher']
np.random.choice(a_list, size = (2,5), p=[0.5, 0.1, 0.1, 0.3]) #根据概率p选择a_list中的元素，

In [None]:
np.random.shuffle(a_list) #随机打乱元素次序

In [None]:
a = np.random.randint(20,size=(4,5))
np.random.shuffle(a) #沿第1维随机打乱，a为变化后的状态
a

In [None]:
np.random.permutation(a) #沿第1维随机打乱,a保持不变
a

In [None]:
np.random.seed(55) #设置初始化随机状态
np.random.randint(10)

### 4. [统计](https://docs.scipy.org/doc/numpy-1.12.0/reference/routines.statistics.html)

In [22]:
a = np.random.randint(low=1,high=100,size=(4,5))

In [26]:
a

array([[99, 61, 39, 33, 64],
       [52, 40, 22, 76, 93],
       [11, 45, 60, 49, 68],
       [13, 31, 13, 67, 67]])

In [23]:
np.max(a,axis=0) #axis为最大最小值对应的维度 np.nanmax(a)为去除空值后的最大最小值

array([99, 61, 60, 76, 93])

In [25]:
np.ptp(a,axis=0) #Range of values (maximum - minimum) along an axis

array([88, 30, 47, 43, 29])

In [35]:
np.median(a,axis=0)

array([ 32.5,  42.5,  30.5,  58. ,  67.5])

In [36]:
np.mean(a,axis=0)

array([ 43.75,  44.25,  33.5 ,  56.25,  73.  ])

In [39]:
np.average(a,axis=0,weights=[0.1,0.2,0.4,0.3]) #加权平均

array([ 28.6,  41.4,  36.2,  58.2,  72.3])

In [41]:
np.std(a,axis=0) #Compute the standard deviation along the specified axis.

array([ 35.84253758,  10.89437928,  17.92344833,  16.57369904,  11.64044673])

In [42]:
np.var(a,axis=0) #Compute the variance along the specified axis.

array([ 1284.6875,   118.6875,   321.25  ,   274.6875,   135.5   ])

In [43]:
np.corrcoef(a) #计算相关系数

array([[ 1.        ,  0.0518404 , -0.72559466, -0.39556977],
       [ 0.0518404 ,  1.        ,  0.22334035,  0.87954569],
       [-0.72559466,  0.22334035,  1.        ,  0.52611577],
       [-0.39556977,  0.87954569,  0.52611577,  1.        ]])

In [45]:
np.cov(a) #计算协方差矩阵 一行为一个变量，一列为一个观测

array([[ 676.2 ,   38.1 , -412.65, -280.8 ],
       [  38.1 ,  798.8 ,  138.05,  678.6 ],
       [-412.65,  138.05,  478.3 ,  314.1 ],
       [-280.8 ,  678.6 ,  314.1 ,  745.2 ]])

In [47]:
a[0],np.cov(a[0])

(array([99, 61, 39, 33, 64]), array(676.2))

In [57]:
np.histogram(a,bins=5,density=False)  # density=False returns raw counts per bin; density=True returns a probability density instead (the older `normed` keyword is deprecated in favour of `density`)

(array([4, 5, 4, 5, 2]), array([ 11. ,  28.6,  46.2,  63.8,  81.4,  99. ]))

In [68]:
x = np.arange(4)
y = np.arange(5)

In [66]:
# An inner product needs two vectors of equal length. x has 4 elements and y
# has 5, so np.dot(x, y) raises ValueError. Dotting x with itself gives
# 0*0 + 1*1 + 2*2 + 3*3 = 14.
np.dot(x,x)

14

In [69]:
np.outer(x,y) #外积

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [72]:
# Print the multiplication table of x against y, one line per element of x.
for i in x:
    # Joining the cells of a row with single spaces prints the same text as a
    # Python 2 trailing-comma print followed by a bare print, and it also
    # runs unchanged on Python 3.
    print(' '.join('{0}x{1}={2}'.format(i,j,i*j) for j in y))

0x0=0 0x1=0 0x2=0 0x3=0 0x4=0
1x0=0 1x1=1 1x2=2 1x3=3 1x4=4
2x0=0 2x1=2 2x2=4 2x3=6 2x4=8
3x0=0 3x1=3 3x2=6 3x3=9 3x4=12


### 5. [输入输出](https://docs.scipy.org/doc/numpy-1.12.0/reference/routines.io.html)

In [83]:
a = np.arange(100)
b = np.sin(a)

- np.save()保存单个npy文件

In [84]:
np.save('1',a)
np.load('1.npy')

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
       51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67,
       68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84,
       85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

- np.savez()将多个数组保存到同一个npz文件中

In [86]:
np.savez('2',a=a,b=b) #如果不加关键参数，则以arr_0,arr_1对应ndarray
x = np.load('2.npz')

In [92]:
# With no `sep` argument np.fromstring reads the string as raw binary data of
# the default float dtype; the 3-character string '123' is not a whole number
# of elements, which is why this call raises the ValueError recorded below.
np.fromstring('123')

ValueError: string size must be a multiple of element size

In [93]:
# np.savetxt writes to exactly the file name it is given and appends no
# extension, so this creates a file named '1', not '1.txt'.
np.savetxt('1',a)

In [94]:
np.loadtxt('1.txt')

IOError: [Errno 2] No such file or directory: '1.txt'

In [96]:
import os
# Show the current working directory, where the files written above are saved.
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(os.getcwd())

/Users/xiaoyu/IPython notebook/教学
