In [1]:
import numpy as np

### 从python列表生成一维数组

In [2]:
# Build a 1-D ndarray from a plain Python list of integers.
list_of_ints = [1, 2, 3]
Array_1 = np.array(list_of_ints)
# Bare last expression: the notebook displays the array's repr.
Array_1

array([1, 2, 3])

In [3]:
# Indexing is 0-based, so index 1 selects the second element.
Array_1[1]

2

In [4]:
# Inspect the Python type of the object returned by np.array.
type(Array_1)

numpy.ndarray

In [5]:
# Element type of the array; no dtype was passed at creation, so NumPy inferred it.
Array_1.dtype

dtype('int32')

### 控制内存大小

In [6]:
# Number of bytes taken by the array's elements.
Array_1.nbytes

12

In [7]:
# Rebuild the array with an explicit 1-byte integer dtype to shrink its
# memory footprint, then report the new size in bytes.
Array_1 = np.array(list_of_ints, dtype=np.int8)
Array_1.nbytes

3

In [8]:
# The element type can also be changed after the fact: astype() builds a new
# array converted to the requested dtype.
Array_1b = Array_1.astype(np.float32)
Array_1b

array([ 1.,  2.,  3.], dtype=float32)

### 使用异构类型的列表生成NumPy数组

In [9]:
# Heterogeneous input: NumPy picks one dtype able to hold every element, so
# the dtype changes as floats and then strings are included.
complex_list = [1, 2, 3] + [1., 2., 3.] + ['a', 'b', 'c']

# Single-argument print(...) with %-formatting produces the same text on
# Python 2 and Python 3; the bare `print a, b` statement only parses on 2.
Array_2 = np.array(complex_list[:3])   # integers only
print('complext_list[:3] %s' % Array_2.dtype)

Array_2 = np.array(complex_list[:6])   # integers and floats
print('complext_list[:6] %s' % Array_2.dtype)

Array_2 = np.array(complex_list)       # integers, floats and strings
print('complext_list[:] %s' % Array_2.dtype)

# Once strings are present every element is stored as a string, so the first
# element is no longer a NumPy number.
print(isinstance(Array_2[0], np.number))

complext_list[:3] int32
complext_list[:6] float64
complext_list[:] |S32
False


### 多维数组

In [10]:
# A list of equal-length lists becomes a 2-D array: one inner list per row.
a_list_of_list = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
Array_2D = np.array(a_list_of_list)
Array_2D

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [11]:
# One index per axis: row 1, column 1 (both 0-based).
Array_2D[1, 1]

5

In [12]:
# Three levels of nesting give a 3-D array: 2 blocks x 3 rows x 2 columns.
a_list_of_list_of_list = [
    [[1, 2], [3, 4], [5, 6]],
    [[7, 8], [9, 10], [11, 12]],
]
Array_3D = np.array(a_list_of_list_of_list)
Array_3D

array([[[ 1,  2],
        [ 3,  4],
        [ 5,  6]],

       [[ 7,  8],
        [ 9, 10],
        [11, 12]]])

In [13]:
# Three indices for a 3-D array: block 0, row 2, column 0.
Array_3D[0,2,0]

5

In [14]:
# A sequence of (key, value) tuples converts to a 2-D array, one row per pair.
# dict.items() is a view object on Python 3 (np.array would wrap it as a single
# object rather than rows), so materialise it; sorting also fixes the row order.
np.array(sorted({1:2, 3:4, 5:6}.items()))

array([[1, 2],
       [3, 4],
       [5, 6]])

### 改变数组大小

In [15]:
# reshape() hands back views onto the same data; only .copy() makes an
# independent array.
original_array = np.arange(1, 9)
Array_a = original_array.reshape((4, 2))           # view
Array_c = original_array.reshape((2, 2, 2))        # view
Array_b = original_array.reshape((4, 2)).copy()    # independent copy
# Write through the original: the two views reflect it, the copy does not.
original_array[0] = -1

In [16]:
# Array_a is a reshape of original_array, shown after original_array[0] was set to -1.
Array_a

array([[-1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8]])

In [17]:
# Array_b was created with .copy(), shown after original_array[0] was set to -1.
Array_b

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [18]:
# Array_c is the (2, 2, 2) reshape of original_array.
Array_c

array([[[-1,  2],
        [ 3,  4]],

       [[ 5,  6],
        [ 7,  8]]])

In [19]:
# Assigning a tuple to the .shape attribute (it is an attribute, not a method)
# gives the same layout, but changes original_array itself.
original_array.shape = (4,2)
original_array

array([[-1,  2],
       [ 3,  4],
       [ 5,  6],
       [ 7,  8]])

### 使用函数生成数组

In [20]:
# Lay the integers 0..8 out on a 3x3 grid, row by row.
ordinal_values = np.arange(0, 9).reshape((3, 3))
ordinal_values

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [21]:
# A [::-1] slice walks the sequence backwards, so the grid counts down from 8.
ordinal_values = np.arange(0, 9)[::-1].reshape((3, 3))
ordinal_values

array([[8, 7, 6],
       [5, 4, 3],
       [2, 1, 0]])

In [22]:
# Arrays can be filled with random integers drawn from [low, high); `high` is
# exclusive. `size` already produces the 3x3 shape, so no extra reshape is
# needed. The values differ on every run because no seed is set.
np.random.randint(low=1, high=10, size=(3, 3))

array([[5, 6, 3],
       [2, 7, 8],
       [5, 5, 9]])

In [23]:
# 3x3 array filled with zeros.
np.zeros(shape=(3, 3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [24]:
# 3x3 array filled with ones.
np.ones(shape=(3, 3))

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [25]:
# 3x3 identity matrix: ones on the main diagonal, zeros elsewhere.
np.eye(N=3)

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [26]:
# Arithmetic progression: 10 evenly spaced values from 0 to 1, both ends included.
np.linspace(0, 1, 10)

array([ 0.        ,  0.11111111,  0.22222222,  0.33333333,  0.44444444,
        0.55555556,  0.66666667,  0.77777778,  0.88888889,  1.        ])

In [27]:
# Geometric progression: 10 values from 10**0 to 10**1, evenly spaced in the exponent.
np.logspace(0, 1, 10, base=10.0)

array([  1.        ,   1.29154967,   1.66810054,   2.15443469,
         2.7825594 ,   3.59381366,   4.64158883,   5.9948425 ,
         7.74263683,  10.        ])

In [28]:
# 3x3 sample from the standard normal distribution (mean 0, standard deviation 1).
np.random.normal(loc=0.0, scale=1.0, size=(3, 3))

array([[ 1.69258356, -1.1812894 , -1.08606383],
       [-1.45554735,  0.09575163,  0.88230499],
       [-1.70250086,  0.87604094, -0.8843305 ]])

In [29]:
# A different mean and standard deviation can be requested: the first argument
# (loc) is the mean, the second (scale) the standard deviation.
np.random.normal(1.0, 3.0, (3, 3))

array([[-5.38606503,  2.36304119, -0.12588862],
       [ 3.04940239,  0.8793054 ,  6.05138431],
       [-1.66159456,  1.24867045, -1.02246997]])

In [30]:
# 3x3 sample from the uniform distribution on [0.0, 1.0).
np.random.uniform(0.0, 1.0, (3, 3))

array([[ 0.04469917,  0.09663072,  0.90862367],
       [ 0.45710016,  0.02766612,  0.6035141 ],
       [ 0.13707136,  0.13773984,  0.21456225]])

#### 直接从文件中获得数组

In [31]:
# loadtxt is given the file name, the field delimiter and the target dtype;
# per the original note, loading fails when the dtype does not fit the data.
housing = np.loadtxt('regression-datasets-housing.csv', delimiter=',', dtype=float)
housing

array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,
          3.96900000e+02,   4.98000000e+00,   2.40000000e+01],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          3.96900000e+02,   9.14000000e+00,   2.16000000e+01],
       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          3.92830000e+02,   4.03000000e+00,   3.47000000e+01],
       ..., 
       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          3.96900000e+02,   5.64000000e+00,   2.39000000e+01],
       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,
          3.93450000e+02,   6.48000000e+00,   2.20000000e+01],
       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          3.96900000e+02,   7.88000000e+00,   1.19000000e+01]])

In [None]:
### 从pandas提取数据

In [33]:
# Load the same CSV through pandas. header=None tells read_csv that the file
# has no header row.
# NOTE(review): this rebinds `housing`, which held the loadtxt ndarray, to a DataFrame.
import pandas as pd
housing_filename = 'regression-datasets-housing.csv'
housing = pd.read_csv(housing_filename, header=None)

# Preview the first rows only.
housing.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,0.00632,18,2.31,0,0.538,6.575,65.2,4.09,1,296,15,396.9,4.98,24.0
1,0.02731,0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17,396.9,9.14,21.6
2,0.02729,0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17,392.83,4.03,34.7
3,0.03237,0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18,394.63,2.94,33.4
4,0.06905,0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18,396.9,5.33,36.2


In [36]:
# .values exposes the DataFrame's data as a NumPy array.
housing_array = housing.values
# Only the last expression of a cell is displayed, so a bare attribute lookup
# in the middle of the cell would be evaluated and discarded. Inspect
# housing_array.dtype in its own cell when the element type is needed.
housing_array

array([[  6.32000000e-03,   1.80000000e+01,   2.31000000e+00, ...,
          3.96900000e+02,   4.98000000e+00,   2.40000000e+01],
       [  2.73100000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          3.96900000e+02,   9.14000000e+00,   2.16000000e+01],
       [  2.72900000e-02,   0.00000000e+00,   7.07000000e+00, ...,
          3.92830000e+02,   4.03000000e+00,   3.47000000e+01],
       ..., 
       [  6.07600000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          3.96900000e+02,   5.64000000e+00,   2.39000000e+01],
       [  1.09590000e-01,   0.00000000e+00,   1.19300000e+01, ...,
          3.93450000e+02,   6.48000000e+00,   2.20000000e+01],
       [  4.74100000e-02,   0.00000000e+00,   1.19300000e+01, ...,
          3.96900000e+02,   7.88000000e+00,   1.19000000e+01]])

## NumPy快速操作和计算

In [39]:
# Row vector (shape (1, 5)) holding 0..4.
a = np.arange(0, 5).reshape((1, 5))
a

array([[0, 1, 2, 3, 4]])

In [43]:
# Rebuild `a` before the in-place add so the cell is idempotent: re-running it
# always yields [[1, 2, 3, 4, 5]] instead of incrementing the previous result.
a = np.arange(5).reshape(1, 5)
# In-place scalar addition is applied to every element.
a += 1
a

array([[1, 2, 3, 4, 5]])

In [41]:
# The * operator multiplies element by element (this is not a matrix product).
a * a

array([[ 1,  4,  9, 16, 25]])

In [45]:
# Row vector (shape (1, 5)) holding 1..5.
a = np.arange(1, 6).reshape((1, 5))
a

array([[1, 2, 3, 4, 5]])

In [46]:
# Column vector (shape (5, 1)) holding 1..5.
b = np.arange(1, 6).reshape((5, 1))
b

array([[1],
       [2],
       [3],
       [4],
       [5]])

In [47]:
# a is (1, 5) and b is (5, 1): broadcasting stretches both to (5, 5) before
# the element-wise product.
a * b

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

In [48]:
# The broadcast operand written out explicitly: the row 1..5 repeated five times.
a2 = np.array([[1, 2, 3, 4, 5]] * 5)
a2

array([[1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5],
       [1, 2, 3, 4, 5]])

In [49]:
# Transpose of a2: its rows become the columns of b2.
b2 = a2.T
b2

array([[1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4],
       [5, 5, 5, 5, 5]])

In [50]:
# Element-wise product of the two (5, 5) arrays.
a2 * b2

array([[ 1,  2,  3,  4,  5],
       [ 2,  4,  6,  8, 10],
       [ 3,  6,  9, 12, 15],
       [ 4,  8, 12, 16, 20],
       [ 5, 10, 15, 20, 25]])

#### 逐元素（element-wise）运算的函数包括：abs(), sign(), round(), floor(), sqrt(), log(), exp() 等；
#### 在特定坐标轴上进行运算的包括： sum(), prod()等

In [51]:
# print(...) with a single argument gives the same output on Python 2 and
# Python 3; the bare `print a2` statement is a SyntaxError on Python 3.
print(a2)

[[1 2 3 4 5]
 [1 2 3 4 5]
 [1 2 3 4 5]
 [1 2 3 4 5]
 [1 2 3 4 5]]


In [52]:
# Sum along axis 0 (down the rows): one total per column.
np.sum(a2, axis=0)

array([ 5, 10, 15, 20, 25])

In [53]:
# Sum along axis 1 (across the columns): one total per row.
np.sum(a2, axis=1)

array([15, 15, 15, 15, 15])

In [54]:
# Square root applied to every element.
np.sqrt(a2)

array([[ 1.        ,  1.41421356,  1.73205081,  2.        ,  2.23606798],
       [ 1.        ,  1.41421356,  1.73205081,  2.        ,  2.23606798],
       [ 1.        ,  1.41421356,  1.73205081,  2.        ,  2.23606798],
       [ 1.        ,  1.41421356,  1.73205081,  2.        ,  2.23606798],
       [ 1.        ,  1.41421356,  1.73205081,  2.        ,  2.23606798]])

#### 相对于普通的Python列表，NumPy函数对数组的运算速度非常快

In [55]:
# Baseline: a pure-Python list comprehension adding 1.0 to one million integers
# (-n 1: one loop per run, -r 3: three runs).
%timeit -n 1 -r 3 [i+1.0 for i in range(10**6)]

1 loop, best of 3: 99 ms per loop


In [56]:
# Same computation as one NumPy array expression, timed with the same settings.
%timeit -n 1 -r 3 np.arange(10**6) + 1.0

1 loop, best of 3: 5.72 ms per loop


In [57]:
# NOTE(review): `math` is imported here but not used by this cell.
import math
# Square root of one million values via NumPy, timed with the same settings.
%timeit -n 1 -r 3 np.sqrt(np.arange(10**6))

1 loop, best of 3: 7.27 ms per loop


### 矩阵运算

In [59]:
# 5x5 float matrix holding 0.0 .. 24.0 in row-major order.
M = np.arange(25, dtype=float).reshape((5, 5))
M

array([[  0.,   1.,   2.,   3.,   4.],
       [  5.,   6.,   7.,   8.,   9.],
       [ 10.,  11.,  12.,  13.,  14.],
       [ 15.,  16.,  17.,  18.,  19.],
       [ 20.,  21.,  22.,  23.,  24.]])

In [60]:
# Coefficient vector, then a (5, 2) matrix whose columns are the vector and
# its reverse.
coefs = np.array([1.] + [0.5] * 4)
coefs_matrix = np.column_stack([coefs, coefs[::-1]])
coefs_matrix

array([[ 1. ,  0.5],
       [ 0.5,  0.5],
       [ 0.5,  0.5],
       [ 0.5,  0.5],
       [ 0.5,  1. ]])

In [61]:
# Matrix-vector product: (5, 5) dot (5,) gives a length-5 vector.
np.dot(M, coefs)

array([  5.,  20.,  35.,  50.,  65.])

In [62]:
# Vector-matrix product: operand order matters, so the result differs from np.dot(M, coefs).
np.dot(coefs, M)

array([ 25.,  28.,  31.,  34.,  37.])

In [63]:
# Matrix-matrix product: (5, 5) dot (5, 2) gives a (5, 2) result.
np.dot(M, coefs_matrix)

array([[  5.,   7.],
       [ 20.,  22.],
       [ 35.,  37.],
       [ 50.,  52.],
       [ 65.,  67.]])

**NumPy还提供了矩阵类 `np.matrix`，它是ndarray的子类，且始终为二维**；其 `*` 运算符执行矩阵乘法而非逐元素乘法。（NumPy官方文档已不再推荐使用该类，建议使用普通ndarray配合 `np.dot`。）

### NumPy数组切片和索引

In [64]:
# 10x10 integer matrix holding 0..99; entry [r, c] equals 10*r + c.
M = np.arange(100, dtype=int).reshape((10, 10))
M

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])

In [66]:
M[2:9:2, :]  # rows at indices 2, 4, 6, 8 (start 2, stop 9 exclusive, step 2), all columns

array([[20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89]])

In [67]:
M[2:9:2, 5:] # same row slice, then a column slice keeping index 5 (the 6th column) onward

array([[25, 26, 27, 28, 29],
       [45, 46, 47, 48, 49],
       [65, 66, 67, 68, 69],
       [85, 86, 87, 88, 89]])

In [68]:
# Same rows; the column slice starts at index 5 and steps backwards to index 0,
# so the selected columns come out in reverse order.
M[2:9:2, 5::-1]

array([[25, 24, 23, 22, 21, 20],
       [45, 44, 43, 42, 41, 40],
       [65, 64, 63, 62, 61, 60],
       [85, 84, 83, 82, 81, 80]])

In [70]:
# 可以创建布尔索引，使用变量切分数组
row_index = (M[:,0]>=20) &(M[:,0]<=80)
col_index = M[0,:]>=5
M[row_index, :][:, col_index]

array([[25, 26, 27, 28, 29],
       [35, 36, 37, 38, 39],
       [45, 46, 47, 48, 49],
       [55, 56, 57, 58, 59],
       [65, 66, 67, 68, 69],
       [75, 76, 77, 78, 79],
       [85, 86, 87, 88, 89]])

In [73]:
# 使用布尔掩模，全局选择
mask = (M>=20) & (M<=90) & ((M / 10.) % 1 >= 0.5)
M[mask]

array([25, 26, 27, 28, 29, 35, 36, 37, 38, 39, 45, 46, 47, 48, 49, 55, 56,
       57, 58, 59, 65, 66, 67, 68, 69, 75, 76, 77, 78, 79, 85, 86, 87, 88,
       89])