# 创建numpy数组

In [7]:
import numpy as np

In [8]:
print(np.__version__)

1.16.5


In [9]:
np.array([1,2,3])

array([1, 2, 3])

In [11]:
np.ones(5)

array([1., 1., 1., 1., 1.])

In [12]:
np.zeros(3)

array([0., 0., 0.])

In [18]:
np.random.random(3)

array([0.81478133, 0.71187427, 0.72868487])

# 数据类型

In [19]:
a = np.array([1,2,3,4])
type(a)

numpy.ndarray

In [23]:
np.array((0,1,2,3))

array([0, 1, 2, 3])

# numpy 常见操作

In [25]:
# Create the integers 0 through 5 (not 1 through 6).
np.arange(6)  # half-open range [0, 6): the stop value 6 is excluded

array([0, 1, 2, 3, 4, 5])

In [27]:
# Create values from -2 up to (but not including) 1 in steps of 0.5.

np.arange(-2,1,0.5)

array([-2. , -1.5, -1. , -0.5,  0. ,  0.5])

In [28]:
# Sample 5 evenly spaced points on [0, 2]; the endpoint 2 is included.
np.linspace(0,2,5)

array([0. , 0.5, 1. , 1.5, 2. ])

In [30]:
# Still 5 samples, but endpoint=False leaves the stop value 2 out.
np.linspace(0,2,5,endpoint=False)

array([0. , 0.4, 0.8, 1.2, 1.6])

## numpy二维数组

In [32]:
np.ones([2,3])

array([[1., 1., 1.],
       [1., 1., 1.]])

In [33]:
np.zeros([3,3])

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [34]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [35]:
# Create a 4x4 identity matrix.
np.eye(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

In [37]:
# Build a diagonal matrix from a 1-D array.
a = np.array([1, 2, 3])
d = np.diag(a)  # 2-D matrix with a on the main diagonal, zeros elsewhere
d

array([[1, 0, 0],
       [0, 2, 0],
       [0, 0, 3]])

# numpy数组的属性

* 维度 
* 形状
* 元素个数

In [38]:
# Number of dimensions (ndim)
d = np.array([1,2,3,4])
d.ndim

1

In [44]:
d = np.zeros([3,3,4])
d.ndim

3

In [45]:
# Shape: length of the array along each dimension
d = np.array([1,2,3,4])
d.shape

(4,)

In [46]:
d = np.zeros([3,4])
d.shape

(3, 4)

In [47]:
# Total number of elements (size)
d = np.array([1,2,3,4])
d.size

4

In [48]:
d = np.zeros([3,4])
d.size

12

In [49]:
# Data type of the array elements
d.dtype

dtype('float64')

In [50]:
a = np.array([1,2,3,4])
a.dtype

dtype('int32')

In [52]:
a = np.array([1.0,2,3,4])
a.dtype

dtype('float64')

In [53]:
a.astype(int).dtype

dtype('int32')

## 数据类型的转化

In [55]:
a.astype(int).dtype

dtype('int32')

## numpy的四则运算

In [56]:
data = np.array([1,2])
ones = np.ones(2)

In [57]:
data + ones

array([2., 3.])

In [59]:
data * data

array([1, 4])

In [60]:
data - ones

array([0., 1.])

In [61]:
# Element-wise square
data ** 2

array([1, 4], dtype=int32)

In [62]:
# Trigonometric function; the input is interpreted as radians
np.sin(data)

array([0.84147098, 0.90929743])

In [63]:
# Element-wise square root
np.sqrt(data)

array([1.        , 1.41421356])

In [64]:
# Element-wise exponential function e**x
np.exp(data)

array([2.71828183, 7.3890561 ])

In [65]:
# Boolean filtering: sample array to compare against a threshold
a = np.array([10,20,30,40,50])
a

array([10, 20, 30, 40, 50])

In [66]:
a<=35

array([ True,  True,  True, False, False])

# numpy 数组和python的list的区别

In [71]:
a = [10,20,30,40]
a

[10, 20, 30, 40]

In [72]:
b = list(range(2,6))
b

[2, 3, 4, 5]

In [73]:
a + b

[10, 20, 30, 40, 2, 3, 4, 5]

In [77]:
# a - b   a / b
# Both raise an error: Python lists do not support these operators.

In [78]:
a * 2

[10, 20, 30, 40, 10, 20, 30, 40]

## numpy 二维数组


In [82]:
a = np.array([ [0,1],[2,3] ])
a

array([[0, 1],
       [2, 3]])

In [83]:
a[0]

array([0, 1])

In [84]:
a[1]

array([2, 3])

![2d_indexing](./2d_indexing.png)

# numpy数组的切片和拆分

In [88]:
a = np.arange(25).reshape(5,5)

In [98]:
# Yellow cells: rows 0, 2, 4 and columns 2, 4
a[::2,2::2]

array([[ 2,  4],
       [12, 14],
       [22, 24]])

In [93]:
# Green cells: the bottom-right 2x2 corner (rows 3-4, columns 3-4)
a[3:,3:]

array([[18, 19],
       [23, 24]])

In [92]:
# Purple cells: every row of column 1
a[:,1]

array([ 1,  6, 11, 16, 21])

In [90]:
# Red cells: row 1, columns 2 and 3
a[1,2:4]

array([7, 8])

# 矩阵的变换

In [99]:
# Matrix transpose (rows become columns, columns become rows): sample matrix
a = np.array([[1,2],[3,4]])
a


array([[1, 2],
       [3, 4]])

In [100]:
a.T

array([[1, 3],
       [2, 4]])

In [101]:
# Reshaping: start from a flat array of 6 elements

c = np.array([1,2,3,4,5,6])
c.shape

(6,)

In [102]:
c.reshape(2,3)

array([[1, 2, 3],
       [4, 5, 6]])

In [103]:
c.reshape(3,2)

array([[1, 2],
       [3, 4],
       [5, 6]])

In [108]:
# -1 lets NumPy infer that dimension from the array size (here 6 / 3 = 2 rows)
c.reshape(-1,3)

array([[1, 2, 3],
       [4, 5, 6]])

In [109]:
# Flatten the array to one dimension
c.ravel()

array([1, 2, 3, 4, 5, 6])

## numpy数据统计

In [110]:
a = np.array([
    [1,2],
    [3,4]
])
a

array([[1, 2],
       [3, 4]])

In [111]:
# Sum of all elements
np.sum(a)

10

### 方向参数 axis
* axis=0  沿着行的方向(自上而下)聚合, 得到每一列的结果
* axis=1  沿着列的方向(从左到右)聚合, 得到每一行的结果


In [113]:
# Sum of each row (axis=1 collapses the column axis)
np.sum(a,axis=1)

array([3, 7])

In [114]:
# Sum of each column (axis=0 collapses the row axis)
np.sum(a,axis=0)

array([4, 6])

In [115]:
# Maximum over all elements
np.max(a)

4

In [117]:
# Minimum over all elements
np.min(a)

1

In [118]:
# Arithmetic mean of all elements
np.mean(a)

2.5

# 方差

方差越大, 说明数据的波动性越大;

方差越小, 说明数据的波动性越小。

以数据 (1,2,3,4,5) 为例:
1. 计算这些数据的平均值: (1+2+3+4+5)/5 = 3
2. 计算每个数与平均值之差的平方并求和: (1-3)**2 + (2-3)**2 + (3-3)**2 + (4-3)**2 + (5-3)**2 = 10
3. 把求和的结果再除以元素的数量: 10/5 = 2, 这就是方差

In [119]:
# Variance of all elements
np.var(a)

1.25

In [120]:
# Standard deviation
np.std(a)

1.118033988749895

### numpy求解鸡兔同笼问题

鸡兔同笼是中国古代的数学名题之一。

大约在1500年前，《孙子算经》中就记载了这个有趣的问题。书中是这样叙述的：
今有鸡兔同笼，上有三十五头，下有九十四足，问鸡兔各几何？

翻译： 有若干只鸡兔同在一个笼子里，从上面数，有35个头，从下面数，有94只脚。问笼中各有多少只鸡和兔？

In [122]:
from sympy import *

In [123]:
# x = number of chickens (2 legs each), y = number of rabbits (4 legs each).
x,y = symbols('x y')
eq1 = Eq(x+y,35)  # 35 heads in total
eq2 = Eq(2*x+4*y ,94)  # 94 legs in total
solve([eq1,eq2])

{x: 23, y: 12}

## 用高等数学矩阵的方式求解鸡兔同笼

In [125]:
# Chickens-and-rabbits written as the linear system A·X = B.
# Row 0 counts heads (1 per animal); row 1 counts legs (2 per chicken, 4 per rabbit).
a = np.array([[1, 1], [2, 4]])
b = np.array([35, 94])
np.linalg.solve(a, b)

array([23., 12.])

In [126]:
# Same solution through the explicit inverse: X = A^-1 · B
np.dot(np.linalg.inv(a), b)

array([23., 12.])

In [127]:
np.dot(np.linalg.inv(a) ,a )

array([[1., 0.],
       [0., 1.]])

In [128]:
np.dot( a ,np.linalg.inv(a))

array([[1., 0.],
       [0., 1.]])

## numpy 随机数api

In [129]:
# Random array of shape (2, 5) with values drawn uniformly from [0, 1)
np.random.rand(2,5)


array([[0.30309771, 0.11978049, 0.27753405, 0.12271411, 0.796814  ],
       [0.96938841, 0.94200346, 0.25497429, 0.79921337, 0.2237096 ]])

In [130]:
# 1x10 array of samples from the standard normal distribution
np.random.randn(1,10)

array([[-0.7271381 ,  0.13053978, -0.73609776, -0.35511145, -1.90209282,
        -1.68150001,  0.97223966, -1.45459163, -0.10165083,  1.27424268]])

In [133]:
# randint: random integers
np.random.randint(2,5,10)
# low=2 is inclusive, high=5 is exclusive,
# so values come from [2, 5), i.e. 2, 3 or 4
# 10 random integers are drawn

array([4, 3, 2, 2, 4, 4, 4, 3, 2, 2])

In [134]:
# Random floats in [0, 1) with the requested shape (10 values here)
np.random.random_sample([10])

array([0.84327892, 0.84934316, 0.56751281, 0.48545242, 0.34616645,
       0.02300478, 0.56981408, 0.62469799, 0.10397434, 0.23748978])

## 正态分布的期望值和标准差

In [136]:
# 3x3 samples from a normal distribution: 85 is the mean (loc) and
# 10 is the standard deviation (scale), not the variance.
np.random.normal(85,10,size=(3,3))

array([[ 91.97739071,  73.5342878 ,  80.63125231],
       [ 81.32861501,  82.36964871,  75.37289265],
       [ 99.09488092,  92.57299138, 105.30562415]])

In [143]:
b = np.random.randn(4,4)
b

array([[-0.13317305,  1.5513805 , -1.33668495, -0.30590889],
       [-0.53711788, -0.49374932,  1.06428433, -0.26554455],
       [ 0.38878966,  1.97799917, -0.27674043, -1.48152223],
       [-0.2729247 ,  1.41977054, -0.40578052, -0.74188567]])

In [144]:
np.mean(b)


0.009449501168312113

In [146]:
b = np.random.randn(4000,4000)

In [147]:
np.mean(b)

0.0003479968847810779

In [148]:
b.std()

0.9999152737019166

## numpy 数据打散

In [150]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [151]:
np.random.permutation(a) # returns a shuffled copy; a itself is unchanged

array([1, 8, 6, 2, 4, 7, 3, 0, 9, 5])

In [153]:
np.random.shuffle(a) # shuffles a in place

In [154]:
a

array([3, 1, 9, 0, 7, 8, 2, 6, 5, 4])

## numpy 随机数种子

In [171]:
np.random.seed(seed=3)
np.random.randn(2,2)

array([[ 1.78862847,  0.43650985],
       [ 0.09649747, -1.8634927 ]])

In [172]:
np.random.randn(2,2)

array([[-0.2773882 , -0.35475898],
       [-0.08274148, -0.62700068]])

In [173]:
np.random.randn(2,2)

array([[-0.04381817, -0.47721803],
       [-1.31386475,  0.88462238]])

## numpy逻辑操作

In [178]:
a = np.array([True,True,False])

In [179]:
# Check whether every element is True
np.all(a)

False

In [180]:
# Check whether at least one element is True
np.any(a)

True

In [181]:
a = np.array([1,2,3,4,5,6,7])
a >3

array([False, False, False,  True,  True,  True,  True])

In [182]:
a <5

array([ True,  True,  True,  True, False, False, False])

# numpy 的排序操作

In [185]:
a  = np.random.randn(6)
a

array([ 0.98236743, -1.10106763, -1.18504653, -0.2056499 ,  1.48614836,
        0.23671627])

In [187]:
a.sort()
a

array([-1.18504653, -1.10106763, -0.2056499 ,  0.23671627,  0.98236743,
        1.48614836])

In [188]:
a  = np.random.randn(6)
a

array([-1.02378514, -0.7129932 ,  0.62524497, -0.16051336, -0.76883635,
       -0.23003072])

In [189]:
a.argsort()

array([0, 4, 1, 5, 3, 2], dtype=int64)

In [190]:
b = np.random.randn(5,5)
b

array([[ 0.74505627,  1.97611078, -1.24412333, -0.62641691, -0.80376609],
       [-2.41908317, -0.92379202, -1.02387576,  1.12397796, -0.13191423],
       [-1.62328545,  0.64667545, -0.35627076, -1.74314104, -0.59664964],
       [-0.58859438, -0.8738823 ,  0.02971382, -2.24825777, -0.26776186],
       [ 1.01318344,  0.85279784,  1.1081875 ,  1.11939066,  1.48754313]])

In [191]:
# axis=0 sorts the values within each column (top to bottom)
# axis=1 would sort the values within each row
b.sort(axis = 0)

In [192]:
b

array([[-2.41908317, -0.92379202, -1.24412333, -2.24825777, -0.80376609],
       [-1.62328545, -0.8738823 , -1.02387576, -1.74314104, -0.59664964],
       [-0.58859438,  0.64667545, -0.35627076, -0.62641691, -0.26776186],
       [ 0.74505627,  0.85279784,  0.02971382,  1.11939066, -0.13191423],
       [ 1.01318344,  1.97611078,  1.1081875 ,  1.12397796,  1.48754313]])

## 行向量和列向量

In [194]:
a = np.array([1,2,3,4,5,6])
a

array([1, 2, 3, 4, 5, 6])

In [195]:
a.T

array([1, 2, 3, 4, 5, 6])

In [196]:
a.reshape(6,-1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6]])

## 通过概率去计算pi的值

In [211]:
# Monte Carlo estimate of pi: draw points uniformly in the unit square and
# measure the fraction that lands inside the quarter circle of radius 1.
num = 9000000  # number of random points
np.random.seed(3)  # fixed seed so the estimate is reproducible
b = np.random.rand(num, 2)  # num points with x and y in [0, 1)
d = np.square(b)  # squared coordinates
c = d.sum(axis=1)  # squared distance of each point from the origin
countincircle = (c <= 1).sum()  # points inside the quarter circle

# The area ratio quarter-circle / square is pi / 4, so scale the fraction by 4.
4 * countincircle / num

3.140888888888889

## numpy 案例演示

In [216]:
a = np.arange(5) ** 2 
a

array([ 0,  1,  4,  9, 16], dtype=int32)

In [217]:
a[2]

4

In [218]:
a[2:5]

array([ 4,  9, 16], dtype=int32)

In [220]:
a[::2]

array([ 0,  4, 16], dtype=int32)

In [221]:
a[::-1]

array([16,  9,  4,  1,  0], dtype=int32)

In [224]:
a = np.arange(6).reshape(3,2)

In [230]:
b = a+1
b

array([[1, 2],
       [3, 4],
       [5, 6]])

In [231]:
b[0,1]

2

In [232]:
b[0][1]

2

![2d_fancy_indexing](./2d_fancy_indexing.png)

In [246]:
# Purple cells: row 1, columns picked out by a boolean mask.
# The 5x5 grid is rebuilt here so the cell works when the notebook is run
# top to bottom, instead of relying on a later cell having defined it.
a = np.arange(25).reshape(5, 5)
mask = np.array([False, True, True, True, False])
a[1, mask]


array([6, 7, 8])

In [245]:
# Green cells: rows 1, 3 and 4 (integer-list index) of the last two columns.
a = np.arange(25).reshape(5, 5)
a[[1, 3, 4], 3:]



array([[ 8,  9],
       [18, 19],
       [23, 24]])

In [241]:
# Red cells: first rebuild and display the 5x5 grid holding 0..24.
a = np.arange(25).reshape(5, 5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [242]:
a[(0,2,4),(0,2,4)]

array([ 0, 12, 24])

In [244]:
a[[0,2,4],[0,2,4]]

array([ 0, 12, 24])

## numpy数组的拆分和合并

In [247]:
a = np.arange(9).reshape(3,3)

In [248]:
a

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [249]:
# Splitting
np.hsplit(a,3) # horizontal split: cut between columns into 3 column blocks

[array([[0],
        [3],
        [6]]), array([[1],
        [4],
        [7]]), array([[2],
        [5],
        [8]])]

In [250]:
np.vsplit(a,3) # vertical split: cut between rows into 3 row blocks

[array([[0, 1, 2]]), array([[3, 4, 5]]), array([[6, 7, 8]])]

In [252]:
# Stacking: two 2x2 matrices to be joined vertically and horizontally.
a = np.array([[1, 2], [3, 4]])
b = np.array([[6, 7], [8, 9]])

In [254]:
np.vstack([a,b])

array([[1, 2],
       [3, 4],
       [6, 7],
       [8, 9]])

In [255]:
np.hstack([a,b])

array([[1, 2, 6, 7],
       [3, 4, 8, 9]])

## numpy 的高级操作

In [258]:
a = np.arange(5) ** 2
a

array([ 0,  1,  4,  9, 16], dtype=int32)

In [259]:
b = np.array([3,1,3,0])
b

array([3, 1, 3, 0])

In [260]:
a[b]

array([9, 1, 9, 0], dtype=int32)

In [262]:
a[[3,1,3,0]]

array([9, 1, 9, 0], dtype=int32)

In [263]:
a = np.array([3,2,6,3,4,8,9,1])

In [264]:
np.argsort(a)

array([7, 1, 0, 3, 4, 2, 5, 6], dtype=int64)

In [265]:
a[np.argsort(a)]

array([1, 2, 3, 3, 4, 6, 8, 9])

In [266]:
a = np.random.randint(0, 10, 20)
a


array([1, 8, 4, 6, 3, 1, 2, 4, 9, 6, 6, 8, 8, 6, 3, 1, 8, 5, 6, 6])

In [267]:
np.argmax(a)

8

In [268]:
np.argmin(a)

0

In [269]:
np.nonzero(a)

(array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19], dtype=int64),)

In [270]:
np.count_nonzero(a)

20

In [271]:
# Broadcasting failure: shapes (2, 3) and (4,) cannot be aligned because the
# trailing dimensions (3 and 4) differ and neither is 1, so a + b raises ValueError.
a = np.arange(6).reshape(2, 3)
b = np.array([0, 1, 2, 3])
a + b

ValueError: operands could not be broadcast together with shapes (2,3) (4,) 

## 广播机制的案例, 地铁票价计算

0 2.7km 3.9km 4.9km 8.6km 10.1km  12.7km

In [272]:
# Station positions in km, as listed above (presumably measured from the first station).
distance = np.array([0,2.7,3.9,4.9,8.6,10.1,12.7])
distance

array([ 0. ,  2.7,  3.9,  4.9,  8.6, 10.1, 12.7])

In [274]:
# Turn the 1-D array into a 7x1 column so it can broadcast against the row.
distance.reshape(7,-1)

array([[ 0. ],
       [ 2.7],
       [ 3.9],
       [ 4.9],
       [ 8.6],
       [10.1],
       [12.7]])

In [277]:
# Pairwise differences via broadcasting: the (n,) row minus the (n, 1) column
# gives an (n, n) table with dd[i, j] = distance[j] - distance[i].
# reshape(-1, 1) infers the length instead of hard-coding 7 stations.
dd = distance - distance.reshape(-1, 1)
dd

array([[  0. ,   2.7,   3.9,   4.9,   8.6,  10.1,  12.7],
       [ -2.7,   0. ,   1.2,   2.2,   5.9,   7.4,  10. ],
       [ -3.9,  -1.2,   0. ,   1. ,   4.7,   6.2,   8.8],
       [ -4.9,  -2.2,  -1. ,   0. ,   3.7,   5.2,   7.8],
       [ -8.6,  -5.9,  -4.7,  -3.7,   0. ,   1.5,   4.1],
       [-10.1,  -7.4,  -6.2,  -5.2,  -1.5,   0. ,   2.6],
       [-12.7, -10. ,  -8.8,  -7.8,  -4.1,  -2.6,   0. ]])

In [278]:
# Keep only the positive differences (the upper triangle); everything else becomes 0.
np.where(dd>0,dd,0)

array([[ 0. ,  2.7,  3.9,  4.9,  8.6, 10.1, 12.7],
       [ 0. ,  0. ,  1.2,  2.2,  5.9,  7.4, 10. ],
       [ 0. ,  0. ,  0. ,  1. ,  4.7,  6.2,  8.8],
       [ 0. ,  0. ,  0. ,  0. ,  3.7,  5.2,  7.8],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  1.5,  4.1],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  2.6],
       [ 0. ,  0. ,  0. ,  0. ,  0. ,  0. ,  0. ]])