# Numpy (Python) 笔记

NumPy是使用Python进行科学计算的基础软件包。包括：1)功能强大的N维数组对象。2)精密广播功能函数。3)集成C/C+和Fortran代码的工具。4)强大的线性代数、傅立叶变换和随机数功能。

NumPy 最重要的一个特点是其 N 维数组对象 ndarray，它是一系列同类型数据的集合，以 0 下标为开始进行集合中元素的索引。ndarray 对象是用于存放同类型元素的多维数组。ndarray 中的每个元素在内存中都有相同存储大小的区域。
ndarray对象的内容可以通过索引或切片来访问和修改，与 Python 中 list 的切片操作一样。ndarray 数组可以基于 0 - n 的下标进行索引，切片对象可以通过内置的 slice 函数，并设置 start, stop 及 step 参数进行，从原数组中切割出一个新数组。

---

In [2]:
import numpy as np

## 生成长度为10的零向量，第五个数为1

In [3]:
# a null vector of size 10, with 5th value as 1
z = np.zeros(10)
z[4] = 1
z

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.])

## 生成一个3x3的矩阵，数值依次为0至8

In [4]:
# a 3x3 matrix with values ranging from 0 to 8
z = np.arange(0,9).reshape(3,3)
z

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

## 将一个向量倒置

In [6]:
# reverse a vector
z = [1, 2, 3]
z = z[::-1]
z

[3, 2, 1]

## 生成一个3x3的单位矩阵，和一个3x3数值都为1的矩阵

In [7]:
# a 3x3 identity matrix
z = np.eye(3)
print(z)
# a 3x3 matrix with values = 1
z = np.ones((3,3))
print(z)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]
[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


## 生成一个3x2的矩阵，数值都为随机数

In [5]:
# a 3x2 matrix with random values
z = np.random.rand(3,2)
z

array([[0.1020582 , 0.629938  ],
       [0.56906957, 0.5652367 ],
       [0.26452785, 0.88756775]])

## 输出上述矩阵的最大值、最小值和平均值，并将该矩阵正态化 

In [10]:
# find the minimum, maximum and mean
z.min()
z.max()
z.mean()
# normalize this matrix
z = (z - z.min())/(z.max() - z.min())
z

array([[1.        , 0.3545928 ],
       [0.52038012, 0.        ],
       [0.13761335, 0.31123769]])

## 矩阵的点乘

In [62]:
# multiply a 5x3 matrix by a 3x2 matrix
# watch out the brackets for ONES!
np.dot(np.ones((5,3)), np.ones((3,2)))

array([[3., 3.],
       [3., 3.],
       [3., 3.],
       [3., 3.],
       [3., 3.]])

## 生成一个数列，并删除所有大于3且小于8的数

In [11]:
# given a 1D array, neglecting all elements from 3 to 8
z = np.arange(11)
z[(z < 3) | (z > 8)]
z

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

## 生成一个长度为10的向量，首尾分别为0和1

In [13]:
# a vector of size 10 with values ranging from 0 to 1
z = np.linspace(0,1,12)
z

array([0.        , 0.09090909, 0.18181818, 0.27272727, 0.36363636,
       0.45454545, 0.54545455, 0.63636364, 0.72727273, 0.81818182,
       0.90909091, 1.        ])

## 生成一个长度为10的随机向量，并从小到大依次排序

In [15]:
# a random vector of size 10 and sort it
z = np.sort(np.random.rand(10))
z

array([0.06947734, 0.10656195, 0.36068987, 0.65391738, 0.7312718 ,
       0.73334269, 0.76963467, 0.84870418, 0.95094765, 0.98214646])

## 生成一个长度为10的随机向量，并将最大值替换为0

In [16]:
# a random vector of size 10, and replace the maximum value by 0
z= np.random.rand(10)
z[np.argmax(z)] = 0

## 找到一个数列中最接近目标值的数所在的位置及其大小

In [17]:
# how to find the closest value and its index to a given scalar in an array?
z = np.random.rand(20)
value = np.random.uniform(0,1) # samples uniformly distribtued in [0,1)
index = np.argmin(np.abs(z-value))
z[index]

0.5919683584033679

## 将整数数列转化为浮点型

In [110]:
# convert an integer array to a float array
z = np.arange(10)
z.astype(np.float)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

## 将矩阵的每一个数减去所在行/列的平均值

In [20]:
# subtract the mean/max/min of each row/column of a matrix
z = np.random.rand(10,5)
a = z - z.mean(axis = 1).reshape(-1,1)
# axis = 1 as row
z

array([[0.92156539, 0.19426958, 0.99297289, 0.32340972, 0.20048656],
       [0.60408336, 0.31642803, 0.86878059, 0.87678555, 0.36802635],
       [0.22025893, 0.06598462, 0.9064989 , 0.61075016, 0.38054605],
       [0.13545996, 0.83704157, 0.08004936, 0.91189374, 0.66820949],
       [0.91255528, 0.07460582, 0.97237639, 0.600175  , 0.4903478 ],
       [0.56279441, 0.58550951, 0.03473012, 0.38456389, 0.57734026],
       [0.66593761, 0.37471689, 0.25634736, 0.26315357, 0.63728125],
       [0.04045642, 0.39560375, 0.4651083 , 0.59508473, 0.47899138],
       [0.84526921, 0.0559703 , 0.04509691, 0.28424103, 0.22387573],
       [0.18953497, 0.4247346 , 0.35178896, 0.08701679, 0.68457922]])

## 将数列[1, 2, 3, 4, 5]每两个书之间插入3个0

In [21]:
# consider the vector [1,2,3,4,5], build a new vector with 3 consecutive zeros interleaved between each value
z = np.array([1,2,3,4,5])
z0 = np.zeros(len(z) + (len(z)-1) * 3) # original vector + vector of 0
z0[::4] = z
z0

array([1., 0., 0., 0., 2., 0., 0., 0., 3., 0., 0., 0., 4., 0., 0., 0., 5.])

## 将矩阵的第0、1行或列互换位置

In [164]:
# swap two rows (row 0 and row 1) of an array?
z = np.random.uniform(0,10,25).reshape(5,5)
z[[0,1]] = z[[1,0]]
# swap two columns (column 0 and column 1) of an array?
z[:,[0,1]] = z[:,[1,0]]

## 输出一个数列中每n个数的平均值 

In [185]:
# compute averages using a sliding window over an array
# CUMSUM!
z = np.random.uniform(0,10,20)
def moving_avg(a,n=3):
    ret = np.cumsum(z)
    # 0, 1, 2,..., n-1 unchanged
    ret[n:] = ret[n:] - ret[:-n]
    return ret[n-1:]/n
print(moving_avg(z,3))

[4.45299954 4.80837571 3.45130001 4.76289969 6.27048065 7.48874799
 7.38068191 7.13729162 7.51151171 5.4302598  6.31378784 4.16665806
 5.7624401  3.11907633 3.20833366 1.83374191 3.05470636 2.38823077]


## 找到一个数列中最频繁出现的值

In [191]:
# find the most frequent value in an array
# BINCOUNT!
z = np.random.randint(0,10,50)
np.argmax(np.bincount(z))

2

## 找到一个数列中前n个最大值

In [220]:
# find the n largest values of an array
# ARGSORT!
z = np.random.uniform(0,10,10)
z[np.argsort(z)[-5:]]

array([7.50615224, 8.33858471, 9.04815743, 9.47395729, 9.91732865])