## Ch04 NumPy基础

### ndarray: 一种多维数组对象
- create ndarray
    - array
    - asarray
    - arange
    - ones/ ones_like
    - zeros/ zeros_like
    - empty/ empty_like
    - eye/ identity
- methods
    - random
    - shape
    - reshape
    - ndim
    - dtype

In [None]:
import numpy as np
data = np.random.rand(2,3)

In [None]:
data
data*10
data.shape
data.dtype

In [None]:
data1 = [6, 7.5, 8, 0, 1]
arr1 = np.array(data1)
arr1
arr1.shape

In [None]:

data2 = [[1,2,3,4], [5,6,7,8]]
arr2 = np.array(data2) 
arr2.ndim  # 2
arr2.shape # (2,4)
arr2.dtype #int64

In [None]:
np.zeros((3,6))

In [None]:
np.empty((2,3,2))

In [None]:
np.arange(15)

In [None]:
arr1 = np.array([1,2,3], dtype=np.float64)
arr2 = np.array([1,2,3], dtype=np.int32)

In [None]:
arr1.dtype

In [None]:
arr2.dtype

In [None]:
arr = np.array([1,2,3,4,5])
arr.dtype

In [None]:
float_arr = arr.astype(np.float64)
float_arr.dtype

In [None]:
# np.string_ was removed in NumPy 2.0; np.bytes_ is the same fixed-width
# byte-string dtype and is available in both old and new releases.
numberic_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
# astype(float) parses each byte string into a float64 value.
numberic_strings.astype(float)

#### 数组和标量之间的运算

In [None]:
import numpy as np
arr = np.array([[1.,2,3],[4.,5.,6.]])
arr

In [None]:
arr*arr

In [None]:
1/arr

#### 基本的索引和切片

In [None]:
arr = np.arange(10)
arr

In [None]:
arr[5:8]

In [None]:
arr[5:8] = 12
arr

In [None]:
arr_slice = arr[5:8]
arr_slice[1] = 12345
arr

In [None]:
arr_slice[:] = 666
arr

In [None]:
arr2d = np.array([[1,2,3],[4,5,6],[7,8,9]])
arr2d

In [None]:
arr2d[1][2]  ==  arr2d[1, 2]

In [None]:
arr3d = np.array([ 
    [ [1,2,3],[4,5,6] ], 
    [ [7,8,9], [10,11,12] ]
])
arr3d

In [None]:
arr3d.shape

In [None]:
arr3d[0]

In [None]:
arr3d[0] = 666
arr3d
arr3d[1,0]

##### 切片索引

In [None]:
arr[1:6]
arr2d[:2]
arr2d[:2, 1:]
arr2d[:, :1]

#### 布尔型索引

In [None]:
import math
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Joe', 'Will','Joe'])
data = np.random.randn(7,4)

In [None]:
names

In [None]:
data

In [None]:
names == 'Bob'

In [None]:
mask = (names=='Bob') | (names=='Joe')
print(mask)
print(data.shape)
print(mask.shape)
data[mask]

In [None]:
data[data < 0 ] = 0
data

#### 花式索引
花式索引和切片不一样，它总是将数据复制到新数组中。

In [None]:
# Build an 8x4 float array in which every row is filled with its own index.
arr = np.empty((8, 4))
for row_no, row in enumerate(arr):
    # Each row is a view into arr, so the slice assignment writes through.
    row[:] = row_no
arr

In [None]:
arr[[4, 3, 0, 6]]

In [None]:
arr[[-3, -5, -7]]

In [None]:
arr = np.arange(32).reshape((8,4))
arr

In [None]:
arr[[1,5,7,2], [0,3,1,2]]

In [None]:
arr[[1,5,7,2]] [: , [0,3,1,2]]

In [None]:
arr[ np.ix_([1,5,7,2], [0,3,1,2]) ]

#### 数组转置和轴对换
- transpose 转置
- 轴对换
- 内积 np.dot

In [None]:
arr = np.arange(15).reshape((3,5))
arr

In [None]:
arr.T

In [None]:
arr = np.arange(16).reshape((2,2,4))
arr.transpose((1,0,2))

In [None]:
arr = np.arange(16).reshape((2,2,4))
arr.swapaxes(1,2)

### 通用函数： 快速的元素级数组函数
- 一元ufunc
    - abs, fabs
    - sqrt
    - square
    - exp
    - log, log10, log2, log1p
    - sign
    - ceil
    - floor
    - rint
    - modf
    - isnan
    - isfinite, isinf
    - cos, cosh, sin, sinh,tan, tanh
    - arccos, arccosh, arcsin, arcsinh, arctan, arctanh
    - logical_not
    
- 二元ufunc
    - add
    - subtract
    - multiply
    - divide, floor_divide
    - power
    - maximum, fmax
    - minimum, fmin
    - mod
    - copysign
    - greater, greater_equal
    - less, less_equal
    - equal, not_equal
    - logical_and, logical_or, logical_xor
    

In [None]:
arr = np.arange(10)
np.sqrt(arr)

In [None]:
x = np.random.randn(8)
y = np.random.randn(8)
print(x)
print(y)
np.maximum(x,y)

In [None]:
arr = np.random.randn(7)*5
np.modf(arr)

### 利用数组进行数据处理

In [None]:
import matplotlib.pyplot as plt

# Evaluate sqrt(x^2 + y^2) on a square grid covering [-5, 5) with step 0.01.
points = np.arange(-5, 5, 0.01)
xs, ys = np.meshgrid(points, points)
z = np.sqrt(xs**2 + ys**2)

# Show the grid as a grayscale image with a colour scale next to it.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" in a normal literal is an invalid escape sequence, which
# recent Python versions report with a warning. The text itself is unchanged.
plt.title(r'Image plot of $\sqrt{x^2+y^2}$ for a grid of values')


#### 将条件逻辑表述为数组运算
布尔值在计算的过程中可以被当作0或者1来使用


In [None]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

# Element-by-element selection in plain Python: take the value from xarr
# where cond is True, otherwise the value from yarr.
result = []
for from_x, from_y, pick_x in zip(xarr, yarr, cond):
    result.append(from_x if pick_x else from_y)
result

In [None]:
arr = np.random.randn(4,4)
arr
np.where(arr>0, 2, -2) # 正数设置为2，负数设置为-2
np.where(arr>0, 2, arr)

In [None]:
cond1 = np.array([True, False, False])
cond2 = np.array([False, True, False])
cond3 = np.array([False, False, True])

# Reference implementation: classify every position with a Python loop.
#   both conditions true -> 0, only cond1 -> 1, only cond2 -> 2, neither -> 3
result = []
for c1, c2 in zip(cond1, cond2):
    if c1 and c2:
        result.append(0)
    elif c1:
        result.append(1)
    elif c2:
        result.append(2)
    else:
        result.append(3)
print(result)

# Equivalent: nested np.where calls evaluated on the whole arrays at once.
result_where = np.where(
    cond1 & cond2, 0,
    np.where(cond1, 1, np.where(cond2, 2, 3)),
)

# Equivalent: booleans act as 0/1 in arithmetic. Logical NOT on a boolean
# array is `~`; unary `-` is arithmetic negation and is rejected for boolean
# arrays by current NumPy. The three masks are mutually exclusive, so at most
# one term is non-zero per position (all are zero when both conditions hold).
result_arith = (
    1 * (cond1 & ~cond2)
    + 2 * (cond2 & ~cond1)
    + 3 * ~(cond1 | cond2)
)
result_arith
    
        

In [None]:
a = False
b = True
a-b
c = np.array([a,b])
np.logical_not(c)

#### 数学和统计方法
- sum
- mean
- std, var
- min, max
- argmin, argmax
- cumsum, cumprod


In [None]:
arr = np.random.randn(5,4)
arr.mean() == np.mean(arr)
arr.sum()

arr.mean(axis=1) == arr.mean(1)
arr.sum(axis=0) == arr.sum(0)

In [None]:
arr = np.array([ [0,1,2], [3,4,5], [6,7,8] ])
arr.cumsum(0)

#### 用于布尔型数组的方法

In [None]:
arr = np.random.randn(100)
(arr>0).sum()

In [None]:
bools = np.array( [False, False, True, False] )
bools.any()
bools.all()

#### 排序

In [None]:
arr  = np.random.randn(8)
arr.sort()
arr

In [None]:
arr = np.random.randn(5,3)
arr.sort(1)
arr

In [None]:
large_arr = np.random.randn(1000) 
large_arr.sort()
large_arr[int(0.05*len(large_arr))] #5%分位数

#### 唯一化以及其他的集合逻辑
- unique(X)
- intersect1d(X,Y)
- union1d(X,Y)
- in1d(x, A)
- setdiff1d(X, Y)
- setxor1d(X, Y)

In [None]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob','Will', 'Joe'])
np.unique(names)
sorted(set(names))
(np.unique(names) == sorted(set(names))).all()



### 用于数组的文件输入输出
#### 将数组以二进制格式保存到磁盘

In [None]:
arr = np.arange(10)
np.save('some_array', arr)
np.load('some_array.npy')

In [None]:
# Save several arrays into one archive file (np.savez does not compress;
# np.savez_compressed is the compressing variant).
np.savez('array_archive.npz', a=arr, b=arr)
# np.load returns a lazy NpzFile that keeps the archive open; the with-block
# closes it once the needed array has been read.
with np.load('array_archive.npz') as arch:
    arch_a = arch['a']
arch_a

#### 存取文本文件

# pandas was never imported in this notebook; import it here so that the
# read_csv call below does not fail with a NameError.
import pandas as pd

# Read the same comma-separated file twice: once as a plain NumPy array ...
arr = np.loadtxt('array_ex.txt', delimiter=',')
# ... and once as a DataFrame. header=None makes pandas treat the first line
# as data, matching what np.loadtxt does with the file.
pf = pd.read_csv('array_ex.txt', header=None)

### 线性代数

In [None]:
# np.float (an alias of the builtin float) was removed in NumPy 1.24;
# np.float64 names the same 64-bit floating-point dtype explicitly.
x = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ],
    dtype=np.float64,
)

y = np.array(
    [
        [6, -23],
        [-1, 7],
        [8, 9],
    ],
    dtype=np.float64,
)

x.shape  # (2, 3)
y.shape  # (3, 2)
# Matrix product of a (2, 3) array with a (3, 2) array gives a (2, 2) array;
# the method form and the function form are equivalent.
x.dot(y)
np.dot(x, y)

In [None]:
np.dot(x, np.ones(3))

**numpy.linalg中有一组标准的矩阵分解运算以及诸如求逆和行列式之类的东西**

- diag
- dot
- trace
- det
- eig
- inv
- pinv
- qr
- svd
- solve
- lstsq

In [None]:
from numpy.linalg import inv, qr
X = np.random.randn(5,5)

In [None]:
mat = X.T.dot(X)
mat

In [None]:
inv(mat)

In [None]:
np.dot(mat, inv(mat))

In [None]:
q,r = qr(mat)

In [None]:
r

### 随机数生成

- seed
- permutation
- shuffle
- rand 均匀分布
- randint 整数
- randn 标准正态分布（均值0，标准差1）
- binomial 二项式分布
- normal
- beta
- chisquare 卡方分布
- gamma
- uniform [0,1)均匀分布


In [None]:
sample = np.random.normal(size=(4,4))

In [None]:
sample

In [None]:
from random import normalvariate
N = 1000000
%timeit samples = [normalvariate(0,1) for _ in range(N)]

In [None]:
%timeit samples = np.random.normal(size=N)

### 随机漫步

import random

# Pure-Python walk: start at 0 and move +1 or -1 on each of 1000 coin flips,
# recording the position after every step.
position = 0
walk = [position]
steps = 1000
for _ in range(steps):
    if random.randint(0, 1):
        step = 1
    else:
        step = -1
    position += step
    walk.append(position)


# Array version of the same idea: draw all coin flips at once, map them to
# +1/-1 steps and accumulate them into positions.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = np.cumsum(steps)
print (np.min(walk))
print (np.max(walk))
# Index of the first step at which the walk is at least 10 away from the
# origin (argmax returns the first True; it is 0 when there is none).
np.argmax(np.abs(walk) >= 10)

#### 一次模拟多个随机漫步

# Simulate nwalks independent random walks of nsteps steps each, one per row.
nwalks = 5000
nsteps = 1000
# Fair coin flips: randint(0, 2) yields integers 0 or 1 (upper bound excluded).
draws = np.random.randint(0, 2, size=(nwalks, nsteps))  # 0 or 1
# Map 1 -> +1 and 0 -> -1, the same mapping as the single-walk example.
steps = np.where(draws > 0, 1, -1)
# Cumulative sum along axis 1 turns each row of steps into positions.
walks = steps.cumsum(1)
walks
walks.max()
walks.min()
# True for every walk that gets at least 30 away from the origin at any point.
hits30 = (np.abs(walks) >= 30).any(1)
hits30
hits30.sum()
# For those walks only: index of the first step at which |position| >= 30
# (argmax returns the position of the first True in each row).
crossing_times = (np.abs(walks[hits30]) >= 30).argmax(1)
crossing_times.mean()

# Alternative step source: draw every step from a normal distribution with
# mean 0 and standard deviation 0.25, laid out as one row per walk and one
# column per step (nwalks and nsteps come from the cell above).
steps = np.random.normal(loc=0, scale=0.25, size=(nwalks, nsteps))