# 利用Python进行数据分析之NumPy使用（第四章）

In [None]:
import pandas as pd
# Load a CSV file into the DataFrame `fec`.
# NOTE(review): the path 'ch09/P' has no file extension and looks truncated —
# confirm the intended file name before running this cell.
fec = pd.read_csv('ch09/P')

In [None]:
import numpy as np
# Evaluate sqrt(x^2 + y^2) over a regular 2-D grid and show it as a grayscale image.
# Grid coordinates: values spaced 0.01 apart, starting at -5 and stopping before 5.
points = np.arange(-5, 5, 0.01)
# meshgrid expands the 1-D coordinates into two 2-D arrays holding the x and y
# coordinate of every grid point.
xs, ys = np.meshgrid(points, points)

import matplotlib.pyplot as plt
# Euclidean distance of every grid point from the origin.
z = np.sqrt(xs ** 2 + ys ** 2)
plt.imshow(z, cmap=plt.cm.gray)
# Raw string: "\s" is not a valid escape sequence in a normal string literal and
# triggers a warning on recent Python versions; the resulting text is unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

In [None]:
import numpy as np
# Element-wise selection between two arrays, driven by a boolean mask.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])

cond = np.array([True, False, True, True, False])

# Pure-Python version: take the value from xarr where cond is True,
# otherwise the value from yarr.
result = [
    x_val if take_x else y_val
    for x_val, y_val, take_x in zip(xarr, yarr, cond)
]

# Same selection done with np.where; this rebinds `result` to an ndarray.
result = np.where(cond, xarr, yarr)
print(result)

# The two value arguments may also be scalars: 2 where arr > 0, -2 elsewhere.
arr = np.random.randn(4, 4)
np.where(arr > 0, 2, -2)



### 矩阵运算

In [None]:
import numpy as np

# Matrix product of a 2x3 matrix with a 3x2 matrix; the result is 2x2.
x = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
y = np.array([
    [6, 23],
    [-1, 7],
    [8, 9],
])
print(np.dot(x, y))


In [None]:
from numpy.linalg import inv, qr
from numpy import random

# Build the matrix X^T X, invert it, and compute its QR decomposition.
# X is square on purpose: if X had fewer rows than columns (e.g. 5x8), X^T X
# would be rank-deficient and therefore singular, so its "inverse" would be
# numerically meaningless.
X = random.randn(5, 5)
print('X', X)

mat = X.T.dot(X)
print('mat', mat)

# Compute the inverse once and keep it under a name so it can be inspected.
mat_inv = inv(mat)

# Sanity check: mat times its inverse should be close to the identity matrix,
# up to floating-point round-off.
identity_check = mat.dot(mat_inv)

# QR decomposition of mat into the factors q and r.
q, r = qr(mat)
print('q, r', q, r)

## 随机数据生成

### seed 随机种子
### permutation 返回一个序列的随机排列（不修改原序列）
### shuffle 对一个序列就地随机排列
### rand 均匀分布的样本值
### randint 从给定的上下限范围内随机选取整数
### randn 产生正态分布(平均值为0，标准差为1)的样本值，类似于MATLAB接口
### binomial 产生二项分布的样本值
### normal 产生正态（高斯）分布的样本值
### beta 产生Beta分布的样本值
### chisquare 产生卡方分布的样本值
### gamma 产生Gamma分布的样本值
### uniform 产生在[0, 1)中均匀分布的样本值

In [None]:
import numpy as np
# Draw a 4x4 array of normally distributed samples (mean 0, standard deviation 1).
samples = np.random.normal(loc=0.0, scale=1.0, size=(4, 4))
samples

In [None]:
from random import normalvariate
N = 1000000
%timeit smaples = [normalvariate(0, 1) for _ in range(N)]

import numpy as np
%timeit np.random.normal(size=N)

### 随机漫步

In [28]:
# Simulate many random walks at once: one row per walk, one column per step.
nwalks = 5000
nsteps = 1000

# Coin flips (0 or 1) for every step of every walk; the upper bound 2 is exclusive.
draws = np.random.randint(low=0, high=2, size=(nwalks, nsteps))
# Turn the flips into steps: 1 becomes +1, 0 becomes -1.
steps = np.where(draws > 0, 1, -1)

# Position of each walk after every step: cumulative sum along the step axis.
walks = np.cumsum(steps, axis=1)
walks


array([[  1,   2,   3, ...,  -2,  -3,  -4],
       [  1,   0,   1, ...,   0,   1,   2],
       [ -1,   0,   1, ...,  68,  67,  66],
       ...,
       [ -1,  -2,  -1, ...,  56,  55,  56],
       [ -1,  -2,  -1, ..., -36, -37, -36],
       [  1,   0,   1, ...,  10,  11,  12]])

In [32]:
# Report the largest and the smallest position reached by any walk at any step.
print('最大值', np.max(walks))
print('最小值', np.min(walks))

最大值 131
最小值 -113


In [33]:
# One boolean per walk: True if the walk is ever at least 30 away from the origin.
hits30 = np.any(np.abs(walks) >= 30, axis=1)
print(hits30)

# Summing the boolean flags counts the walks that reached that distance.
print(hits30.sum())

[False  True  True ...  True  True  True]
3408


In [34]:
# Keep only the walks (rows) that reached an absolute position of 30, then use
# argmax along axis 1 to get the crossing time: on a boolean row, argmax
# returns the index of the first True.
crossing_times = np.argmax(np.abs(walks[hits30]) >= 30, axis=1)

crossing_times.mean()


497.44659624413146