In [6]:
# 时间序列以及截面对齐
from pandas import Series,DataFrame

s1 = Series(range(3), index=['a', 'b', 'c'])
s2 = Series(range(4), index=['d', 'b', 'c', 'e'])
s3 = Series(range(3), index=['f', 'a', 'c'])
DataFrame({'one':s1, 'two':s2, 'three':s3})

Unnamed: 0,one,two,three
a,0.0,,1.0
b,1.0,1.0,
c,2.0,2.0,2.0
d,,0.0,
e,,3.0,
f,,,0.0


In [7]:
DataFrame({'one':s1, 'two':s2, 'three':s3}, index=list('face'))

Unnamed: 0,one,two,three
f,,,0.0
a,0.0,,1.0
c,2.0,2.0,2.0
e,,3.0,


In [9]:
import numpy as np
import pandas as pd
ts1 = Series(np.random.randn(3),
            index=pd.date_range('2012-6-13', periods=3, freq='W-WED'))
ts1

2012-06-13    1.023319
2012-06-20   -0.541848
2012-06-27   -0.548062
Freq: W-WED, dtype: float64

In [11]:
# 将其重采样到工作日（星期一到星期五）频率
ts2 = ts1.resample('B')
ts2

DatetimeIndexResampler [freq=<BusinessDay>, axis=0, closed=left, label=left, convention=start, base=0]

In [12]:
# 用ffill填充前面的空白
ts1.resample('B').ffill()

2012-06-13    1.023319
2012-06-14    1.023319
2012-06-15    1.023319
2012-06-18    1.023319
2012-06-19    1.023319
2012-06-20   -0.541848
2012-06-21   -0.541848
2012-06-22   -0.541848
2012-06-25   -0.541848
2012-06-26   -0.541848
2012-06-27   -0.548062
Freq: B, dtype: float64

In [13]:
dates = pd.DatetimeIndex(['2012-6-12', '2012-6-17', '2012-6-18',
                         '2012-6-21', '2012-6-22', '2012-6-29'])
ts2 = Series(np.random.randn(6), index=dates)
ts2

2012-06-12   -0.220241
2012-06-17    0.846291
2012-06-18    0.835143
2012-06-21   -0.513421
2012-06-22    0.154146
2012-06-29    0.314019
dtype: float64

In [14]:
# 将ts1中‘最当前’的值（即前向填充）加到ts2上，使用reindex
ts1.reindex(ts2.index).ffill()

2012-06-12   NaN
2012-06-17   NaN
2012-06-18   NaN
2012-06-21   NaN
2012-06-22   NaN
2012-06-29   NaN
dtype: float64

In [15]:
ts2 + ts1.reindex(ts2.index).ffill()

2012-06-12   NaN
2012-06-17   NaN
2012-06-18   NaN
2012-06-21   NaN
2012-06-22   NaN
2012-06-29   NaN
dtype: float64

In [18]:
# 两个有关GDP和通货膨胀的宏观经济时间序列
gdp = Series([1.78, 1.94, 2.08, 2.01, 2.15, 2.31, 2.46],
            index=pd.period_range('1984Q4',periods=7, freq='Q-SEP'))

infl = Series([0.025, 0.045, 0.037, 0.04],
             index=pd.period_range('1982', periods=4, freq='A-DEC'))


In [19]:
gdp

1984Q4    1.78
1985Q1    1.94
1985Q2    2.08
1985Q3    2.01
1985Q4    2.15
1986Q1    2.31
1986Q2    2.46
Freq: Q-SEP, dtype: float64

In [20]:
infl

1982    0.025
1983    0.045
1984    0.037
1985    0.040
Freq: A-DEC, dtype: float64

In [21]:
infl_q = infl.asfreq('Q-SEP', how='end')
infl_q

1983Q1    0.025
1984Q1    0.045
1985Q1    0.037
1986Q1    0.040
Freq: Q-SEP, dtype: float64

In [22]:
infl_q.reindex(gdp.index).ffill()

1984Q4      NaN
1985Q1    0.037
1985Q2    0.037
1985Q3    0.037
1985Q4    0.037
1986Q1    0.040
1986Q2    0.040
Freq: Q-SEP, dtype: float64

In [27]:
data1= DataFrame(np.ones((6,3), dtype=float),
                columns=['a', 'b', 'c'],
                index=pd.date_range('6/12/2012', periods=6))

data2 = DataFrame(np.ones((6,3), dtype=float)*2,
                 columns=['a', 'b', 'c'],
                 index=pd.date_range('6/1/2012', periods=6))

spliced = pd.concat([data1.loc[:'2012-06-14'], data2.loc['2012-06-15':]])
spliced

Unnamed: 0,a,b,c
2012-06-12,1.0,1.0,1.0
2012-06-13,1.0,1.0,1.0
2012-06-14,1.0,1.0,1.0


In [29]:
import pandas.io.data as web
price = web.get_data_yahoo('AAPL', '2011-01-01')['Adj Close']
price[-5:]

ModuleNotFoundError: No module named 'pandas.io.data'

In [31]:
names = ['AAPL', 'GOOG', 'MSFT', 'DELL', 'GS', 'MS', 'BAC', 'C']
def get_px(stock, start, end):
    return web.get_data_yahoo(stock, start, end)['Adj Close']
px = DataFrame({n:get_px(n,'1/1/2009', '6/1/2012') for n in names})


NameError: name 'web' is not defined

In [32]:
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)

True

In [33]:
np.issubdtype(floats.dtype, np.floating)

True

In [34]:
# 调用dtype的mro查看其所有的父类
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [35]:
arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [36]:
# 向数组的实例方法reshape传入一个表示新形状的元组
# 可实现将一个数组从一个形状转换为另一个形状
arr.reshape((4,2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [37]:
arr.reshape((4,2)).reshape((2,4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [38]:
# 作为参数的形状的其中一维可以是-1，表示该维度的大小由数据本身
# 推断而来
arr = np.arange(15)
arr.reshape((5,-1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [39]:
# 由于数组的shape属性是一个元组，因此它也可以被传入reshape
other_arr = np.ones((3, 5))
other_arr.shape

(3, 5)

In [40]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [41]:
# 与reshape将一维数组转换为多维数组的运算过程相反的运算通常称为
# 扁平化或散开（raveling）
arr = np.arange(15).reshape((5,3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [42]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [43]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [44]:
arr = np.arange(12).reshape((3,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [45]:
# reshape和reval函数可以接受一个表示数组数据存放顺序的order参数
# 一般可以是'C'或'F'
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [46]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [47]:
# numpy.contenate可以按指定轴将一个数组组成的序列连接在一起
arr1 = np.array([[1,2,3], [4,5,6]])
arr2 = np.array([[7,8,9],[10,11,12]])
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [48]:
np.concatenate([arr1, arr2],axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [49]:
# 常见的连接操作，还可以使用vstack和hstack方法
np.vstack((arr1,arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [50]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [51]:
from numpy.random import randn
arr = randn(5,2)
arr

array([[ 0.9693448 ,  1.06775037],
       [ 1.09239798,  1.96391148],
       [-0.67658074, -2.02023997],
       [ 2.03681677,  0.84273467],
       [ 0.28232811,  1.50707266]])

In [52]:
# split用于将一个数组沿指定轴拆分为多个数组
first, second, third = np.split(arr, [1,3])
first

array([[0.9693448 , 1.06775037]])

In [53]:
second

array([[ 1.09239798,  1.96391148],
       [-0.67658074, -2.02023997]])

In [54]:
third

array([[2.03681677, 0.84273467],
       [0.28232811, 1.50707266]])

In [55]:
# r_和c_他们可以使数组的堆叠操作更为简洁
arr = np.arange(6)
arr1 = arr.reshape((3,2))
arr2 = randn(3,2)
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [ 0.04249315,  1.79305504],
       [-0.37468247,  0.24563408],
       [ 0.5350575 , -1.17456585]])

In [56]:
np.c_[np.r_[arr1,arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [ 0.04249315,  1.79305504,  3.        ],
       [-0.37468247,  0.24563408,  4.        ],
       [ 0.5350575 , -1.17456585,  5.        ]])

In [57]:
# repeat会将数组中的各个元素重复一定次数，从而产生一个更大的数组
arr = np.arange(3)
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [58]:
# 如果传入的是一组整数，则各元素可以重复不同次数
arr.repeat([2,3,4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [59]:
# 对于多维数组，可以让他们的元素沿指定轴重复
arr = randn(2, 2)
arr

array([[ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155]])

In [60]:
arr.repeat(2, axis=0)

array([[ 0.00772137, -0.85867698],
       [ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155],
       [ 0.34173045,  0.59446155]])

In [61]:
arr.repeat([2,3], axis=0)

array([[ 0.00772137, -0.85867698],
       [ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155],
       [ 0.34173045,  0.59446155],
       [ 0.34173045,  0.59446155]])

In [62]:
arr.repeat([2,3], axis=1)

array([[ 0.00772137,  0.00772137, -0.85867698, -0.85867698, -0.85867698],
       [ 0.34173045,  0.34173045,  0.59446155,  0.59446155,  0.59446155]])

In [63]:
# title的功能是沿指定轴向堆叠数组的副本
arr

array([[ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155]])

In [66]:
np.tile(arr, 2)

array([[ 0.00772137, -0.85867698,  0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155,  0.34173045,  0.59446155]])

In [67]:
# 第二个参数是瓷砖的数量
arr

array([[ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155]])

In [69]:
np.tile(arr, (2,1))

array([[ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155],
       [ 0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155]])

In [70]:
np.tile(arr, (3,2))

array([[ 0.00772137, -0.85867698,  0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155,  0.34173045,  0.59446155],
       [ 0.00772137, -0.85867698,  0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155,  0.34173045,  0.59446155],
       [ 0.00772137, -0.85867698,  0.00772137, -0.85867698],
       [ 0.34173045,  0.59446155,  0.34173045,  0.59446155]])

In [71]:
arr = np.arange(10)*100
inds = [7,1,2,6]
arr[inds]

array([700, 100, 200, 600])

In [72]:
arr.take(inds)


array([700, 100, 200, 600])

In [73]:
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [75]:
# 将标量值跟数组合并时会发生最简单的广播
arr = np.arange(5)
arr * 4

array([ 0,  4,  8, 12, 16])

In [76]:
arr = randn(4, 3)
arr.mean(0)

array([ 1.02309781, -0.07772562, -0.14793203])

In [77]:
demeaned = arr - arr.mean(0)
demeaned

array([[-1.04068246,  0.54719068, -0.4443597 ],
       [ 1.45907762, -1.38287108,  0.49721868],
       [ 0.5752223 ,  0.42106975,  0.22094524],
       [-0.99361747,  0.41461066, -0.27380423]])

In [78]:
arr

array([[-0.01758464,  0.46946506, -0.59229172],
       [ 2.48217544, -1.4605967 ,  0.34928665],
       [ 1.59832011,  0.34334413,  0.07301321],
       [ 0.02948035,  0.33688504, -0.42173626]])

In [81]:
row_means = arr.mean(1)
row_means.reshape((4, 1))

array([[-0.04680377],
       [ 0.45695513],
       [ 0.67155915],
       [-0.01845696]])

In [82]:
demeaned = arr - row_means.reshape((4,1))
demeaned.mean(1)

array([-3.70074342e-17,  7.40148683e-17, -7.40148683e-17, -1.85037171e-17])

In [83]:
arr - arr.mean(1).reshape((4,1))

array([[ 0.02921913,  0.51626883, -0.54548795],
       [ 2.02522031, -1.91755183, -0.10766848],
       [ 0.92676096, -0.32821502, -0.59854594],
       [ 0.0479373 ,  0.355342  , -0.4032793 ]])

In [84]:
arr = np.zeros((4, 4))
arr_3d = arr[:,np.newaxis,:]
arr_3d.shape

(4, 1, 4)

In [86]:
arr_1d = np.random.normal(size=3)
arr_1d[:,np.newaxis]

array([[-0.07332593],
       [ 0.82182304],
       [ 0.21476859]])

In [88]:
arr = randn(3, 4, 5)
depth_means = arr.mean(2)
depth_means

array([[-0.28195218, -0.20082945, -0.24685242, -0.39101929],
       [ 0.84115007,  0.20496612,  0.09617796,  0.26383064],
       [ 0.20297138,  0.08235723,  0.88280682, -0.23214305]])

In [89]:
arr = np.zeros((4,3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [90]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:,np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [91]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [92]:
# reduce接受一个数组参数，并通过一系列的二元运算对其进行聚合
arr = np.arange(10)
np.add.reduce(arr)

45

In [93]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [94]:
arr.sum()

45

In [95]:
arr = randn(5,5)
arr[::2].sort(1)
arr[:,:-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [ True, False, False,  True],
       [ True,  True,  True,  True],
       [ True, False, False,  True],
       [ True,  True,  True,  True]])

In [96]:
# accumulate产生一个跟原数组大小相同的中间累计值数组
arr = np.arange(15).reshape((3,5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [97]:
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]], dtype=int32)

In [98]:
arr = np.arange(3).repeat([1,2,2])
arr

array([0, 1, 1, 2, 2])

In [99]:
# outer用于计算两个数组的叉积
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [102]:
# outer输出结果的维度是两个输入数据的维度之和
result = np.subtract.outer(randn(3,4), randn(5))
result.shape

(3, 4, 5)

In [103]:
# reduceat用于计算“局部约简”,其实就是一个对数据各切片进行聚合的
# groupby运算
arr = np.arange(10)
# 最终结果是在arr[0:5], arr[5:8], arr[8:]
np.add.reduceat(arr, [0,5,8])

array([10, 18, 17], dtype=int32)

In [104]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [105]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [106]:
np.add.reduceat(arr, [0,2,4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]], dtype=int32)

In [108]:
# 结构化和记录式数组
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

array([(1.5       ,  6), (3.14159265, -2)],
      dtype=[('x', '<f8'), ('y', '<i4')])

In [109]:
sarr[0]

(1.5, 6)

In [110]:
sarr[0]['y']

6

In [111]:
# 在定义结构化dtype时，你可以再设置一个形状
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [112]:
arr = randn(6)
arr.sort()
arr

array([-0.90679854, -0.4513606 , -0.22459773,  0.19525041,  0.39952594,
        1.51560835])

In [113]:
arr = randn(3, 5)
arr

array([[-1.40413792,  0.39790903, -1.4998557 ,  0.47487241,  0.77862368],
       [ 0.03263966, -0.85736126,  0.32413138, -1.15985724,  0.42517327],
       [ 1.16046131, -2.00281075, -0.05484956, -0.69523756, -0.17128936]])

In [115]:
arr[:, 1].sort() # Sort first column values in-place
arr


array([[-1.40413792, -2.00281075, -1.4998557 ,  0.47487241,  0.77862368],
       [ 0.03263966, -0.85736126,  0.32413138, -1.15985724,  0.42517327],
       [ 1.16046131,  0.39790903, -0.05484956, -0.69523756, -0.17128936]])

In [116]:
arr = randn(5)
arr

array([-0.19780268,  1.09006648, -0.07547024, -1.29184588, -0.57237968])

In [117]:
np.sort(arr)

array([-1.29184588, -0.57237968, -0.19780268, -0.07547024,  1.09006648])

In [118]:
arr

array([-0.19780268,  1.09006648, -0.07547024, -1.29184588, -0.57237968])

In [122]:
arr = randn(3, 5)
arr

array([[-1.04616285, -1.03318879,  1.36075311, -0.35954136, -0.56115068],
       [ 0.06942085, -0.27744416, -1.34447721,  0.4062229 ,  0.11364009],
       [ 0.76205029,  1.94538131,  2.53079427, -2.00767249, -0.31791282]])

In [123]:
arr.sort(axis=1)
arr

array([[-1.04616285, -1.03318879, -0.56115068, -0.35954136,  1.36075311],
       [-1.34447721, -0.27744416,  0.06942085,  0.11364009,  0.4062229 ],
       [-2.00767249, -0.31791282,  0.76205029,  1.94538131,  2.53079427]])

In [124]:
arr[::-1] # 可以返回一个反序的列表

array([[-2.00767249, -0.31791282,  0.76205029,  1.94538131,  2.53079427],
       [-1.34447721, -0.27744416,  0.06942085,  0.11364009,  0.4062229 ],
       [-1.04616285, -1.03318879, -0.56115068, -0.35954136,  1.36075311]])

In [125]:
# argsort和numpy.lexsort可以实现给定一个或多个键，得到一个由整数组成的索引数组
# 其中的索引值说明了数据在新顺序下的位置
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer

array([1, 2, 4, 3, 0], dtype=int64)

In [126]:
values[indexer]

array([0, 1, 2, 3, 5])

In [128]:
arr = randn(3, 5)
arr[0] = values
arr

array([[ 5.        ,  0.        ,  1.        ,  3.        ,  2.        ],
       [ 0.18518733, -1.08004172,  0.80167767,  1.12193069,  0.13113357],
       [ 2.14313374,  1.06139602,  1.02191116,  2.93446309,  0.37786412]])

In [129]:
arr[:, arr[0].argsort()]

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [-1.08004172,  0.80167767,  0.13113357,  1.12193069,  0.18518733],
       [ 1.06139602,  1.02191116,  0.37786412,  2.93446309,  2.14313374]])

In [132]:
# 假设我们想对一些以姓和名标识的数据进行排序
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
m = zip(last_name[sorter], first_name[sorter])
for i in m:
    print(i)

('Arnold', 'Jane')
('Arnold', 'Steve')
('Jones', 'Bill')
('Jones', 'Bob')
('Walters', 'Barbara')


In [133]:
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2,2,1,1,1])
indexer = key.argsort(kind='mergesort')
indexer

array([2, 3, 4, 0, 1], dtype=int64)

In [134]:
# searchsorted是一个在有序数组上执行二分查找的数组方法,
# 在有序数组中查找元素
arr = np.array([0,1,7,12,15])
arr.searchsorted(9)

3

In [135]:
# 传入一组值就能得到一组索引
arr.searchsorted([0,8,11,16])

array([0, 3, 3, 5], dtype=int64)

In [136]:
arr = np.array([0,0,0,1,1,1,1])
arr.searchsorted([0,1])

array([0, 3], dtype=int64)

In [138]:
arr.searchsorted([0,1], side='right')

array([3, 7], dtype=int64)

In [139]:
data = np.floor(np.random.uniform(0,10000,size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

array([9043., 3907., 1622., 6803., 8421., 5656., 6549., 5727., 7526.,
       1708., 6679., 7246., 8831., 3179., 6084., 8864.,  738., 5198.,
       2218.,  800., 2695., 5382., 7950., 9837., 4623., 5156., 7278.,
       2584., 6220., 1779., 6695., 7700., 9555., 6124.,  275., 1710.,
       5704., 7447., 8428.,  844., 8476.,  548., 1084., 6403., 6981.,
       4491., 9139., 2126., 2521.,  829.])

In [140]:
lables = bins.searchsorted(data)
lables

array([4, 3, 3, 4, 4, 4, 4, 4, 4, 3, 4, 4, 4, 3, 4, 4, 2, 4, 3, 2, 3, 4,
       4, 4, 3, 4, 4, 3, 4, 3, 4, 4, 4, 4, 2, 3, 4, 4, 4, 2, 4, 2, 3, 4,
       4, 3, 4, 3, 3, 2], dtype=int64)

In [141]:
mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(1000,1000))
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [144]:
section = mmap[:5]
section[:] = np.random.randn(5, 1000)
mmap.flush()
mmap

memmap([[-0.5661124 , -0.08857305, -0.93179042, ..., -0.86292139,
         -0.07657177, -0.90344755],
        [-0.59358843, -0.28830097,  0.71585764, ..., -0.82337269,
         -0.96112202,  0.71741109],
        [-1.56730636, -1.30543886, -1.43760981, ..., -0.82825736,
          1.97415883,  0.46975286],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]])