# 时间数据重采样

## resample

In [None]:
import pandas as pd
import numpy as np

# 100 consecutive calendar days starting on 2017-01-01.
date_rng = pd.date_range(start='20170101', periods=100, freq='D')
# The value at each date is simply its 0-based position in the range.
ser_obj = pd.Series(range(date_rng.size), index=date_rng)
# Preview the first week of data.
print(ser_obj.iloc[:7])

In [None]:
# Bucket the daily series by calendar month (labelled with the month end).
# NOTE(review): pandas >= 2.2 prefers the alias 'ME' over 'M' — confirm the
# pandas version targeted before switching.
monthly_bins = ser_obj.resample('M')
resample_month_sum = monthly_bins.sum()    # total per month
resample_month_mean = monthly_bins.mean()  # average per month

for label, monthly in (('按月求和：', resample_month_sum),
                       ('按月求均值：', resample_month_mean)):
    print(label, monthly)

## 降采样

In [None]:
# Downsample: aggregate the daily series into consecutive 5-day buckets.
five_day_bins = ser_obj.resample('5D')
five_day_sum_sample = five_day_bins.sum()
five_day_mean_sample = five_day_bins.mean()
# ohlc = open / high / low / close of each bucket.
five_day_ohlc_sample = five_day_bins.ohlc()

print('降采样，sum', five_day_sum_sample, sep='\n')

In [None]:
# Show the per-bucket mean computed for the 5-day downsample.
print('降采样，mean', five_day_mean_sample, sep='\n')

In [None]:
# Show the open/high/low/close table computed for the 5-day downsample.
print('降采样，ohlc', five_day_ohlc_sample, sep='\n')

In [None]:
# Downsampling can also be expressed as a groupby: the key function receives
# each index timestamp and returns its month number.
month_totals = ser_obj.groupby(lambda ts: ts.month).sum()
print(month_totals)

In [None]:
# Sum the series per day of week (0 = Monday ... 6 = Sunday).
# Timestamp.weekday is a *method*, so `lambda x: x.weekday` (without the call
# parentheses) yields a bound method per row when pandas applies the key
# function element-wise, producing meaningless groups. DatetimeIndex.weekday
# is a property that returns the integer weekday for every row, so group on
# that directly.
weekday_totals = ser_obj.groupby(ser_obj.index.weekday).sum()
print(weekday_totals)

## 升采样

In [19]:
# Weekly sample frame used by the upsampling examples: five Mondays starting
# 2017-01-02 with three columns of standard-normal noise.
# The values come from a locally seeded generator so every "Restart & Run All"
# yields the same frame (and the same downstream outputs) without touching
# NumPy's global random state.
weekly_index = pd.date_range('20170101', periods=5, freq='W-MON')
df = pd.DataFrame(np.random.RandomState(42).randn(5, 3),
                  index=weekly_index,
                  columns=['S1', 'S2', 'S3'])
print(df)

                  S1        S2        S3
2017-01-02 -0.112175 -0.539488 -0.666707
2017-01-09 -0.073499  0.954681  0.911928
2017-01-16 -1.729484  1.042449 -0.816687
2017-01-23  1.153226 -2.192437 -0.510171
2017-01-30 -1.639185 -0.166645  0.924192


In [18]:
# Upsampling weekly data to daily frequency without a fill rule leaves the
# newly created rows empty (NaN).
daily_with_gaps = df.resample('D').asfreq()
print(daily_with_gaps)

                  S1        S2        S3
2017-01-02 -0.491283 -0.128185  0.651688
2017-01-03       NaN       NaN       NaN
2017-01-04       NaN       NaN       NaN
2017-01-05       NaN       NaN       NaN
2017-01-06       NaN       NaN       NaN
2017-01-07       NaN       NaN       NaN
2017-01-08       NaN       NaN       NaN
2017-01-09 -0.039345  0.816387  0.855056
2017-01-10       NaN       NaN       NaN
2017-01-11       NaN       NaN       NaN
2017-01-12       NaN       NaN       NaN
2017-01-13       NaN       NaN       NaN
2017-01-14       NaN       NaN       NaN
2017-01-15       NaN       NaN       NaN
2017-01-16 -0.210142  1.539629  0.212709
2017-01-17       NaN       NaN       NaN
2017-01-18       NaN       NaN       NaN
2017-01-19       NaN       NaN       NaN
2017-01-20       NaN       NaN       NaN
2017-01-21       NaN       NaN       NaN
2017-01-22       NaN       NaN       NaN
2017-01-23 -0.352680 -0.070204 -1.795317
2017-01-24       NaN       NaN       NaN
2017-01-25      

In [20]:
# Forward fill: carry each known row forward, but at most 2 consecutive days;
# rows further from the last observation stay NaN.
daily_ffill_limited = df.resample('D').ffill(limit=2)
print(daily_ffill_limited)

                  S1        S2        S3
2017-01-02 -0.112175 -0.539488 -0.666707
2017-01-03 -0.112175 -0.539488 -0.666707
2017-01-04 -0.112175 -0.539488 -0.666707
2017-01-05       NaN       NaN       NaN
2017-01-06       NaN       NaN       NaN
2017-01-07       NaN       NaN       NaN
2017-01-08       NaN       NaN       NaN
2017-01-09 -0.073499  0.954681  0.911928
2017-01-10 -0.073499  0.954681  0.911928
2017-01-11 -0.073499  0.954681  0.911928
2017-01-12       NaN       NaN       NaN
2017-01-13       NaN       NaN       NaN
2017-01-14       NaN       NaN       NaN
2017-01-15       NaN       NaN       NaN
2017-01-16 -1.729484  1.042449 -0.816687
2017-01-17 -1.729484  1.042449 -0.816687
2017-01-18 -1.729484  1.042449 -0.816687
2017-01-19       NaN       NaN       NaN
2017-01-20       NaN       NaN       NaN
2017-01-21       NaN       NaN       NaN
2017-01-22       NaN       NaN       NaN
2017-01-23  1.153226 -2.192437 -0.510171
2017-01-24  1.153226 -2.192437 -0.510171
2017-01-25  1.15

In [21]:
# Backward fill: each new daily row takes the values of the next known row.
daily_bfill = df.resample('D').bfill()
print(daily_bfill)

                  S1        S2        S3
2017-01-02 -0.112175 -0.539488 -0.666707
2017-01-03 -0.073499  0.954681  0.911928
2017-01-04 -0.073499  0.954681  0.911928
2017-01-05 -0.073499  0.954681  0.911928
2017-01-06 -0.073499  0.954681  0.911928
2017-01-07 -0.073499  0.954681  0.911928
2017-01-08 -0.073499  0.954681  0.911928
2017-01-09 -0.073499  0.954681  0.911928
2017-01-10 -1.729484  1.042449 -0.816687
2017-01-11 -1.729484  1.042449 -0.816687
2017-01-12 -1.729484  1.042449 -0.816687
2017-01-13 -1.729484  1.042449 -0.816687
2017-01-14 -1.729484  1.042449 -0.816687
2017-01-15 -1.729484  1.042449 -0.816687
2017-01-16 -1.729484  1.042449 -0.816687
2017-01-17  1.153226 -2.192437 -0.510171
2017-01-18  1.153226 -2.192437 -0.510171
2017-01-19  1.153226 -2.192437 -0.510171
2017-01-20  1.153226 -2.192437 -0.510171
2017-01-21  1.153226 -2.192437 -0.510171
2017-01-22  1.153226 -2.192437 -0.510171
2017-01-23  1.153226 -2.192437 -0.510171
2017-01-24 -1.639185 -0.166645  0.924192
2017-01-25 -1.63

In [22]:
# Forward fill with no limit: every new daily row repeats the last known row.
# Resampler.fillna('ffill') is deprecated in recent pandas releases in favour
# of the dedicated .ffill() method, which produces the same result.
daily_ffill = df.resample('D').ffill()
print(daily_ffill)

                  S1        S2        S3
2017-01-02 -0.112175 -0.539488 -0.666707
2017-01-03 -0.112175 -0.539488 -0.666707
2017-01-04 -0.112175 -0.539488 -0.666707
2017-01-05 -0.112175 -0.539488 -0.666707
2017-01-06 -0.112175 -0.539488 -0.666707
2017-01-07 -0.112175 -0.539488 -0.666707
2017-01-08 -0.112175 -0.539488 -0.666707
2017-01-09 -0.073499  0.954681  0.911928
2017-01-10 -0.073499  0.954681  0.911928
2017-01-11 -0.073499  0.954681  0.911928
2017-01-12 -0.073499  0.954681  0.911928
2017-01-13 -0.073499  0.954681  0.911928
2017-01-14 -0.073499  0.954681  0.911928
2017-01-15 -0.073499  0.954681  0.911928
2017-01-16 -1.729484  1.042449 -0.816687
2017-01-17 -1.729484  1.042449 -0.816687
2017-01-18 -1.729484  1.042449 -0.816687
2017-01-19 -1.729484  1.042449 -0.816687
2017-01-20 -1.729484  1.042449 -0.816687
2017-01-21 -1.729484  1.042449 -0.816687
2017-01-22 -1.729484  1.042449 -0.816687
2017-01-23  1.153226 -2.192437 -0.510171
2017-01-24  1.153226 -2.192437 -0.510171
2017-01-25  1.15

In [23]:
# Fill the new daily rows by interpolating between the known weekly rows.
# 'linear' places the missing values on a straight line between neighbours.
daily_linear = df.resample('D').interpolate(method='linear')
print(daily_linear)

                  S1        S2        S3
2017-01-02 -0.112175 -0.539488 -0.666707
2017-01-03 -0.106649 -0.326036 -0.441188
2017-01-04 -0.101124 -0.112583 -0.215669
2017-01-05 -0.095599  0.100870  0.009851
2017-01-06 -0.090074  0.314323  0.235370
2017-01-07 -0.084549  0.527776  0.460889
2017-01-08 -0.079024  0.741228  0.686409
2017-01-09 -0.073499  0.954681  0.911928
2017-01-10 -0.310069  0.967219  0.664983
2017-01-11 -0.546638  0.979758  0.418038
2017-01-12 -0.783207  0.992296  0.171093
2017-01-13 -1.019776  1.004834 -0.075852
2017-01-14 -1.256346  1.017373 -0.322797
2017-01-15 -1.492915  1.029911 -0.569742
2017-01-16 -1.729484  1.042449 -0.816687
2017-01-17 -1.317668  0.580323 -0.772899
2017-01-18 -0.905853  0.118196 -0.729111
2017-01-19 -0.494037 -0.343931 -0.685323
2017-01-20 -0.082221 -0.806057 -0.641535
2017-01-21  0.329594 -1.268184 -0.597747
2017-01-22  0.741410 -1.730310 -0.553959
2017-01-23  1.153226 -2.192437 -0.510171
2017-01-24  0.754310 -1.903038 -0.305262
2017-01-25  0.35