In [50]:
# coding: utf-8
import pandas as pd

In [51]:
# =====读取hdf数据
# 创建hdf文件
df = pd.read_hdf('./data/EURUSD-Data.h5', key='EURUSD_1M')

# 读取某个key指向的数据
print(df)

                <DATE>  <OPEN>  <HIGH>   <LOW>  <CLOSE>
0  2001-01-02 23:01:00  0.9507  0.9507  0.9507   0.9507
1  2001-01-02 23:02:00  0.9506  0.9506  0.9505   0.9505
2  2001-01-02 23:03:00  0.9505  0.9507  0.9505   0.9506
3  2001-01-02 23:04:00  0.9506  0.9506  0.9506   0.9506
4  2001-01-02 23:05:00  0.9506  0.9506  0.9506   0.9506
..                 ...     ...     ...     ...      ...
94 2001-01-03 00:52:00  0.9496  0.9496  0.9496   0.9496
95 2001-01-03 00:53:00  0.9495  0.9495  0.9495   0.9495
96 2001-01-03 00:54:00  0.9494  0.9494  0.9492   0.9492
97 2001-01-03 00:55:00  0.9492  0.9493  0.9492   0.9493
98 2001-01-03 00:56:00  0.9493  0.9497  0.9493   0.9497

[99 rows x 5 columns]


In [45]:
# ===第一种方法：将1分钟数据转为5分钟数据
# 将DATE设定为index
df.set_index('<DATE>', inplace=True)

# 周期转换方法：resample
rule_type = '5T'  # 周期单位: 5T=5分钟 1H=1小时 1D=1天 1W=1周
period_df = df[['<CLOSE>']].resample(rule=rule_type).last()  # last：取这5分钟的最后一行数据

# 开、高、低的价格，成交量
period_df['<OPEN>'] = df['<OPEN>'].resample(rule=rule_type).first()
period_df['<HIGH>'] = df['<HIGH>'].resample(rule=rule_type).max()
period_df['<LOW>'] = df['<LOW>'].resample(rule=rule_type).min()
period_df = period_df[['<OPEN>','<HIGH>','<LOW>','<CLOSE>']]
period_df

Unnamed: 0_level_0,<OPEN>,<HIGH>,<LOW>,<CLOSE>
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001-01-02 23:00:00,0.9507,0.9507,0.9505,0.9506
2001-01-02 23:05:00,0.9506,0.9507,0.9505,0.9507
2001-01-02 23:10:00,0.9507,0.9507,0.9506,0.9507
2001-01-02 23:15:00,0.9507,0.9507,0.9506,0.9506
2001-01-02 23:20:00,0.9507,0.9507,0.9507,0.9507
2001-01-02 23:25:00,,,,
2001-01-02 23:30:00,0.9507,0.9508,0.9507,0.9508
2001-01-02 23:35:00,0.9507,0.9509,0.9507,0.9509
2001-01-02 23:40:00,0.9509,0.9509,0.9508,0.9508
2001-01-02 23:45:00,0.9508,0.9508,0.9507,0.9507


In [58]:
# ===第二种方法：将1分钟数据转为5分钟数据

# 将DATE设定为index 执行一遍就好了
df.set_index('<DATE>', inplace=True)

KeyError: "None of ['<DATE>'] are in the columns"

In [56]:
rule_type = '5T'
period_df = df.resample(rule=rule_type, base=0, label='left', closed='left').agg(
    {'<OPEN>': 'first',
     '<HIGH>': 'max',
     '<LOW>': 'min',
     '<CLOSE>': 'last',
     })
period_df = period_df[['<OPEN>', '<HIGH>', '<LOW>', '<CLOSE>']]
# base参数：帮助确定转换周期开始的时间
# label='left', closed='left'，建议统一设置成'left'

# ===去除不必要的数据
# 去除一天都没有交易的周
period_df.dropna(subset=['<OPEN>'], inplace=True)
period_df

The new arguments that you should use are 'offset' or 'origin'.

>>> df.resample(freq="3s", base=2)

becomes:

>>> df.resample(freq="3s", offset="2s")

  period_df = df.resample(rule=rule_type, base=0, label='left', closed='left').agg(


Unnamed: 0_level_0,<OPEN>,<HIGH>,<LOW>,<CLOSE>
<DATE>,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2001-01-02 23:00:00,0.9507,0.9507,0.9505,0.9506
2001-01-02 23:05:00,0.9506,0.9507,0.9505,0.9507
2001-01-02 23:10:00,0.9507,0.9507,0.9506,0.9507
2001-01-02 23:15:00,0.9507,0.9507,0.9506,0.9506
2001-01-02 23:20:00,0.9507,0.9507,0.9507,0.9507
2001-01-02 23:30:00,0.9507,0.9508,0.9507,0.9508
2001-01-02 23:35:00,0.9507,0.9509,0.9507,0.9509
2001-01-02 23:40:00,0.9509,0.9509,0.9508,0.9508
2001-01-02 23:45:00,0.9508,0.9508,0.9507,0.9507
2001-01-02 23:50:00,0.9507,0.9508,0.9506,0.9506


In [47]:
# 创建hdf文件 mode='a' 追加模式
h5_store = pd.HDFStore('./data/EURUSD-Data.h5', mode='a')
# 存储数据到hdf
h5_store['EURUSD_5M'] = period_df
print('h5_store中的key= ',h5_store.keys())
# 关闭hdf文件
h5_store.close()


h5_store中的key=  ['/EURUSD_1M', '/EURUSD_5M']


### rule的取值

In [None]:

"""
    B       business day frequency
    C       custom business day frequency (experimental)
    D       calendar day frequency
    W       weekly frequency
    M       month end frequency
    SM      semi-month end frequency (15th and end of month)
    BM      business month end frequency
    CBM     custom business month end frequency
    MS      month start frequency
    SMS     semi-month start frequency (1st and 15th)
    BMS     business month start frequency
    CBMS    custom business month start frequency
    Q       quarter end frequency
    BQ      business quarter endfrequency
    QS      quarter start frequency
    BQS     business quarter start frequency
    A       year end frequency
    BA      business year end frequency
    AS      year start frequency
    BAS     business year start frequency
    BH      business hour frequency
    H       hourly frequency
    T       minutely frequency
    S       secondly frequency
    L       milliseonds
    U       microseconds
    N       nanoseconds
"""


