In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import h5py

In [3]:
%matplotlib inline

In [4]:
from matplotlib import rcParams
rcParams['figure.figsize'] = [16, 8]
rcParams['font.family'] = ['sans-serif']
rcParams['axes.unicode_minus'] = False 

In [5]:
# Ask matplotlib to rebuild its font cache (presumably so that a freshly
# installed font becomes selectable — TODO confirm the original intent).
# `_rebuild` is a private helper that is absent from newer matplotlib
# releases, so a failed import is tolerated instead of aborting Run All.
try:
    from matplotlib.font_manager import _rebuild
except ImportError:
    # NOTE(review): without `_rebuild`, a stale font cache has to be cleared
    # by other means (e.g. removing matplotlib's cache directory).
    _rebuild = None

if _rebuild is not None:
    _rebuild()

In [None]:
# Open two HDF5 files named 000007.h5 read-only for inspection.
# NOTE(review): neither handle is closed in the cells shown here, and the
# second path is an absolute, machine-specific location — consider a shared,
# configurable data directory instead.
f1 = h5py.File('../data/000007.h5','r')
f2 = h5py.File('/Users/d/code/python/xx/000007.h5','r')

In [None]:
for i in f1['/stock'].items():
    print(i)

In [None]:
df1 = pd.read_hdf('../data/000007.h5', '/stock/kline5')
df1_1 = pd.read_hdf('../data/000007-1.h5', '/stock/kline5')
df2 = pd.read_hdf('/Users/d/code/python/xx/000007.h5', '/stock/kline5')

In [None]:
df1.tail()

In [None]:
df1_1[-50:]

In [None]:
df2[-50:]

In [None]:
# Remove the 'date' and 'volume' columns from `df` in place.
# NOTE(review): the cells above only assign df1, df1_1 and df2; the first
# visible assignment of `df` comes much later, so on a fresh kernel this
# presumably raises NameError — confirm which frame was intended.
del df['date']
del df['volume']

In [None]:
p = df.plot()

In [None]:
# p.savefig('../data/000007', dpi=300)
fig = p.get_figure()
fig.savefig("../data/000007-1.png", dpi=300)

In [None]:
pd.read_hdf('../data/002162.h5', '/stock/details')

In [None]:
stocks = pd.read_hdf('../data/002162.h5', '/stock/stocks')

In [None]:
stocks

In [None]:
f = h5py.File('../data/002162.h5','r')

In [None]:
for i in f.items():
    print(i)

In [None]:
for i in f['/stock'].items():
    print(i)

In [None]:
# Closing prices from `stocks`, laid out as a single column of shape
# (n_samples, 1).
training_set = stocks['closing_price'].values.reshape(-1, 1)

In [None]:
plt.plot(training_set, label = '实际数据')
plt.legend()

In [None]:
def sliding_windows(data, seq_length):
    """Cut a sequence into overlapping input windows and next-step targets.

    For every start index ``i`` the input is ``data[i:i + seq_length]`` and
    the target is the element immediately after it, ``data[i + seq_length]``.

    Args:
        data: Sliceable sequence supporting ``len`` (e.g. a NumPy array).
        seq_length: Number of consecutive elements in each input window.

    Returns:
        Tuple ``(x, y)`` of NumPy arrays holding ``len(data) - seq_length``
        windows and targets; both are empty when ``data`` is too short.
    """
    # The last usable start index is len(data) - seq_length - 1 (its target is
    # the final element), so the range has to stop at len(data) - seq_length.
    n_windows = max(len(data) - seq_length, 0)

    x = [data[start:start + seq_length] for start in range(n_windows)]
    y = [data[start + seq_length] for start in range(n_windows)]

    return np.array(x), np.array(y)

def _as_float_tensor(samples):
    """Copy `samples` into a new NumPy array and wrap it as a torch Variable."""
    return Variable(torch.Tensor(np.array(samples)))


# Scale the series with a min-max scaler before windowing.
# NOTE(review): the scaler is fitted on the whole series, so statistics from
# the portion later used for testing are included — confirm this is intended.
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

# Window length handed to sliding_windows.
seq_length = 20
x, y = sliding_windows(training_data, seq_length)

# Order-preserving 50/50 split: leading half trains, trailing half tests.
train_size = int(len(y) * 0.50)
test_size = len(y) - train_size

# Full set plus the two splits; every tensor is built from its own array copy.
dataX, dataY = _as_float_tensor(x), _as_float_tensor(y)
trainX, trainY = _as_float_tensor(x[:train_size]), _as_float_tensor(y[:train_size])
testX, testY = _as_float_tensor(x[train_size:]), _as_float_tensor(y[train_size:])

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear head on the top layer's final hidden state.

    Args:
        num_classes: Output width of the linear layer.
        input_size: Number of features per time step.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Optional window length, stored for reference only; the
            forward pass never reads it.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Taken as an explicit argument so the class does not depend on a
        # notebook-level global being defined.
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per input window.

        Args:
            x: Batch-first tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # Zero initial states, created on the same device/dtype as the input.
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          dtype=x.dtype, device=x.device)
        c_0 = torch.zeros_like(h_0)

        # Propagate input through LSTM
        _, (h_n, _) = self.lstm(x, (h_0, c_0))

        # h_n is (num_layers, batch, hidden). Only the top layer feeds the
        # head; flattening all layers would give num_layers * batch rows.
        return self.fc(h_n[-1])

In [None]:
class GRU(nn.Module):
    """Stacked GRU followed by a linear head on the top layer's final hidden state.

    Args:
        num_classes: Output width of the linear layer.
        input_size: Number of features per time step.
        hidden_size: Size of the GRU hidden state.
        num_layers: Number of stacked GRU layers.
        seq_length: Optional window length, stored for reference only; the
            forward pass never reads it.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers,
                 seq_length=None):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        # Taken as an explicit argument so the class does not depend on a
        # notebook-level global being defined.
        self.seq_length = seq_length

        self.gru = nn.GRU(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per input window.

        Args:
            x: Batch-first tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # Zero initial hidden state on the same device/dtype as the input
        # (a GRU has no separate cell state).
        h_0 = torch.zeros(self.num_layers, x.size(0), self.hidden_size,
                          dtype=x.dtype, device=x.device)

        # Propagate input through the GRU
        _, h_n = self.gru(x, h_0)

        # h_n is (num_layers, batch, hidden). Only the top layer feeds the
        # head; flattening all layers would give num_layers * batch rows.
        return self.fc(h_n[-1])

In [None]:

# Seed the RNG so weight initialisation, and therefore the printed loss
# curve, is repeatable across kernel restarts.
torch.manual_seed(0)

# Optimisation settings.
num_epochs = 3000
learning_rate = 0.01

# Model dimensions: one feature in, a two-unit hidden state, one value out.
input_size = 1
hidden_size = 2
num_layers = 1

num_classes = 1

# The variable is still called `lstm` because later cells refer to it by that
# name, but the model actually trained is the GRU. Swap the two lines below
# to train the LSTM instead.
# lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
lstm = GRU(num_classes, input_size, hidden_size, num_layers)
lstm.train()

criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate)

# Train the model: every epoch is a single full-batch step over trainX.
for epoch in range(num_epochs):
    # Clear gradients left over from the previous step before the new pass.
    optimizer.zero_grad()
    outputs = lstm(trainX)

    # obtain the loss function
    loss = criterion(outputs, trainY)

    loss.backward()

    optimizer.step()
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

In [None]:
lstm.eval()

In [None]:
# Step through the test windows one at a time. From the second window on, the
# most recent element of the window is replaced by the model's previous
# prediction before predicting again.
pres = []
pre = None
with torch.no_grad():  # inference only; no autograd graph is needed
    for i in range(testX.shape[0]):
        # Clone first: indexing testX yields a view, and writing into it
        # would permanently alter testX and make this cell non-repeatable.
        window = testX[i].clone()
        if pre is not None:
            window[-1] = pre.item()
        pre = lstm(window.reshape(1, seq_length, 1))
        pres.append(pre.item())

# Map the predictions back through the scaler and build matching x positions
# (offset by train_size) for plotting.
dp = sc.inverse_transform(np.asarray(pres).reshape(testX.shape[0], 1))
xpd = np.arange(train_size, train_size + test_size)

In [None]:
# Run the model over every window (train and test portions alike), then map
# both predictions and targets back through the scaler.
train_predict = lstm(dataX)
data_predict = sc.inverse_transform(train_predict.data.numpy())
dataY_plot = sc.inverse_transform(dataY.data.numpy())

# Dashed red line marks where the training portion ends.
plt.axvline(x=train_size, c='r', linestyle='--')

plt.plot(dataY_plot, label='真实数据')
plt.plot(data_predict, label='预测数据', alpha=0.7)
# plt.plot(xpd, dp, label='二次预测')
plt.suptitle('随机数序列预测')
plt.legend()

# Save before show() so the written file contains the drawn figure.
plt.savefig('../data/002162', dpi=300)
plt.show()

In [9]:
st = h5py.File('../data/sh000062.h5','r')

In [10]:
for i in st['/stock'].items():
    print(i)

('details', <HDF5 dataset "details": shape (0,), type "|V62">)
('kline15', <HDF5 dataset "kline15": shape (1022,), type "|V28">)
('kline30', <HDF5 dataset "kline30": shape (751,), type "|V28">)
('kline5', <HDF5 dataset "kline5": shape (1022,), type "|V28">)
('kline60', <HDF5 dataset "kline60": shape (471,), type "|V28">)
('stocks', <HDF5 dataset "stocks": shape (0,), type "|V40">)


In [11]:
df = pd.read_hdf('../data/sh000062.h5', '/stock/kline5')

In [13]:
df.head()

Unnamed: 0,date,open,high,close,low,volume
0,1595569800,2061.298096,2061.780029,2052.461914,2052.455078,52126300.0
1,1595570100,2052.141113,2057.0271,2055.866943,2051.787109,38288700.0
2,1595570400,2055.866943,2055.993896,2049.029053,2048.000977,51116700.0
3,1595570700,2049.025879,2049.025879,2043.984985,2042.780029,65422500.0
4,1595571000,2043.369995,2043.418945,2033.948975,2031.973999,75130800.0


In [14]:
# Convenience aliases for the individual k-line columns.
date = df['date']
# Named `open_` rather than `open` so the builtin open() stays usable in the
# rest of the kernel session.
open_ = df['open']
high = df['high']
close = df['close']
low = df['low']
volume = df['volume']

In [15]:
date[0]

1595569800

In [None]:
print(pd.to_datetime(1594878000.0, unit='s'))
print(pd.Timestamp.fromtimestamp(1594878000.0))
print(pd.Timestamp.fromtimestamp(1.594878e+09))
print(pd.Timestamp.fromtimestamp(1594877952.0))

In [None]:
print(date[0])

# 1.594878e+09
pd.to_datetime('1594878000.0', unit='s')

In [None]:
# NOTE(review): an empty string is not a numeric epoch value, so this call
# presumably raises; looks like a leftover experiment — confirm and remove.
pd.Timestamp.fromtimestamp('')

In [None]:
# Render the epoch values as strings. The builtin `str` is used because the
# `np.str` alias is no longer available in current NumPy releases.
date.astype(str)

In [None]:
#date.astype(np.float).apply(lambda x: print(x))

In [16]:
# Convert epoch seconds to naive wall-clock timestamps in one vectorized call.
# Going through UTC with an explicit zone keeps the result independent of the
# timezone of the machine running the notebook.
# NOTE(review): Asia/Shanghai is inferred from the UTC+8 wall-clock values in
# the recorded output of this cell — confirm.
pd.to_datetime(date, unit='s', utc=True).dt.tz_convert('Asia/Shanghai').dt.tz_localize(None)

0      2020-07-24 13:50:00
1      2020-07-24 13:55:00
2      2020-07-24 14:00:00
3      2020-07-24 14:05:00
4      2020-07-24 14:10:00
               ...        
1017   2020-08-24 14:35:00
1018   2020-08-24 14:40:00
1019   2020-08-24 14:45:00
1020   2020-08-24 14:50:00
1021   2020-08-24 14:55:00
Name: date, Length: 1022, dtype: datetime64[ns]

In [None]:
# pd.to_datetime(df['date'])
print(date[0])
pd.to_datetime(df['date'], unit='s')

In [None]:
date.plot()

In [None]:
df.plot()

In [6]:
df = pd.read_hdf('/Users/d/code/python/xx/stock/sz002319.h5', '/stock/kline5')

In [7]:
df

Unnamed: 0,date,open,high,close,low,volume
0,1594263300,6.57,6.58,6.57,6.57,18600.0
1,1594263600,6.56,6.56,6.56,6.56,6100.0
2,1594263900,6.56,6.57,6.57,6.56,19400.0
3,1594264200,6.57,6.60,6.58,6.57,23100.0
4,1594264500,6.60,6.61,6.60,6.58,26400.0
...,...,...,...,...,...,...
1017,1598250900,7.63,7.66,7.64,7.63,40500.0
1018,1598251200,7.64,7.67,7.67,7.64,22200.0
1019,1598251500,7.67,7.77,7.74,7.66,97100.0
1020,1598251800,7.73,7.73,7.72,7.70,15300.0


In [8]:
# Convert epoch seconds to naive wall-clock timestamps in one vectorized call.
# Going through UTC with an explicit zone keeps the result independent of the
# timezone of the machine running the notebook.
# NOTE(review): Asia/Shanghai is inferred from the UTC+8 wall-clock values in
# the recorded output of this cell — confirm.
pd.to_datetime(df['date'], unit='s', utc=True).dt.tz_convert('Asia/Shanghai').dt.tz_localize(None)

0      2020-07-09 10:55:00
1      2020-07-09 11:00:00
2      2020-07-09 11:05:00
3      2020-07-09 11:10:00
4      2020-07-09 11:15:00
               ...        
1017   2020-08-24 14:35:00
1018   2020-08-24 14:40:00
1019   2020-08-24 14:45:00
1020   2020-08-24 14:50:00
1021   2020-08-24 14:55:00
Name: date, Length: 1022, dtype: datetime64[ns]