In [27]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler
import h5py
from stockstats import StockDataFrame

import plotly.graph_objects as go

In [2]:
%matplotlib inline

In [3]:
from matplotlib import rcParams
rcParams['figure.figsize'] = [16, 8]
rcParams['font.family'] = ['sans-serif']
rcParams['axes.unicode_minus'] = False 

In [4]:
from matplotlib.font_manager import _rebuild
_rebuild()

In [16]:
df = pd.read_hdf('/Users/d/code/python/left5/data/sh600236.h5', '/stock/kline5')

In [17]:
df

Unnamed: 0,date,open,high,close,low,volume
0,1595569800,4.36,4.36,4.35,4.34,366960.0
1,1595570100,4.35,4.35,4.35,4.34,71773.0
2,1595570400,4.35,4.35,4.34,4.33,142627.0
3,1595570700,4.34,4.35,4.32,4.32,362790.0
4,1595571000,4.32,4.33,4.32,4.31,151400.0
...,...,...,...,...,...,...
1210,1598596800,4.68,4.69,4.69,4.66,187100.0
1211,1598597100,4.69,4.69,4.68,4.68,56900.0
1212,1598597400,4.68,4.69,4.69,4.68,67562.0
1213,1598597700,4.69,4.69,4.68,4.68,87740.0


In [7]:
df.date.apply(lambda x: pd.Timestamp.fromtimestamp(x))

0      2020-07-24 13:50:00
1      2020-07-24 13:55:00
2      2020-07-24 14:00:00
3      2020-07-24 14:05:00
4      2020-07-24 14:10:00
               ...        
1080   2020-08-26 10:20:00
1081   2020-08-26 10:25:00
1082   2020-08-26 10:30:00
1083   2020-08-26 10:35:00
1084   2020-08-26 10:40:00
Name: date, Length: 1085, dtype: datetime64[ns]

In [12]:
training_set = df.to_numpy()[:, 1:]

In [13]:
sc = MinMaxScaler()
training_data = sc.fit_transform(training_set)

In [16]:
training_data

array([[0.07017541, 0.05000036, 0.05263114, 0.05660415, 0.07856112],
       [0.05263114, 0.03333331, 0.05263114, 0.05660415, 0.01498446],
       [0.05263114, 0.03333331, 0.0350877 , 0.0377358 , 0.03024483],
       ...,
       [0.85964918, 0.80000016, 0.85964918, 0.9245284 , 0.01782034],
       [0.84210575, 0.80000016, 0.85964918, 0.9245284 , 0.01128578],
       [0.84210575, 0.80000016, 0.84210575, 0.9245284 , 0.03741238]])

In [27]:
sc = MinMaxScaler()
sc.fit_transform(df[['open', 'high', 'close', 'low']].values)

array([[0.07017517, 0.05000019, 0.0526309 , 0.05660343],
       [0.0526309 , 0.0333333 , 0.0526309 , 0.05660343],
       [0.0526309 , 0.0333333 , 0.03508759, 0.03773594],
       ...,
       [0.8596487 , 0.8000002 , 0.8596487 , 0.9245281 ],
       [0.84210587, 0.8000002 , 0.8596487 , 0.9245281 ],
       [0.84210587, 0.8000002 , 0.84210587, 0.9245281 ]], dtype=float32)

In [41]:
def sliding_windows(data, seq_length, pre_size):
    x = []
    y = []

    for i in range(len(data)-seq_length-1):
        _x = data[i:(i+seq_length)]
        _y = data[i+seq_length:i+seq_length+pre_size]
        x.append(_x)
        y.append(_y)

    return np.array(x), np.array(y)

sc = MinMaxScaler()
training_data = sc.fit_transform(df[['open', 'high', 'close', 'low']].values)

seq_length = 4
x, y = sliding_windows(training_data, seq_length, 1)

train_size = int(len(y) * 0.67)
test_size = len(y) - train_size

dataX = Variable(torch.Tensor(np.array(x)))
dataY = Variable(torch.Tensor(np.array(y)))

trainX = Variable(torch.Tensor(np.array(x[:train_size])))
trainY = Variable(torch.Tensor(np.array(y[:train_size])))

testX = Variable(torch.Tensor(np.array(x[train_size:])))
testY = Variable(torch.Tensor(np.array(y[train_size:])))

In [42]:
class LSTM(nn.Module):

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.seq_length = seq_length
        
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size))
        
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        
        h_out = h_out.view(-1, self.hidden_size)
        
        out = self.fc(h_out)
        
        return out

In [43]:
num_epochs = 3000
learning_rate = 0.01

input_size = 4
hidden_size = 2
num_layers = 1

num_classes = 4

lstm = LSTM(num_classes, input_size, hidden_size, num_layers)

criterion = torch.nn.MSELoss()    # mean-squared error for regression
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(lstm.parameters(), lr=learning_rate)

# Train the model
for epoch in range(num_epochs):
    outputs = lstm(trainX)
    optimizer.zero_grad()
    
    # obtain the loss function
    loss = criterion(outputs, trainY)
    
    loss.backward()
    
    optimizer.step()
    if epoch % 100 == 0:
      print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 0, loss: 0.42881
Epoch: 100, loss: 0.00445
Epoch: 200, loss: 0.00434
Epoch: 300, loss: 0.00431
Epoch: 400, loss: 0.00430
Epoch: 500, loss: 0.00429
Epoch: 600, loss: 0.00429
Epoch: 700, loss: 0.00429
Epoch: 800, loss: 0.00429
Epoch: 900, loss: 0.00429
Epoch: 1000, loss: 0.00429
Epoch: 1100, loss: 0.00429
Epoch: 1200, loss: 0.00429
Epoch: 1300, loss: 0.00429
Epoch: 1400, loss: 0.00429
Epoch: 1500, loss: 0.00429
Epoch: 1600, loss: 0.00429
Epoch: 1700, loss: 0.00429
Epoch: 1800, loss: 0.00429
Epoch: 1900, loss: 0.00429
Epoch: 2000, loss: 0.00429
Epoch: 2100, loss: 0.00429
Epoch: 2200, loss: 0.00429
Epoch: 2300, loss: 0.00429
Epoch: 2400, loss: 0.00429
Epoch: 2500, loss: 0.00429
Epoch: 2600, loss: 0.00429
Epoch: 2700, loss: 0.00429
Epoch: 2800, loss: 0.00429
Epoch: 2900, loss: 0.00429


In [None]:
lstm.eval()
train_predict = lstm(dataX)

data_predict = train_predict.data.numpy()
dataY_plot = dataY.data.numpy()

data_predict = sc.inverse_transform(data_predict)
dataY_plot = sc.inverse_transform(dataY_plot.reshape(-1, 4))

plt.axvline(x=train_size, c='r', linestyle='--')

plt.plot(dataY_plot[:,2], label=u'真实数据')
plt.plot(data_predict[:,2], label=u'预测数据')
plt.suptitle('随机数序列预测')
plt.legend()
plt.show()

In [18]:
dd = df

In [19]:
dd.date = df.date.apply(lambda x: pd.Timestamp.fromtimestamp(x))

In [28]:
stock = StockDataFrame.retype(dd)

In [20]:
dd

Unnamed: 0,date,open,high,close,low,volume
0,2020-07-24 13:50:00,4.36,4.36,4.35,4.34,366960.0
1,2020-07-24 13:55:00,4.35,4.35,4.35,4.34,71773.0
2,2020-07-24 14:00:00,4.35,4.35,4.34,4.33,142627.0
3,2020-07-24 14:05:00,4.34,4.35,4.32,4.32,362790.0
4,2020-07-24 14:10:00,4.32,4.33,4.32,4.31,151400.0
...,...,...,...,...,...,...
1210,2020-08-28 14:40:00,4.68,4.69,4.69,4.66,187100.0
1211,2020-08-28 14:45:00,4.69,4.69,4.68,4.68,56900.0
1212,2020-08-28 14:50:00,4.68,4.69,4.69,4.68,67562.0
1213,2020-08-28 14:55:00,4.69,4.69,4.68,4.68,87740.0


In [10]:
go.Candlestick?

In [21]:
INCREASING_COLOR = '#17BECF'
DECREASING_COLOR = '#7F7F7F'

In [22]:
dd['ma5'] = df.close.rolling(window=5, min_periods=1).mean()
dd['ma10'] = df.close.rolling(window=10, min_periods=1).mean()

In [23]:
from lutils.stock import plot_stock

In [24]:
fig = plot_stock(df)

In [25]:
fig.show()

In [31]:
def bbands(price, window_size=10, num_of_std=5):
    rolling_mean = price.rolling(window=window_size).mean()
    rolling_std  = price.rolling(window=window_size).std()
    upper_band = rolling_mean + (rolling_std*num_of_std)
    lower_band = rolling_mean - (rolling_std*num_of_std)
    return rolling_mean, upper_band, lower_band

bb_avg, bb_upper, bb_lower = bbands(dd.close)

In [32]:
colors = np.zeros(df.close.shape).astype(np.str)

In [33]:
colors[df.open >= df.close] = 'red'
colors[df.open < df.close] = 'green'

In [34]:
colors

array(['red', 'red', 'red', ..., 'green', 'red', 'red'], dtype='<U32')

##### https://chart-studio.plotly.com/~jackp/17421/plotly-candlestick-chart-in-python/#/

In [39]:
fig = go.Figure(data=[
    go.Candlestick(
#         x=listdd.index,
        open=dd.open,
        high=dd.high,
        low=dd.low,
        close=dd.close,
        name='000',
        yaxis='y2',
#         increasing = dict(line=dict(color='green')),
#         decreasing = dict(line=dict(color='red'))
    ),
    
    go.Scatter(y=df.ma5, line=dict(color='orange', width=1), name='MA 5', yaxis='y2'),
#     go.Scatter(y=df.ma10, line=dict(color='green', width=1), name='MA 10', yaxis='y2'),
    
    go.Scatter(y=bb_avg, line=dict(color='#ccc', width=1), yaxis='y2', legendgroup='Bollinger Bands', name='Boll Avg 10', showlegend=False),
    go.Scatter(y=bb_upper, line=dict(color='#ccc', width=1), legendgroup='Bollinger Bands', name='Boll Upper', yaxis='y2'),
    go.Scatter(y=bb_lower, line=dict(color='#ccc', width=1), legendgroup='Bollinger Bands', name='Boll Lower', showlegend=False, yaxis='y2'),
    
#     go.Bar(y=dd.volume, yaxis='y', name='volume', marker=dict(color=colors)),
    go.Scatter(y=stock.get('macd'), line=dict(width=1), yaxis='y', name='MACD'),
    go.Scatter(y=stock.get('macds'), line=dict(width=1), yaxis='y', name='MACDS'),
    go.Scatter(y=stock.get('macdh'), line=dict(width=1), yaxis='y', name='MACDH')
])

fig.update_layout(
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=.8,
        xanchor="right",
        x=1
    ), yaxis=dict(
        domain = [0, 0.2],
        showticklabels = False
    ), yaxis2=dict(
        domain = [0.2, 0.8]
    ), margin=dict(
        l=10,
        r=10,
        b=10,
        t=10,
        pad=1
    ), xaxis=dict(
        rangeselector = dict(
            visible = True,
#             x = 0, y = 0,
            buttons = list([
                dict(step="all"),
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        #type='category', #['-', 'linear', 'log', 'date', 'category', 'multicategory']
    ),
    hovermode="x unified"
)

# rangeselector=dict(
#     visibe = True,
#     x = 0, y = 0.9,
#     bgcolor = 'rgba(150, 200, 250, 0.4)',
#     font = dict( size = 13 ),
#     buttons=list([
#         dict(count=1,
#              label='reset',
#              step='all'),
#         dict(count=1,
#              label='1yr',
#              step='year',
#              stepmode='backward'),
#         dict(count=3,
#             label='3 mo',
#             step='month',
#             stepmode='backward'),
#         dict(count=1,
#             label='1 mo',
#             step='month',
#             stepmode='backward'),
#         dict(step='all')
#     ]))
# y=df.Volume, 
# marker=dict( color=colors ),
# type='bar', yaxis='y', name='Volume'

fig.show()