In [None]:
import tensorflow as tf
import pandas as pd
import seaborn as sns
import talib as ta
import numpy as np
import matplotlib.pyplot as plt
tf.config.list_physical_devices('GPU')

In [None]:
def preprocess_data(data: pd.DataFrame) -> pd.DataFrame:
    """Derive technical-indicator features from price/volume data.

    Adds RSI, rolling volatility, two EMAs, MACD and several MACD-derived
    columns (crossover flags, divergence flags, slopes, a volume interaction
    and a rolling correlation), then drops every row that still holds a NaN.

    Args:
        data: Frame with at least a ``close`` and a ``volume`` column.

    Returns:
        A new frame with the input columns plus the derived features,
        restricted to rows in which every column is populated. The frame
        passed in by the caller is left unmodified.
    """
    # Work on a copy so the caller's frame does not silently gain columns.
    data = data.copy()
    close = data['close']

    data['RSI'] = ta.RSI(close, timeperiod=14)
    data['volatility'] = close.rolling(20).std()
    data['EMA26'] = close.ewm(span=26, adjust=False).mean()
    data['EMA12'] = close.ewm(span=12, adjust=False).mean()
    data['MACD'], data['Signal'], data['Hist'] = ta.MACD(
        close, fastperiod=12, slowperiod=26, signalperiod=9)
    # Read the lines back from the frame so they are pandas Series regardless
    # of what the indicator library returned.
    macd = data['MACD']
    signal = data['Signal']

    # Golden cross / death cross markers: MACD moves above / below its
    # signal line relative to the previous row.
    data['golden_cross'] = ((macd > signal) &
                            (macd.shift(1) <= signal.shift(1))).astype(int)
    data['death_cross'] = ((macd < signal) &
                           (macd.shift(1) >= signal.shift(1))).astype(int)

    # Whether a cross occurred anywhere in the last N rows.
    for n in [3, 5, 10]:
        data[f'golden_cross_{n}d'] = data['golden_cross'].rolling(n).max()
        data[f'death_cross_{n}d'] = data['death_cross'].rolling(n).max()

    # NOTE(review): the centred 5-row windows below read two rows *after* the
    # current one, so these flags use future data. Confirm that is acceptable
    # for the forecasting use of these features.
    # NOTE(review): `macd_peak` / `macd_valley` are stored as columns but are
    # not used in the divergence flags.

    # Top divergence: price is a local high while MACD is below its value
    # three rows earlier.
    data['price_peak'] = close.rolling(5, center=True).max() == close
    data['macd_peak'] = macd.rolling(5, center=True).max() == macd
    data['top_divergence'] = (data['price_peak'] &
                              (macd < macd.shift(3))).astype(int)

    # Bottom divergence: price is a local low while MACD is above its value
    # three rows earlier.
    data['price_valley'] = close.rolling(5, center=True).min() == close
    data['macd_valley'] = macd.rolling(5, center=True).min() == macd
    data['bottom_divergence'] = (data['price_valley'] &
                                 (macd > macd.shift(3))).astype(int)

    # Instantaneous slope of the MACD line.
    data['macd_slope'] = macd.diff()
    # Moving averages of the slope (captures trend strength).
    for window in [3, 5, 10]:
        data[f'macd_slope_ma{window}'] = data['macd_slope'].rolling(window).mean()

    # MACD histogram rate of change (disabled).
    # data['hist_change_rate'] = data['Hist'].pct_change()

    # Volume/price interaction. A zero volume three rows back makes the
    # percentage change infinite; `dropna()` would keep such a row and it
    # would then corrupt any later mean/std normalisation, so treat it as
    # missing instead.
    volume_change = data['volume'].pct_change(3).replace(
        [np.inf, -np.inf], np.nan)
    data['macd_volume'] = macd * volume_change

    # Rolling 24-row correlation between MACD and its own 12-row mean.
    data['macd_12_24_corr'] = macd.rolling(24).corr(macd.rolling(12).mean())
    return data.dropna()

In [None]:
# Read the raw quotes (parsing the `date` column as datetimes) and pass them
# straight through feature engineering.
df = preprocess_data(pd.read_csv('stock.csv', parse_dates=['date']))

In [None]:
# fft = tf.signal.rfft(df['close'])
# f_per_dataset = np.arange(0, len(fft))

# n_samples_h = len(df['close'])
# hours_per_year = 24*2
# years_per_dataset = n_samples_h/(hours_per_year)

# f_per_year = f_per_dataset/years_per_dataset
# plt.step(f_per_year, np.abs(fft))
# plt.xscale('log')
# plt.ylim(0, 400000)
# plt.xlim([0.1, max(plt.xlim())])
# plt.xticks([1, 48], labels=['1/day', '30min'])
# _ = plt.xlabel('Frequency (log scale)')

In [None]:
# Map every column name to its positional index.
column_indices = {name: i for i, name in enumerate(df.columns)}

print(column_indices)

# Split by row position into 70% train / 20% validation / 10% test.
# The boundaries are computed once so the three slices cannot drift apart.
n = len(df)
train_end = int(n*0.7)
val_end = int(n*0.9)
train_df = df.iloc[:train_end]
val_df = df.iloc[train_end:val_end]
test_df = df.iloc[val_end:]

num_features = df.shape[1]

# Normalisation statistics come from the training rows only, so the
# validation and test rows do not influence the scaling.
train_mean = train_df.mean()
train_std = train_df.std()

train_df = (train_df - train_mean) / train_std
val_df = (val_df - train_mean) / train_std
test_df = (test_df - train_mean) / train_std

# Distribution of every normalised column, one violin per column.
df_std = (df - train_mean) / train_std
df_std = df_std.melt(var_name='Column', value_name='Normalized')
plt.figure(figsize=(12, 6))
ax = sns.violinplot(x='Column', y='Normalized', data=df_std)
# Rotate the labels seaborn already placed instead of overwriting them with
# `set_xticklabels`, which warns when the tick positions are not fixed and
# relies on the label order matching.
ax.tick_params(axis='x', labelrotation=90)

In [None]:
class WindowGenerator():
  """Describes a sliding window over a time series and how to split it.

  A window spans `input_width + shift` consecutive rows. The first
  `input_width` rows are the model inputs and the last `label_width` rows are
  the labels, optionally restricted to the columns in `label_columns`.
  """

  def __init__(self, input_width, label_width, shift,
               train_df=train_df, val_df=val_df, test_df=test_df,
               label_columns=None):
    # Keep references to the three data splits.
    self.train_df, self.val_df, self.test_df = train_df, val_df, test_df

    # Name -> position lookups: one within the label columns (only when
    # label columns were given) and one within the full feature set.
    self.label_columns = label_columns
    if label_columns is not None:
      self.label_columns_indices = dict(
          (name, position) for position, name in enumerate(label_columns))
    self.column_indices = dict(
        (name, position) for position, name in enumerate(train_df.columns))

    # Window geometry.
    self.input_width = input_width
    self.label_width = label_width
    self.shift = shift
    self.total_window_size = input_width + shift

    # Inputs occupy the leading `input_width` positions of the window.
    self.input_slice = slice(0, input_width)
    self.input_indices = np.arange(self.total_window_size)[self.input_slice]

    # Labels occupy the trailing `label_width` positions of the window.
    self.label_start = self.total_window_size - self.label_width
    self.labels_slice = slice(self.label_start, None)
    self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

  def __repr__(self):
    summary = (
        f'Total window size: {self.total_window_size}',
        f'Input indices: {self.input_indices}',
        f'Label indices: {self.label_indices}',
        f'Label column name(s): {self.label_columns}',
    )
    return '\n'.join(summary)


  def split_window(self, features):
    """Split a batch of consecutive windows into an inputs window and a labels window.

    `features` is indexed as (batch, time, features). Returns the pair
    `(inputs, labels)`; when `label_columns` is set, `labels` keeps only
    those columns, in the order given.
    """
    inputs = features[:, self.input_slice, :]
    labels = features[:, self.labels_slice, :]
    if self.label_columns is not None:
      label_positions = [self.column_indices[name]
                         for name in self.label_columns]
      labels = tf.stack(
          [labels[:, :, position] for position in label_positions],
          axis=-1)

    # Slicing loses the static shape, so restore the known time dimension;
    # this keeps the resulting `tf.data.Dataset` objects easy to inspect.
    inputs.set_shape([None, self.input_width, None])
    labels.set_shape([None, self.label_width, None])
    return inputs, labels

In [None]:
# Window of six input rows that predicts `close` one row ahead.
w2 = WindowGenerator(input_width=6, label_width=1, shift=1,
                     label_columns=['close'])

# Stack three windows cut from the training split at row offsets 0, 100, 200.
window_len = w2.total_window_size
example_window = tf.stack([
    np.array(train_df[start:start + window_len])
    for start in (0, 100, 200)])
example_inputs, example_labels = w2.split_window(example_window)

print('All shapes are: (batch, time, features)')
print(f'Window shape: {example_window.shape}')
print(f'Inputs shape: {example_inputs.shape}')
print(f'Labels shape: {example_labels.shape}')

# Keep the split windows on the generator for later use.
w2.example = (example_inputs, example_labels)

In [None]:
def plot(self, model=None, plot_col='close', max_subplots=3):
  """Plot the example windows for one column, one subplot per window.

  Args:
    model: Optional callable applied to the example inputs; its output is
      drawn as 'Predictions'. It is called at most once per plot.
    plot_col: Name of the column to draw; must be a key of
      `self.column_indices`.
    max_subplots: Upper bound on the number of example windows drawn.
  """
  inputs, labels = self.example
  plt.figure(figsize=(12, 8))
  plot_col_index = self.column_indices[plot_col]
  max_n = min(max_subplots, len(inputs))

  # The label column position does not depend on the subplot, so resolve it
  # once. It is None when `plot_col` is not one of the label columns.
  if self.label_columns:
    label_col_index = self.label_columns_indices.get(plot_col, None)
  else:
    label_col_index = plot_col_index

  # Run the model a single time for the whole batch instead of once per
  # subplot, and only when its output will actually be drawn.
  predictions = None
  if model is not None and label_col_index is not None and max_n > 0:
    predictions = model(inputs)

  for n in range(max_n):
    plt.subplot(max_n, 1, n+1)
    plt.ylabel(f'{plot_col} [normed]')
    plt.plot(self.input_indices, inputs[n, :, plot_col_index],
             label='Inputs', marker='.', zorder=-10)

    # Nothing to compare against when the column is not a label.
    if label_col_index is None:
      continue

    plt.scatter(self.label_indices, labels[n, :, label_col_index],
                edgecolors='k', label='Labels', c='#2ca02c', s=64)
    if predictions is not None:
      plt.scatter(self.label_indices, predictions[n, :, label_col_index],
                  marker='X', edgecolors='k', label='Predictions',
                  c='#ff7f0e', s=64)

    if n == 0:
      plt.legend()

  plt.xlabel('date')

# Attach `plot` to the class as a method (existing instances pick it up too),
# then draw the example windows held by `w2`.
WindowGenerator.plot = plot
w2.plot()

In [None]:
def make_dataset(self, data, shuffle=True, batch_size=32):
  """Build a batched dataset of `(inputs, labels)` windows from `data`.

  Args:
    data: Array-like convertible to a float32 array; windows of
      `self.total_window_size` consecutive rows are cut with stride 1.
      Presumably laid out as (time, features) - confirm against the caller.
    shuffle: Whether the windows are shuffled. Defaults to True, the value
      that used to be hard-coded.
    batch_size: Number of windows per batch. Defaults to 32, the value that
      used to be hard-coded.

  Returns:
    The windowed dataset with `self.split_window` mapped over every batch.
  """
  data = np.array(data, dtype=np.float32)
  ds = tf.keras.utils.timeseries_dataset_from_array(
      data=data,
      targets=None,  # labels are carved out of each window by split_window
      sequence_length=self.total_window_size,
      sequence_stride=1,
      shuffle=shuffle,
      batch_size=batch_size,)

  ds = ds.map(self.split_window)

  return ds

# Attach `make_dataset` to the class so every WindowGenerator instance can call it.
WindowGenerator.make_dataset = make_dataset

In [None]:
@property
def train(self):
  """Windowed dataset built from the training split (rebuilt on every access)."""
  frame = self.train_df
  return self.make_dataset(frame)

@property
def val(self):
  """Windowed dataset built from the validation split (rebuilt on every access)."""
  frame = self.val_df
  return self.make_dataset(frame)

@property
def test(self):
  """Windowed dataset built from the test split (rebuilt on every access)."""
  frame = self.test_df
  return self.make_dataset(frame)

@property
def example(self):
  """Return one `inputs, labels` batch for plotting, caching it on first use."""
  cached = getattr(self, '_example', None)
  if cached is not None:
    return cached

  # Nothing cached yet: take the first batch of the `.train` dataset and
  # remember it so later calls return the same batch.
  batch = next(iter(self.train))
  self._example = batch
  return batch

# Install the four properties on the class.
# NOTE: `example` has no setter, so once this cell has run, assigning
# `some_window.example = ...` raises AttributeError; the property also takes
# precedence over any `example` attribute stored on an instance earlier.
WindowGenerator.train = train
WindowGenerator.val = val
WindowGenerator.test = test
WindowGenerator.example = example

# Every `w2.train` access builds a fresh dataset, so build it once and reuse
# it for both the spec and the sample batch.
train_ds = w2.train
# A bare expression is only rendered when it is the last line of a cell, so
# print the spec explicitly instead of discarding it.
print(train_ds.element_spec)
for example_inputs, example_labels in train_ds.take(1):
  print(f'Inputs shape (batch, time, features): {example_inputs.shape}')
  print(f'Labels shape (batch, time, features): {example_labels.shape}')