In [39]:
import akshare as ak 
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [27]:
# Download the daily history for index code "000001" from akshare, bounded by
# the start_date / end_date strings below.
sz_index_df = ak.index_zh_a_hist(
    symbol="000001",
    period="daily",
    start_date="19910701",
    end_date="20200831",
)

In [28]:
# Tidy the raw frame in a single chain:
#   1. drop the two columns that are not used,
#   2. put the remaining columns into the desired order,
#   3. rename them from the source labels to English names,
#   4. scale the volume column by 100
#      (presumably the source reports volume in lots of 100 shares -- TODO confirm),
#   5. index the rows by date.
sz_index_df = (
    sz_index_df
    .drop(columns=['振幅', '换手率'])
    .loc[:, ['日期', '开盘', '最高', '最低', '收盘', '成交量', '成交额', '涨跌额', '涨跌幅']]
    .rename(columns={
        '日期': 'date',
        '开盘': 'Opening price',
        '最高': 'Highest price',
        '最低': 'Lowest price',
        '收盘': 'Closing price',
        '成交量': 'Volume(share)',
        '成交额': 'Turnover(RMB)',
        '涨跌额': 'Ups and downs',
        '涨跌幅': 'Change(%)',
    })
    .assign(**{'Volume(share)': lambda frame: frame['Volume(share)'] * 100})
    .set_index('date')
)

In [41]:
# Split the data into training and test sets: the trailing TEST_SIZE rows are
# held out for testing and everything before them is used for training.
TEST_SIZE = 500  # number of trailing rows reserved for the test set

# Without this guard a short frame would make split_point zero or negative;
# iloc would then slice relative to the END of the frame and silently return
# an empty or wrongly sized training set instead of failing.
if len(sz_index_df) <= TEST_SIZE:
    raise ValueError(
        f"Need more than {TEST_SIZE} rows to build a train/test split, "
        f"got {len(sz_index_df)}"
    )

split_point = len(sz_index_df) - TEST_SIZE
train_df = sz_index_df.iloc[:split_point]
test_df = sz_index_df.iloc[split_point:]

In [42]:
# Standardize the features. The scaler is fitted on the training rows only and
# the same fitted scaler is then applied to both splits.
scaler = StandardScaler().fit(train_df)
train_set_standardized, test_set_standardized = (
    scaler.transform(split_df) for split_df in (train_df, test_df)
)


In [54]:
def create_sequences_and_targets(data, time_steps=10, n_features=8):
    """
    Build sliding-window sequences and their next-step targets.

    Every window of ``time_steps`` consecutive rows becomes one input
    sequence, and the row immediately following the window becomes its target.
    Only the first ``n_features`` columns of ``data`` are used, for both the
    sequences and the targets.

    Parameters:
    data (array-like): Two-dimensional data of shape (num_samples, num_features).
                       Anything ``np.asarray`` can convert to a 2-D array is
                       accepted (NumPy array, nested list, DataFrame, ...).
    time_steps (int): Number of consecutive rows in each sequence; must be >= 1.
    n_features (int): Number of leading columns to keep.

    Returns:
    X, y: Tuple of numpy arrays
          X has shape (num_sequences, time_steps, k) and y has shape
          (num_sequences, k), where num_sequences = max(num_samples - time_steps, 0)
          and k is the number of columns left after slicing ``[:n_features]``.
          When the input has too few rows the arrays are empty but keep this
          rank. Both are copies and do not alias ``data``.

    Raises:
    ValueError: If ``time_steps`` is smaller than 1.
    IndexError: If ``data`` is not at least two-dimensional.
    """
    if time_steps < 1:
        raise ValueError(f"time_steps must be at least 1, got {time_steps}")

    # Convert once so lists / DataFrames support the 2-D slicing below.
    features = np.asarray(data)[:, :n_features]

    # A window needs time_steps rows plus one following row for its target.
    n_sequences = max(len(features) - time_steps, 0)

    # Row i of window_index lists the row numbers i, i+1, ..., i+time_steps-1.
    # Indexing with it yields all windows at once, as a fresh array that keeps
    # its 3-D shape even when n_sequences is 0.
    window_index = np.arange(n_sequences)[:, None] + np.arange(time_steps)
    X = features[window_index]

    # The target of window i is row i + time_steps, i.e. rows time_steps onward.
    y = features[time_steps:time_steps + n_sequences].copy()

    return X, y

# Turn each standardized split into windowed model inputs and next-step
# targets, using the function's default time_steps and n_features.
# Each split is windowed independently of the other.
train_sequences, train_targets = create_sequences_and_targets(
    train_set_standardized
)
test_sequences, test_targets = create_sequences_and_targets(
    test_set_standardized
)
