In [None]:
import librosa
import pandas as pd
import numpy as np

In [None]:
# Notebook-wide settings: rows per rolling window, and the number of frequency
# bins requested from the constant-Q transform.
cqt_window_size, n_bins = 30, 20

# Load the input table (the file name suggests CRSP daily data).
df = pd.read_feather('CRSP_daily_data_for_project(Technical_Analysis).feather')

def create_cqt_df(stock_df):
    """Compute constant-Q transform features for one stock's return series.

    The ``'vwretx'`` column is cleaned (NaN and +/-inf become 0.0), passed to
    ``librosa.cqt`` and converted to decibels relative to the mean magnitude.
    The result is returned next to the ``'date'`` column, one row per input row.

    Parameters
    ----------
    stock_df : pandas.DataFrame
        Must contain the columns ``'vwretx'`` and ``'date'``.
        NOTE(review): rows are presumably one observation per date in
        chronological order -- confirm with the caller.

    Returns
    -------
    pandas.DataFrame
        ``'date'`` followed by ``CQT_1 .. CQT_k`` (one column per CQT bin; the
        bin count comes from the notebook-level ``n_bins``), indexed like
        ``stock_df``.
    """
    # Read the returns as a plain float array; missing values become NaN first.
    stock_returns = stock_df['vwretx'].to_numpy(dtype=float, na_value=np.nan)

    # Preprocessing: make sure every value is finite.
    stock_returns = np.nan_to_num(stock_returns, nan=0.0, posinf=0.0, neginf=0.0)

    sr = 1
    fmin = 0.01  # minimum frequency
    hop_length = 1
    cqt_result = librosa.cqt(stock_returns, n_bins=n_bins, sr=sr, hop_length=hop_length, fmin=fmin)
    cqt_result_db = librosa.amplitude_to_db(np.abs(cqt_result), ref=np.mean)

    # Keep at most one CQT frame per input row so the lengths match.
    n_rows = min(len(stock_df), cqt_result_db.shape[1])
    column_names = [f'CQT_{i+1}' for i in range(cqt_result_db.shape[0])]

    # Build the frame on the caller's own index: concat(axis=1) aligns on index
    # labels, so a fresh 0..n-1 index would only line up with the dates when
    # stock_df happens to carry a default RangeIndex.
    cqt_df = pd.DataFrame(
        cqt_result_db.T[:n_rows],
        columns=column_names,
        index=stock_df.index[:n_rows],
    )
    cqt = pd.concat([stock_df['date'], cqt_df], axis=1)
    return cqt

In [None]:
# Distinct stock identifiers, in order of first appearance; axis 0 of the
# output array follows this order.
stock_codes = df['PERMNO'].unique()

# Sort the distinct dates so axis 1 of the output array is chronological.
# ``Series.unique`` returns values in order of first appearance, which is only
# chronological when ``df`` itself happens to be sorted by date.
sorted_dates = pd.Index(df['date'].unique()).sort_values()
unique_dates = sorted_dates.to_numpy()

num_dates = len(unique_dates)
max_shape = (len(stock_codes), num_dates, cqt_window_size, n_bins)
# NaN marks (stock, date) cells for which no window is written.
# NOTE(review): this allocates stocks x dates x window x bins float64 values
# up front -- confirm it fits in memory for the full data set.
final_4d_array = np.full(max_shape, np.nan)

# Date -> position along axis 1. The keys come from iterating the pandas Index,
# so they are pandas/Python scalars (e.g. ``Timestamp`` for datetime columns),
# the same kind of value that a row lookup such as ``frame.iloc[i]['date']``
# returns.
date_to_index = {date: idx for idx, date in enumerate(sorted_dates)}


# Function to create rolling windows
def create_rolling_windows(data, cqt_window_size, date_lookup=None):
    """Slice ``data`` into overlapping windows of ``cqt_window_size`` rows.

    Each window is labelled with the position of its *first* row's date in
    ``date_lookup``. Windows whose start date is not in the lookup are dropped,
    so the returned windows and indices always correspond one-to-one.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a ``'date'`` column. The first column is excluded from the
        window values (it is assumed to be the date column) and every other
        column is kept.
    cqt_window_size : int
        Number of consecutive rows per window; must be at least 1.
    date_lookup : mapping, optional
        Maps a date value to an integer position. Defaults to the
        notebook-level ``date_to_index``.

    Returns
    -------
    windows : numpy.ndarray
        Shape ``(n_windows, cqt_window_size, n_feature_columns)``.
    date_indices : list
        ``date_lookup`` position of each window's start date.

    Raises
    ------
    ValueError
        If ``cqt_window_size`` is smaller than 1.
    """
    if cqt_window_size < 1:
        raise ValueError("cqt_window_size must be at least 1")
    if date_lookup is None:
        date_lookup = date_to_index

    # Extract the feature matrix once instead of re-slicing the frame per window.
    features = data.iloc[:, 1:].to_numpy()
    start_dates = data['date']

    windows = []
    date_indices = []
    for start in range(len(data) - cqt_window_size + 1):
        start_date = start_dates.iloc[start]
        if start_date not in date_lookup:
            # Skip the window together with its index; appending only the
            # window would shift every later (window, index) pair.
            continue
        windows.append(features[start:start + cqt_window_size])
        date_indices.append(date_lookup[start_date])

    if not windows:
        # Keep a 3-D shape even when there is nothing to return.
        return np.empty((0, cqt_window_size, features.shape[1])), date_indices
    return np.array(windows), date_indices

In [None]:
# Row of each stock along axis 0 of final_4d_array (follows stock_codes).
stock_to_index = {code: idx for idx, code in enumerate(stock_codes)}

# One groupby pass replaces a full boolean scan of df for every stock.
# sort=False keeps groups in order of first appearance.
for stock_code, stock_rows in df.groupby('PERMNO', sort=False):
    # A series shorter than one window cannot produce any rolling window, so
    # skip it before running the transform.
    if len(stock_rows) < cqt_window_size:
        continue
    stock_idx = stock_to_index[stock_code]

    # The transform and the windows operate on consecutive rows, so put the
    # rows in date order first (stable sort: a no-op for already sorted data).
    stock_data = stock_rows.sort_values('date', kind='stable').reset_index(drop=True)

    # create CQT data
    cqt_data = create_cqt_df(stock_data)

    rolling_windows_3d, date_indices = create_rolling_windows(cqt_data, cqt_window_size)

    # fill the 4d array; each window is stored at the slot of its start date
    for window_data, date_idx in zip(rolling_windows_3d, date_indices):
        # Ignore windows that do not have the expected (rows, bins) shape.
        if window_data.shape == (cqt_window_size, n_bins):
            final_4d_array[stock_idx, date_idx, :, :] = window_data


final_4d_array