In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

In [3]:
# Ordered parameter tags. preprocess_data looks tags up in this list to find
# the positions of the feature columns it drops.
# NOTE(review): 'LATG' is listed twice (positions 9 and 21) -- confirm whether
# the second entry was meant to be a different tag.
para_list = [
    'LATP', 'LONP', 'RALT', 'GS', 'TAS',
    'IVV', 'BLAC', 'CTAC', 'FPAC', 'LATG',
    'N1_1', 'FLAP', 'PTCH', 'ROLL', 'AOA1',
    'AIL_1', 'ELEV_1', 'RUDD', 'LOC', 'GLS',
    'ALT', 'LATG', 'PTRM', 'LONG', 'OIT_1',
]

def load_data(read_url):
    """Load the four ``.npy`` arrays of a dataset and print a shape summary.

    Args:
        read_url: String prefix the file names are appended to directly; no
            path separator is inserted, so it must already end with one when
            it names a directory.

    Returns:
        The tuple ``(train_y_list, test_y_list, train_X_list, test_X_list)``
        exactly as returned by ``np.load``.
    """
    print('Loading data ...')

    def _read(file_name):
        # NOTE: allow_pickle=True lets np.load unpickle object arrays, which
        # can execute arbitrary code -- only use this on trusted files.
        return np.load(read_url + file_name, allow_pickle=True)

    train_y_list = _read('train_y_list.npy')
    test_y_list = _read('test_y_list.npy')
    train_X_list = _read('train_x_list.npy')
    test_X_list = _read('test_x_list.npy')

    # Summarise the outer arrays and their first element.
    print("Data shapes: ")
    print(f"train_x_list: {train_X_list.shape}")
    print(f"training data within each fold: {train_X_list[0].shape}")
    print(f"train_y_list: {train_y_list.shape}")
    print(f"training label within each fold: {train_y_list[0].shape}")

    return train_y_list, test_y_list, train_X_list, test_X_list

def reshape_data(data):
    """Collapse the first two axes of a 3D array into one.

    Args:
        data: Array whose ``shape`` has exactly three entries, read here as
            (samples, timesteps, features).

    Returns:
        A pair ``(flat, original_shape)`` where ``flat`` has shape
        ``(samples * timesteps, features)`` and ``original_shape`` is the
        3-tuple needed to undo the reshape.
    """
    # Unpacking raises if the input is not three-dimensional.
    n_samples, n_steps, n_feats = data.shape
    original_shape = (n_samples, n_steps, n_feats)

    # One row per (sample, timestep) pair so columns line up with features.
    row_count = n_samples * n_steps
    return data.reshape(row_count, n_feats), original_shape

def unflatten_data(flat_data, original_shape):
    """Reshape a flattened array back to three dimensions.

    Args:
        flat_data: Array to reshape.
        original_shape: Three-element (samples, timesteps, features) shape.

    Returns:
        ``flat_data`` reshaped to ``original_shape``.
    """
    # Unpack first so a shape without exactly three entries is rejected.
    n_samples, n_steps, n_feats = original_shape
    target_shape = (n_samples, n_steps, n_feats)
    return flat_data.reshape(target_shape)

def preprocess_data(train_X_list, test_X_list):
    """Drop the 'TAS' and 'GS' feature columns and standardize every fold.

    For each fold index of ``train_X_list`` the scaler is fitted on that
    fold's training array (flattened to 2D) and then applied to both the
    training and the test array at the same index.

    Args:
        train_X_list: Sequence of 3D training arrays, one per fold.
        test_X_list: Sequence of 3D test arrays, indexed in parallel with
            ``train_X_list``.

    Returns:
        A pair ``(train_list, test_list)`` of Python lists holding the
        filtered, standardized 3D arrays.
    """
    print('Preprocessing data ...')

    # Column positions of the tags to drop, as given by para_list; a tag that
    # is absent from para_list is simply skipped.
    dropped_columns = [para_list.index(tag) for tag in ('TAS', 'GS') if tag in para_list]

    # Remaining column positions in ascending order. The column count is
    # taken from the last axis of the first training fold.
    total_columns = train_X_list[0].shape[-1]
    keep_index = np.array([col for col in range(total_columns) if col not in dropped_columns])

    # Select the kept columns on the feature axis of every fold.
    train_folds = [fold[:, :, keep_index] for fold in train_X_list]
    test_folds = [fold[:, :, keep_index] for fold in test_X_list]

    scaler = StandardScaler()

    for fold_no, train_fold in enumerate(train_folds):
        # The scaler works on 2D input, so flatten both splits first.
        train_2d, train_shape = reshape_data(train_fold)
        test_2d, test_shape = reshape_data(test_folds[fold_no])

        # Statistics come from the training rows only; the test rows are
        # transformed with those same statistics.
        scaler.fit(train_2d)
        train_scaled = scaler.transform(train_2d)
        test_scaled = scaler.transform(test_2d)

        # Restore the original 3D layout in place of the unscaled fold.
        train_folds[fold_no] = unflatten_data(train_scaled, train_shape)
        test_folds[fold_no] = unflatten_data(test_scaled, test_shape)

    return train_folds, test_folds