In [6]:
import scipy.io
import pickle
import os
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random
from IPython.display import display, HTML

import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.layers import Input, Conv1D, BatchNormalization, Activation, add, LSTM, Dense, Dropout,GRU, Bidirectional, MaxPooling1D
from tensorflow.keras.backend import clear_session

from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, classification_report
from sklearn.decomposition import PCA

from scipy.io import loadmat

tf.random.set_seed(42)
np.random.seed(42)

In [7]:
directory = 'D:/OneDrive/DXLab_Vu/BaiBao/Trong nuoc/2024/Paper-2024-1DCNNLSTM-Khung/Code\Data'

all_data = {}

# Iterate over all files in the directory
for filename in os.listdir(directory):
    if filename.endswith('.mat'):
        filepath = os.path.join(directory, filename)
        
        # Load the .mat file and add its contents to the dictionary
        mat_data = loadmat(filepath)
        
        # Use filename (without extension) as key for the data
        key = os.path.splitext(filename)[0]
        row_means = np.mean(mat_data['acceleration'],1)
        rows_2_keep = row_means != 0
        all_data[key] = mat_data['acceleration'][rows_2_keep]
        
keys_to_stack = [f'spaceframe{i}' for i in range(1,11)]
input_data = np.stack([all_data[key] for key in keys_to_stack], axis=0)

# Create the corresponding labels
output_labels = np.linspace(0,10,11)  # Using 0 and 1 as class labels for binary cross-entropy
label = output_labels

input_data = input_data[:,:,:10000]
input_data.shape, output_labels.shape

((10, 24, 10000), (11,))

In [8]:
def check_for_different_shapes(arrays):
    """
    Kiểm tra xem các mảng trong danh sách có kích thước không đồng nhất không.

    Parameters:
        arrays (list): Danh sách các mảng NumPy.

    Returns:
        list: Danh sách các mảng không đồng nhất.
    """
    inhomogeneous_arrays = []
    expected_shape = None
    for array in arrays:
        if expected_shape is None:
            expected_shape = array.shape
        elif array.shape != expected_shape:
            inhomogeneous_arrays.append(array)
    return inhomogeneous_arrays

def augment_time_series_data(input_data, labels, num_augmentations=5):
    """
    Augment time series data.

    :param input_data: Original time series data array.
    :param labels: Corresponding labels for the data.
    :param num_augmentations: Number of augmented samples to generate per original sample.

    :return: Augmented data array and corresponding labels.
    """
    augmented_data = []
    augmented_labels = []

    num_samples, num_channels, sequence_length = input_data.shape

    for i in range(num_samples):
        for _ in range(num_augmentations):
            # Choose a random augmentation technique
            augmentation_type = random.choices(['noise', 'reverse', 'crop_pad', 'time_warp', 'random_shift'],
                                               weights=[0.6, 0.1, 0.1, 0.1, 0.1])[0]

            if augmentation_type == 'noise':
                # Add random noise
                noise = np.random.normal(0, 0.00005, input_data[i].shape)
                augmented_sample = input_data[i] + noise

            elif augmentation_type == 'reverse':
                # Reverse the sequence
                augmented_sample = np.flip(input_data[i], axis=-1)

            elif augmentation_type == 'crop_pad':
                # Crop and pad the sequence
                crop_size = random.randint(1, sequence_length // 100)
                padded_sample = np.pad(input_data[i], ((0, 0), (crop_size, 0)), mode='constant', constant_values=0)
                augmented_sample = padded_sample[:, :-crop_size]

            elif augmentation_type == 'time_warp':
                # Time warping
                start_idx = random.randint(0, sequence_length // 2)
                end_idx = random.randint(start_idx, sequence_length)
                warped_segment = np.mean(input_data[i][:, start_idx:end_idx], axis=1, keepdims=True)
                augmented_sample = np.concatenate((warped_segment, input_data[i][:, end_idx:]), axis=1)

            elif augmentation_type == 'random_shift':
                # Random shifting
                shift_amount = random.randint(-(sequence_length // 10), sequence_length // 10)
                augmented_sample = np.roll(input_data[i], shift_amount, axis=-1)

            if augmented_sample.shape == (num_channels, sequence_length):
                augmented_data.append(augmented_sample)
                augmented_labels.append(labels[i])
            else:
                print("Invalid shape:", augmented_sample.shape)

    # Convert to numpy arrays
    # Sử dụng hàm
    inhomogeneous_arrays = check_for_different_shapes(augmented_data)
    if inhomogeneous_arrays:
        print("Các mảng không đồng nhất:")
        for array in inhomogeneous_arrays:
            print(array.shape)
    else:
        print("Tất cả các mảng có kích thước giống nhau.")

    return np.array(augmented_data), np.array(augmented_labels)

# Sử dụng hàm
augmented_data, augmented_labels = augment_time_series_data(input_data, output_labels, num_augmentations=30)
print(augmented_data.shape, augmented_labels.shape)

Invalid shape: (24, 8942)
Invalid shape: (24, 5682)
Invalid shape: (24, 5492)
Invalid shape: (24, 5792)
Invalid shape: (24, 1971)
Invalid shape: (24, 5581)
Invalid shape: (24, 1874)
Invalid shape: (24, 7889)
Invalid shape: (24, 4121)
Invalid shape: (24, 5474)
Invalid shape: (24, 1069)
Invalid shape: (24, 4163)
Invalid shape: (24, 2764)
Invalid shape: (24, 985)
Invalid shape: (24, 2335)
Invalid shape: (24, 1864)
Invalid shape: (24, 2912)
Invalid shape: (24, 7259)
Invalid shape: (24, 5081)
Invalid shape: (24, 6907)
Invalid shape: (24, 2692)
Invalid shape: (24, 6947)
Invalid shape: (24, 1326)
Invalid shape: (24, 5816)
Invalid shape: (24, 6992)
Invalid shape: (24, 4235)
Tất cả các mảng có kích thước giống nhau.
(274, 24, 10000) (274,)


In [9]:
import numpy as np

def reshape_time_series_data_v8(input_data, label_data, segments_per_new_sample, segment_length):
    """
    Reshape time series data and corresponding labels into a specified shape.

    :param input_data: Original time series data array.
    :param label_data: Corresponding labels for the data.
    :param segments_per_new_sample: Number of segments per new sample.
    :param segment_length: Length of each segment.

    :return: Reshaped data array and corresponding labels.
    """
    num_samples_original, num_channels, length_original = input_data.shape

    # Validate the feasibility of reshaping
    if length_original % segment_length != 0:
        raise ValueError("Segment length must evenly divide the original length.")

    total_segments_per_original_sample = (length_original // segment_length) * num_channels
    num_samples_new = (num_samples_original * total_segments_per_original_sample) // segments_per_new_sample

    # Validate if reshaping is possible
    if (num_samples_original * total_segments_per_original_sample) % segments_per_new_sample != 0:
        raise ValueError("Reshaping not possible with the given dimensions.")

    # Initialize reshaped data and labels
    new_shape = (num_samples_new, segments_per_new_sample, segment_length)
    reshaped_data = np.zeros(new_shape)
    reshaped_labels = np.zeros(num_samples_new)

    # Reshape the data and labels
    count = 0
    for i in range(num_samples_original):
        segment_count = 0
        for j in range(num_channels):
            for k in range(length_original // segment_length):
                start_idx = k * segment_length
                end_idx = start_idx + segment_length
                reshaped_data[count, segment_count % segments_per_new_sample, :] = input_data[i, j, start_idx:end_idx]
                if (segment_count + 1) % segments_per_new_sample == 0:
                    reshaped_labels[count] = label_data[i]  # Assign corresponding label
                    count += 1
                segment_count += 1

    return reshaped_data, reshaped_labels

# Example usage
segments_per_new_sample = 10
segment_length = 2000

# Assume 'augmented_data' and 'augmented_labels' are your input data and labels
reshaped_data, reshaped_labels = reshape_time_series_data_v8(augmented_data, augmented_labels, segments_per_new_sample, segment_length)
print(reshaped_data.shape, reshaped_labels.shape)

(3288, 10, 2000) (3288,)


In [26]:
reshaped_data.shape[1]

10

In [30]:
reshaped_data.shape[0]

3288

In [39]:

X_2d = reshaped_data.reshape(-1, reshaped_data.shape[2])  # (1000 * 10, 2000)

# Áp dụng PCA để giảm số timesteps từ 2000 xuống 500
n_components = 500
pca = PCA(n_components=n_components)
X_2d_reduced = pca.fit_transform(X_2d)  # (1000 * 10, 500)

# Chuyển lại thành dạng 3D
X_reduced = X_2d_reduced.reshape(reshaped_data.shape[0], reshaped_data.shape[1], n_components)  # (1000, 10, 500)
print("Kích thước dữ liệu sau PCA:", X_reduced.shape)

Kích thước dữ liệu sau PCA: (3288, 10, 500)


In [40]:
input_train = reshaped_data
output_train = reshaped_labels

# input_train = X_reduced
# output_train = reshaped_labels

# input_train = augmented_data
# output_train = augmented_labels

X_train, X_temp, y_train, y_temp = train_test_split(input_train, output_train, test_size=0.4, random_state=42)
X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

print("X_train's shape:" + str(X_train.shape))
print("y_train's shape:" + str(y_train.shape))
print("X_test's shape:" + str(X_test.shape))
print("y_test's shape:" + str(y_test.shape))
print("X_val's shape:" + str(X_valid.shape))
print("y_val's shape:" + str(y_valid.shape))

X_train's shape:(1972, 10, 2000)
y_train's shape:(1972,)
X_test's shape:(658, 10, 2000)
y_test's shape:(658,)
X_val's shape:(658, 10, 2000)
y_val's shape:(658,)


In [41]:
label=np.unique(y_train)
print('Label = ' + str(label))
num_classes = len(np.unique(y_train))
print('No. Labels: ' + str(num_classes))

Label = [0. 1. 2. 3. 4. 5. 6. 7. 8. 9.]
No. Labels: 10


In [42]:
clear_session()

def build_CNN_BiGRU_model(input_shape, num_classes):
    # Định nghĩa input tensor
    input_tensor = Input(shape=input_shape)  # input_shape: (timesteps, features), ví dụ (10, 2000)

    # 1D CNN layers để trích xuất đặc trưng không gian
    x = Conv1D(filters=128, kernel_size=3, activation='relu', padding='same')(input_tensor)
    x = MaxPooling1D(pool_size=2)(x)  # Giảm kích thước chuỗi (timesteps) xuống một nửa
    x = Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(x)
    x = Dropout(0.3)(x)  # Thêm dropout để giảm overfitting

    # BiGRU layers để học thông tin tuần tự
    x = Bidirectional(GRU(200, return_sequences=True))(x)  # Lớp BiGRU đầu tiên
    x = Bidirectional(GRU(200, return_sequences=True, dropout=0.5))(x)  # Lớp BiGRU thứ hai với dropout
    x = Flatten()(x)  # Chuyển thành vector 1D để kết nối với Dense layers

    # Dense layers để phân loại
    x = Dense(100, activation='relu')(x)
    output_tensor = Dense(num_classes, activation='softmax')(x)  # Lớp đầu ra với softmax
    model = tf.keras.Model(inputs=input_tensor, outputs=output_tensor)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Run the model on GPU if available
with tf.device('/GPU:0'):
    DCNN_BiGRU_model = build_CNN_BiGRU_model((X_train.shape[1], X_train.shape[2]), num_classes)  # Adjusted input shape to have 3 dimensions
    
    DCNN_BiGRU_model.summary()

    # Early stopping callback
    early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=100, restore_best_weights=True)

    # Train the model
    history_DCNN_BiGRU = DCNN_BiGRU_model.fit(X_train, y_train, batch_size=32, epochs=100, validation_data=(X_valid, y_valid))

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 10, 2000)]        0         
                                                                 
 conv1d (Conv1D)             (None, 10, 128)           768128    
                                                                 
 max_pooling1d (MaxPooling1D  (None, 5, 128)           0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, 5, 64)             24640     
                                                                 
 dropout (Dropout)           (None, 5, 64)             0         
                                                                 
 bidirectional (Bidirectiona  (None, 5, 400)           319200    
 l)                                                          