### Bibliotecas

In [236]:
"""
%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras

"""

'\n%pip install aeon\n%pip install tsfresh\n%pip install tslearn\n%pip install tensorflow\n%pip install keras\n\n'

In [237]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt

from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB

from scipy.fftpack import fft
from numba import jit
from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

### Time Series Classifier Dynamic Series Representation

In [238]:
class TimeSeriesClassifier:
    def __init__(self, random_state=42):
        self.random_state = random_state
        self.trained_models = {}
        """_summary_

        Returns:
            _type_: _description_
        """
    
    @staticmethod  
    def choose_wavelet(X):
        min_variance = float('inf')
        best_wavelet = None
        candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']

        for wavelet_type in candidate_wavelets:
            _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
            total_variance = np.var(coeffs_cD)

            if total_variance < min_variance:
                min_variance = total_variance
                best_wavelet = wavelet_type
        return str(best_wavelet)


    @staticmethod
    def transform_data_math(X, wavelet):
        n_sax_symbols = int(X.shape[1] / 4)
        n_paa_segments = int(X.shape[1] / 4)

        X_fft = np.abs(fft(X, axis=1))

        coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
        X_dwt = np.hstack((coeffs_cA, coeffs_cD))

        paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
        X_paa_ = paa.inverse_transform(paa.fit_transform(X))
        X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
        stats_PAA = np.hstack([np.mean(X_paa, axis=1).reshape(-1,1),
                           np.std(X_paa, axis=1).reshape(-1,1),
                           np.max(X_paa, axis=1).reshape(-1,1),
                           np.min(X_paa, axis=1).reshape(-1,1),
                           ])

        sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
        X_sax_ = sax.inverse_transform(sax.fit_transform(X))
        X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
        stats_SAX = np.hstack([np.mean(X_sax, axis=1).reshape(-1,1),
                           np.std(X_sax, axis=1).reshape(-1,1),
                           np.max(X_sax, axis=1).reshape(-1,1),
                           np.min(X_sax, axis=1).reshape(-1,1),
                           ])

        data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
        data_X.resize(data_X.shape[0], data_X.shape[1])
        stats_X = np.hstack([np.mean(data_X, axis=1).reshape(-1,1),
                         np.std(data_X, axis=1).reshape(-1,1),
                         np.max(data_X, axis=1).reshape(-1,1),
                         np.min(data_X, axis=1).reshape(-1,1),
                         ])

        data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
        data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
        stats_FFT = np.hstack([np.mean(data_FFT, axis=1).reshape(-1,1),
                           np.std(data_FFT, axis=1).reshape(-1,1),
                           np.max(data_FFT, axis=1).reshape(-1,1),
                           np.min(data_FFT, axis=1).reshape(-1,1),
                           ])

        data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
        data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
        stats_DWT = np.hstack([np.mean(data_DWT, axis=1).reshape(-1,1),
                           np.std(data_DWT, axis=1).reshape(-1,1),
                           np.max(data_DWT, axis=1).reshape(-1,1),
                           np.min(data_DWT, axis=1).reshape(-1,1),
                           ])

        return {
            "TS": np.hstack([data_X, stats_X]),
            "FFT": np.hstack([data_FFT, stats_FFT]),
            "DWT": np.hstack([data_DWT, stats_DWT]),
            "PAA": np.hstack([X_paa, stats_PAA]),
            "SAX": np.hstack([X_sax, stats_SAX])
        }

    
    @staticmethod
    def select_model(option, random_state):
        if option == '1nn':
            return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1, n_jobs=-1)
        elif option == '3nn':
            return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=3, n_jobs=-1)
        elif option == 'svm':
            return SVC(C = 100, gamma=0.01, kernel='linear', probability=True)
        elif option == 'gbc':
            return GradientBoostingClassifier(n_estimators=5, random_state=random_state)
        elif option == 'nb':
            return GaussianNB()
        elif option == 'exrf':
            return ExtraTreesClassifier(n_estimators=200, criterion="entropy", bootstrap=True, max_features="sqrt", oob_score=True, n_jobs=-1, random_state=None)
        elif option == 'rd':
            return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        else:
            return RandomForestClassifier(n_estimators=200, criterion="gini", max_features="sqrt", n_jobs=-1, random_state=None)
    
    @jit
    def train_with_meta_classifier(X_train, y_train, base_option='random_forest', meta_option='1nn', random_state=42, wavelet=None):
        trained_models = {}  # Salvar modelos treinados para cada transformação
        X_train_transformed = TimeSeriesClassifier.transform_data_math(X_train, wavelet)  # Transformar todo o conjunto de treino
        loo = LeaveOneOut()

    # Treinar um modelo para cada transformação e salvar no dicionário
        for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, desc="Training Models", colour='red'):
            model = TimeSeriesClassifier.select_model(base_option, random_state)
            for train_index, _ in loo.split(X_trans):
                model.fit(X_trans[train_index], y_train[train_index])
            trained_models[rep] = model  # Salvar o modelo treinado

        # Preparar dados para o meta-classificador
        meta_features = []  # Inicializar um vetor para armazenar as somas de probabilidades para cada classe
        for i in range(0, X_train.shape[0]):
            for rep, model in trained_models.items():
                proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
                proba /= np.sum(proba)
            meta_features.append(proba.flatten())  # Adicione as probabilidades para cada classe

        # Treinar o meta-classificador
        meta_classifier = TimeSeriesClassifier.select_model(meta_option, random_state)
        meta_classifier.fit(meta_features, y_train)

        return trained_models, meta_classifier
    
    @jit
    def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier, wavelet=None):
        predictions = []
        meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste

        for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
            x_instance = X_test[i].reshape(1, -1)
            x_transformed = TimeSeriesClassifier.transform_data_math(x_instance, wavelet)

            instance_features = []
            for rep, (model, _) in trained_base_models.items():  # Ajuste para percorrer os modelos treinados e os scores médios
                proba = model.predict_proba(x_transformed[rep][0].reshape(1, -1))  # Ajuste aqui para pegar o primeiro elemento
                instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades

            meta_feature = np.array(instance_features).reshape(1, -1)
            predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões

            meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista

    # Converter a lista de meta-recursos dos dados de teste em um array numpy
        meta_features_test = np.array(meta_features_test)

    # Salvar todos os meta-recursos dos dados de teste em um arquivo CSV
        np.savetxt("meta-features-test.csv", meta_features_test, delimiter=",")

        return predictions



### Testando um único modelo - Random Forest como extrator e SVM como meta-classificador

In [239]:
dataset_quali_list = [ 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']
dataset_full_list = ['MixedShapesRegularTrain','SmallKitchenAppliances','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','CinCECGTorso','ChlorineConcentration','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','WormsTwoClass','DistalPhalanxOutlineCorrect','InsectWingbeatSound','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','SemgHandGenderCh2','ToeSegmentation2','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','HandOutlines','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
rapidos = ['SwedishLeaf', 'ProximalPhalanxOutlineAgeGroup', 'GunPointOldVersusYoung', 'Wine', 'Yoga', 'ArrowHead', 'InsectEPGSmallTrain','PowerCons','InsectEPGRegularTrain','GunPointMaleVersusFemale','ItalyPowerDemand','BME','Meat','Earthquakes','BirdChicken','BeetleFly','ACSF1']
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll', 'TwoPatterns']

In [240]:
data_unique = ['CBF']

In [241]:
accuracy_data = []

for dataset_name in data_unique:
    X_train, y_train = load_classification(dataset_name, split="TRAIN")
    X_test, y_test = load_classification(dataset_name, split="test")
    
    # Achatando os dados para 2D, pois alguns algoritmos esperam 2D
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    best_wavelet_train = TimeSeriesClassifier.choose_wavelet(X_train_flat)
    best_wavelet_test = TimeSeriesClassifier.choose_wavelet(X_test_flat)
    
    dataset_accuracies = []
    trained_base_models, meta_classifier = TimeSeriesClassifier.train_with_meta_classifier(X_train_flat, y_train, base_option='exrf', meta_option='rd', wavelet=best_wavelet_train)
    predictions_test_meta = TimeSeriesClassifier.predict_with_meta_classifier(X_test_flat, trained_base_models, meta_classifier, wavelet=best_wavelet_test)
    test_accuracy_meta = np.mean(predictions_test_meta == y_test)
    dataset_accuracies.append(test_accuracy_meta)

    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

accuracy_df = pd.DataFrame(accuracy_data)


[A
[A
[A
[A
[A
Training Models: 100%|[31m##########[0m| 5/5 [01:28<00:00, 17.80s/it]

[A

ValueError: 

In [None]:
def print_grouped(df, group_size=5):
    for i, group in df.groupby(np.arange(len(df)) // group_size):
        display(group)

In [None]:
print_grouped(accuracy_df)

In [None]:
accuracy_df.to_csv('Model_RD_EXRFOOBWAVELT_.csv', index=False)