In [None]:
"""!pip install aeon
!pip install sktime
!pip install tsfresh
!pip install tslearn
!pip install PyWavelets"""

### To Do list


*   Comparar os resultados do 1NN contra o SVM+RF
*   Comparar os resultados dos classificadores Feature Based com o SVM+RF
*   Comparar os resultados do MetaClf_Conc contra o MetaClf_Dict



In [None]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier, ShapeDTW, ElasticEnsemble

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt

from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV, SGDClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, VotingClassifier
from sklearn.naive_bayes import GaussianNB

from scipy.fftpack import fft
from numba import jit
from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

In [None]:
def transform_data(X):
    """Build the concatenated multi-representation feature matrix for ``X``.

    Every row of ``X`` is treated as one time series. Five column blocks are
    produced and joined side by side:

    * the raw series, the magnitude of its FFT, and its single-level 'db1'
      DWT coefficients (approximation followed by detail) -- these three are
      concatenated and then passed through ``TimeSeriesScalerMeanVariance``;
    * the PAA reconstruction and the SAX reconstruction of the raw series,
      appended after the scaled block without further scaling.

    Parameters
    ----------
    X : array-like of shape (n_series, series_length)
        One time series per row.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per input series.

    Notes
    -----
    This function is intentionally *not* decorated with numba's ``@jit``: the
    body only orchestrates SciPy, PyWavelets and tslearn calls, which numba
    cannot compile. Object-mode fallback brings no speed-up, and numba
    releases that default to nopython mode reject the function outright.
    """
    X = np.asarray(X)
    n_series = X.shape[0]

    # PAA segment count and SAX alphabet size are both a quarter of the
    # series length (integer division, same value as int(length / 4)).
    n_paa_segments = X.shape[1] // 4
    n_sax_symbols = X.shape[1] // 4

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    # tslearn transformers return 3-D arrays; collapse them back to
    # one flat row per series before concatenating.
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa = paa.inverse_transform(paa.fit_transform(X)).reshape(n_series, -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax = sax.inverse_transform(sax.fit_transform(X)).reshape(n_series, -1)

    data_clean = np.concatenate((X, X_fft, X_dwt), axis=1)
    data_scaled = TimeSeriesScalerMeanVariance().fit_transform(data_clean)
    # The scaler also returns a 3-D array. It has to be flattened to 2-D
    # *before* the next concatenate, otherwise NumPy refuses to join it with
    # the 2-D PAA/SAX blocks.
    data_scaled = data_scaled.reshape(n_series, -1)

    return np.concatenate((data_scaled, X_paa, X_sax), axis=1)

In [None]:
"""@jit
def transform_data(X):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.fit_transform(X)
    X_paa_inv = paa.inverse_transform(X_paa_)
    X_paa = X_paa_inv.reshape(X_paa_inv.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.fit_transform(X)
    X_sax_inv = sax.inverse_transform(X_sax_)
    X_sax = X_sax_inv.reshape(X_sax_inv.shape[0], -1)

    # Calculating statistics once
    data_mean = X.mean(axis=1).reshape(-1, 1)
    data_std = X.std(axis=1).reshape(-1, 1)
    data_max = X.max(axis=1).reshape(-1, 1)
    data_min = X.min(axis=1).reshape(-1, 1)
    

    data = np.concatenate((X, X_fft, X_dwt, X_paa, X_sax, data_mean, data_std, data_max, data_min), axis=1)
    data_concat = TimeSeriesScalerMeanVariance().fit_transform(data)
    data_concat.resize(data.shape[0], data.shape[1])

    return data_concat
"""

In [None]:
"""@jit
def transform_data(X):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.fit_transform(X)
    X_paa_inv = paa.inverse_transform(X_paa_)
    X_paa = X_paa_inv.reshape(X_paa_inv.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.fit_transform(X)
    X_sax_inv = sax.inverse_transform(X_sax_)
    X_sax = X_sax_inv.reshape(X_sax_inv.shape[0], -1)

    # Calculating statistics for each transformation
    fft_mean = X_fft.mean(axis=1).reshape(-1, 1)
    fft_std = X_fft.std(axis=1).reshape(-1, 1)
    fft_max = X_fft.max(axis=1).reshape(-1, 1)
    fft_min = X_fft.min(axis=1).reshape(-1, 1)

    dwt_mean = X_dwt.mean(axis=1).reshape(-1, 1)
    dwt_std = X_dwt.std(axis=1).reshape(-1, 1)
    dwt_max = X_dwt.max(axis=1).reshape(-1, 1)
    dwt_min = X_dwt.min(axis=1).reshape(-1, 1)

    paa_mean = X_paa.mean(axis=1).reshape(-1, 1)
    paa_std = X_paa.std(axis=1).reshape(-1, 1)
    paa_max = X_paa.max(axis=1).reshape(-1, 1)
    paa_min = X_paa.min(axis=1).reshape(-1, 1)

    sax_mean = X_sax.mean(axis=1).reshape(-1, 1)
    sax_std = X_sax.std(axis=1).reshape(-1, 1)
    sax_max = X_sax.max(axis=1).reshape(-1, 1)
    sax_min = X_sax.min(axis=1).reshape(-1, 1)

    # Concatenating statistics with the transformed data
    data = np.concatenate((X, X_fft, X_dwt, X_paa, X_sax,
                           fft_mean, fft_std, fft_max, fft_min,
                           dwt_mean, dwt_std, dwt_max, dwt_min,
                           paa_mean, paa_std, paa_max, paa_min,
                           sax_mean, sax_std, sax_max, sax_min), axis=1)

    # Calculating statistics for all concatenated data
    data_mean = data.mean(axis=1).reshape(-1, 1)
    data_std = data.std(axis=1).reshape(-1, 1)
    data_max = data.max(axis=1).reshape(-1, 1)
    data_min = data.min(axis=1).reshape(-1, 1)

    # Concatenating statistics with the transformed data
    data_concat = np.concatenate((data, data_mean, data_std, data_max, data_min), axis=1)

    data_concat = TimeSeriesScalerMeanVariance().fit_transform(data_concat)
    data_concat.resize(data_concat.shape[0], data_concat.shape[1])

    return data_concat
"""

In [None]:
def select_model(option, random_state):
    """Return a new, unfitted classifier for a short option name.

    Parameters
    ----------
    option : str
        One of ``'1nn'``, ``'3nn'``, ``'svm'``, ``'gbc'``, ``'nb'``,
        ``'exrf'``, ``'rd'``, ``'sgbd'``. Any other value selects the
        random-forest fallback.
    random_state : int or None
        Seed forwarded to every estimator that accepts one, so that repeated
        runs with the same seed build identically configured models.

    Returns
    -------
    estimator
        An unfitted aeon or scikit-learn classifier.

    Notes
    -----
    Not decorated with numba's ``@jit``: the function only instantiates
    Python estimator objects, which numba cannot compile.
    """
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='euclidean',
                                              n_neighbors=1,
                                              n_jobs=-1)
    elif option == '3nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw',
                                              n_neighbors=3,
                                              n_jobs=-1)
    elif option == 'svm':
        # gamma has no effect with a linear kernel; kept to preserve the
        # original configuration.
        return SVC(C=100,
                   gamma=0.01,
                   kernel='linear',
                   probability=True,
                   random_state=random_state)
    elif option == 'gbc':
        return GradientBoostingClassifier(n_estimators=5,
                                          random_state=random_state)
    elif option == 'nb':
        return GaussianNB()
    elif option == 'exrf':
        return ExtraTreesClassifier(n_estimators=200,
                                    criterion="entropy",
                                    max_features="sqrt",
                                    oob_score=True,
                                    bootstrap=True,
                                    n_jobs=-1,
                                    random_state=random_state)
    elif option == 'rd':
        return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    elif option == 'sgbd':
        return SGDClassifier(max_iter=1000,
                             n_jobs=-1,
                             loss='perceptron',
                             penalty='elasticnet',
                             random_state=random_state)
    else:
        # Fallback: random forest. max_features must be "sqrt", "log2", None
        # or a number; "gini" is a *criterion* name and is rejected by
        # scikit-learn, so "sqrt" is used to match the 'exrf' configuration.
        return RandomForestClassifier(n_estimators=200,
                                      criterion="entropy",
                                      max_features="sqrt",
                                      n_jobs=-1,
                                      random_state=random_state)

In [None]:
# Hard-voting ensemble of 1-nearest-neighbour classifiers, one per distance.
# Each (estimator name, distance) pair below becomes one ensemble member.
meta_distance_based = VotingClassifier(
    estimators=[
        (member_name,
         KNeighborsTimeSeriesClassifier(distance=distance_name, n_neighbors=1, n_jobs=-1))
        for member_name, distance_name in (
            ('nn', 'euclidean'),
            ('nndtw', 'dtw'),
            ('nnddtw', 'ddtw'),
            ('nnwdtw', 'wdtw'),
            ('nnwddtw', 'wddtw'),
            ('nnlcss', 'lcss'),
            ('nnerp', 'erp'),
            ('nnmsm', 'msm'),
        )
    ],
    voting='hard',
)

In [None]:
def train_with_meta_classifier(X_train, y_train, base_option='exrf', meta_option='rd', random_state=42):
    """Fit the base classifier and the meta-classifier stacked on top of it.

    The training series are expanded with ``transform_data``. Leave-one-out
    cross-validation then produces, for every training instance, the class
    probabilities predicted by a base model that did *not* see that instance.
    Those held-out probabilities are the meta-features the meta-classifier is
    trained on. Finally the base model is refitted on the whole training set
    so the returned model has seen every sample.

    Earlier revisions refitted a single estimator inside the leave-one-out
    loop and threw every fit but the last away, so the returned model was
    missing the last training sample and the meta-features were in-sample
    probabilities.

    Parameters
    ----------
    X_train : array-like of shape (n_samples, series_length)
        Training series, one per row.
    y_train : array-like of shape (n_samples,)
        Class label of every training series.
    base_option : str, default 'exrf'
        ``select_model`` key of the base classifier; it must provide
        ``predict_proba`` and ``classes_``.
    meta_option : str, default 'rd'
        ``select_model`` key of the meta-classifier.
    random_state : int or None, default 42
        Seed forwarded to ``select_model``.

    Returns
    -------
    (model, meta_classifier)
        The base model fitted on all training data and the fitted
        meta-classifier.

    Notes
    -----
    Not decorated with numba's ``@jit``: the body drives scikit-learn style
    estimators, which numba cannot compile.
    """
    X_train_transformed = transform_data(X_train)
    y_train = np.asarray(y_train)
    classes = np.unique(y_train)
    n_samples = X_train_transformed.shape[0]

    # Held-out class probabilities, one row per training instance and one
    # column per class (columns ordered like np.unique(y_train)).
    meta_features = np.zeros((n_samples, classes.size))

    loo = LeaveOneOut()
    for train_index, test_index in tqdm(loo.split(X_train_transformed), total=n_samples,
                                        colour='red', desc="Training"):
        fold_model = select_model(base_option, random_state)
        fold_model.fit(X_train_transformed[train_index], y_train[train_index])

        proba = fold_model.predict_proba(X_train_transformed[test_index])
        proba /= np.sum(proba)

        # When the held-out instance is the only member of its class, the
        # fold model knows fewer classes; map its columns onto the full
        # class list and leave the missing class at probability 0.
        columns = np.searchsorted(classes, fold_model.classes_)
        meta_features[test_index[0], columns] = proba.ravel()

    # Final base model: fitted once on every training instance.
    model = select_model(base_option, random_state)
    model.fit(X_train_transformed, y_train)

    # Train the meta-classifier on the held-out probabilities.
    meta_classifier = select_model(meta_option, random_state=random_state)
    meta_classifier.fit(meta_features, y_train)

    return model, meta_classifier

In [None]:
def predict_with_meta_classifier(X_test, trained_base_model, trained_meta_classifier):
    """Predict labels for ``X_test`` with the stacked base + meta classifiers.

    The test series are expanded with ``transform_data``, the base model
    turns them into class probabilities, and the meta-classifier maps those
    probabilities to the final labels.

    The whole test set is processed in one batch, the same way the training
    set is transformed in a single ``transform_data`` call. The previous
    per-instance loop, its progress bar and the unused ``meta_features_test``
    accumulator are gone.

    Parameters
    ----------
    X_test : array-like of shape (n_samples, series_length)
        Test series; each instance is flattened to a single row.
    trained_base_model : estimator
        Fitted base classifier exposing ``predict_proba``.
    trained_meta_classifier : estimator
        Fitted meta-classifier exposing ``predict``.

    Returns
    -------
    list
        One predicted label per test instance, in input order.

    Notes
    -----
    Not decorated with numba's ``@jit``: the body drives scikit-learn style
    estimators, which numba cannot compile.
    """
    X_test = np.asarray(X_test)
    n_samples = len(X_test)
    if n_samples == 0:
        # Nothing to predict; mirror the empty list the loop used to return.
        return []

    # One flat row per instance, as the per-instance reshape(1, -1) did.
    X_transformed = transform_data(X_test.reshape(n_samples, -1))

    proba = np.asarray(trained_base_model.predict_proba(X_transformed), dtype=float)
    # Row-wise renormalisation, equivalent to the per-instance division by
    # the sum of that instance's probabilities.
    meta_features = proba / proba.sum(axis=1, keepdims=True)

    return list(trained_meta_classifier.predict(meta_features))

In [None]:

# Alphabetically ordered dataset names, for when the experiment is run over
# every UCR dataset.
univariate_list = sorted(univariate_equal_length)

In [None]:
# Hand-maintained lists of dataset names. None of them is read by the
# evaluation loop visible in this notebook, which iterates `univariate_list`.

# Small subset of 20 datasets.
# NOTE(review): presumably a quick/qualification run subset -- confirm with the author.
dataset_quali_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint','Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']
# Larger selection of datasets.
dataset_full_list= ['Worms','FaceAll','SyntheticControl','SemgHandMovementCh2','Herring','GunPointAgeSpan','SmoothSubspace','SemgHandSubjectCh2','LargeKitchenAppliances','Plane','Fish','ScreenType','PhalangesOutlinesCorrect','CricketZ','MiddlePhalanxOutlineAgeGroup','ECG5000','Chinatown','ShapeletSim','MiddlePhalanxTW','Symbols','EOGHorizontalSignal','Ham','UMD','HouseTwenty','TwoPatterns','MiddlePhalanxOutlineCorrect','Wafer','Rock','DistalPhalanxTW','CricketY','SonyAIBORobotSurface1','FacesUCR','FiftyWords','Mallat','Strawberry','SwedishLeaf','ProximalPhalanxOutlineAgeGroup','DiatomSizeReduction','MixedShapesRegularTrain','Trace','ECGFiveDays','Lightning2','MoteStrain','SmallKitchenAppliances','GunPointOldVersusYoung','Wine','ECG200','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','Lightning7','Yoga','FaceFour','CinCECGTorso','Beef','OliveOil','ChlorineConcentration','ArrowHead','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','InsectEPGSmallTrain','WormsTwoClass','PowerCons','Coffee','InsectEPGRegularTrain','GunPointMaleVersusFemale','DistalPhalanxOutlineCorrect','ItalyPowerDemand','InsectWingbeatSound','BME','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','Meat','SemgHandGenderCh2','ToeSegmentation2','Adiac','Car','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','GunPoint','Earthquakes','BirdChicken','HandOutlines','BeetleFly','SonyAIBORobotSurface2','CBF','ACSF1','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
# "problematicos" is Portuguese for "problematic".
# NOTE(review): presumably datasets that failed or were too slow to run -- the reason is not recorded here.
# NOTE(review): 'MedicalImages' appears both here and in dataset_quali_list.
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll']

In [None]:
# Evaluate the stacked classifier (base 'exrf', meta 'rd') on every dataset in
# `univariate_list` and collect one accuracy value per dataset.
from sklearn.preprocessing import LabelEncoder
# One {'Dataset Name', 'Accuracy'} record per evaluated dataset.
accuracy_data = []

for dataset_name in univariate_list:
    # Load the train and test splits of the current dataset.
    train, train_labels = load_classification(dataset_name, split='TRAIN')
    test, test_labels = load_classification(dataset_name, split='test')

    # Flatten every instance to a single row (2-D: instances x values).
    xtrain = train.reshape(train.shape[0], -1)
    xtest = test.reshape(test.shape[0], -1)

    # Encode the labels as integers; the encoder is fitted on the training
    # labels only and then applied to the test labels.
    le = LabelEncoder()
    labels = le.fit_transform(train_labels)
    true_labels = le.transform(test_labels)

    # Training
    trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, labels, base_option='exrf', meta_option='rd', random_state=42)
    # Testing
    predictions_test_meta = predict_with_meta_classifier(xtest, trained_base_models, meta_classifier)
    # Result: fraction of test instances whose predicted label equals the true label.
    test_accuracy_meta = np.mean(predictions_test_meta == true_labels)

    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

# Table with one row per dataset.
accuracy_df = pd.DataFrame(accuracy_data)


In [None]:
# Display the per-dataset accuracy table as the cell output.
accuracy_df

In [None]:
# Write the accuracy table to a CSV file in the current working directory,
# without the DataFrame index column.
accuracy_df.to_csv('model_EXRF+RD+NOSTATS_OOB_CD.csv', index=False)