### Bibliotecas

In [1]:
"""%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras
%pip install pywavelets"""

'%pip install aeon\n%pip install tsfresh\n%pip install tslearn\n%pip install tensorflow\n%pip install keras\n%pip install pywavelets'

In [38]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV, LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from aeon.classification.sklearn import RotationForestClassifier
from scipy.fftpack import fft
from numba import jit, njit
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

### Dados

In [49]:
@staticmethod
def load_data(dataset):
    # LabelEncoder para labels alvo
    le = LabelEncoder()

    # Carregar conjunto de dados do repositório UCR
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="test")

    # Formatar o conjunto de dados para 2D
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Ajustar e transformar as labels alvo
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Função de transformação dos dados (2D)

In [50]:
def choose_wavelet(X):
    min_variance = float('inf')
    best_wavelet = None
    candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']

    for wavelet_type in candidate_wavelets:
        _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
        total_variance = np.var(coeffs_cD)

        if total_variance < min_variance:
            min_variance = total_variance
            best_wavelet = wavelet_type
    return str(best_wavelet)


@jit
def transform_data_math(X, wavelet):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='periodization')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
    stats_PAA = np.hstack([np.mean(X_paa, axis=1).reshape(-1,1),
                           np.std(X_paa, axis=1).reshape(-1,1),
                           np.max(X_paa, axis=1).reshape(-1,1),
                           np.min(X_paa, axis=1).reshape(-1,1),
                           ])

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
    stats_SAX = np.hstack([np.mean(X_sax, axis=1).reshape(-1,1),
                           np.std(X_sax, axis=1).reshape(-1,1),
                           np.max(X_sax, axis=1).reshape(-1,1),
                           np.min(X_sax, axis=1).reshape(-1,1),
                           ])

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    stats_X = np.hstack([np.mean(data_X, axis=1).reshape(-1,1),
                         np.std(data_X, axis=1).reshape(-1,1),
                         np.max(data_X, axis=1).reshape(-1,1),
                         np.min(data_X, axis=1).reshape(-1,1),
                         ])

    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    stats_FFT = np.hstack([np.mean(data_FFT, axis=1).reshape(-1,1),
                           np.std(data_FFT, axis=1).reshape(-1,1),
                           np.max(data_FFT, axis=1).reshape(-1,1),
                           np.min(data_FFT, axis=1).reshape(-1,1),
                           ])

    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
    stats_DWT = np.hstack([np.mean(data_DWT, axis=1).reshape(-1,1),
                           np.std(data_DWT, axis=1).reshape(-1,1),
                           np.max(data_DWT, axis=1).reshape(-1,1),
                           np.min(data_DWT, axis=1).reshape(-1,1),
                           ])

    return {
        "TS": np.hstack([data_X, stats_X]),
        "FFT": np.hstack([data_FFT, stats_FFT]),
        "DWT": np.hstack([data_DWT, stats_DWT]),
        "PAA": np.hstack([X_paa, stats_PAA]),
        "SAX": np.hstack([X_sax, stats_SAX])
    }


### Seleção do modelo extrator e modelo classificador

In [51]:
@staticmethod
def select_model(option, random_state):
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1, n_jobs=-1)
    elif option == '3nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=3, n_jobs=-1)
    elif option == 'svm':
        return SVC(C = 1, gamma=0.1, kernel='linear', probability=True, cache_size=200, max_iter=-1, decision_function_shape='ovr', tol=1e-3)
    elif option == 'gbc':
        return GradientBoostingClassifier(n_estimators=5, random_state=random_state)
    elif option == 'nb':
        return GaussianNB()
    elif option == 'lr':
        return LogisticRegression(n_jobs=-1, max_iter=5000, solver="liblinear", dual=True, penalty="l2", random_state=random_state)
    elif option == 'rrf':
        return RotationForestClassifier(n_jobs=-1, random_state=None)
    elif option == 'exrf':
        return ExtraTreesClassifier(n_estimators=200, criterion="entropy", max_features="sqrt", n_jobs=-1, random_state=None)
    elif option == 'rd':
        return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    else:
        return RandomForestClassifier(n_estimators=200, criterion="gini", max_features="sqrt", n_jobs=-1, random_state=None)


### Treino do modelos extrator e classificador - (CalibrationProba)

In [52]:
@jit
def train_with_meta_classifier(X_train, y_train, base_option='None', meta_option='None', random_state=42, wavelet=None):
    num_classes = len(np.unique(y_train))
    trained_models = {}  # Salvar modelos treinados para cada transformação
    X_train_transformed = transform_data_math(X_train, wavelet)  # Transformar todo o conjunto de treino

    loo = LeaveOneOut()

    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, colour='red', desc="Training Base Models"):
        model = select_model(base_option, random_state)
        scores = []
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
            score = model.score(X_trans[train_index], y_train[train_index])  # Score do modelo nos dados de treino
            scores.append(score)
        avg_score = np.mean(scores)
        trained_models[rep] = (model, avg_score)  # Salvar o modelo treinado e a média dos scores

    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, (model, _) in trained_models.items():
            proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            instance_features.extend(proba.flatten())
        meta_features.append(instance_features)

    meta_features = np.array(meta_features)

    # Calibrar as probabilidades dos classificadores base
    calibrated_classifiers = []
    for rep, (model, _) in trained_models.items():
        calibrated_classifier = CalibratedClassifierCV(model, method='sigmoid', cv='prefit', n_jobs=-1)
        calibrated_classifier.fit(X_train_transformed[rep], y_train)
        calibrated_classifiers.append((rep, calibrated_classifier))

    # Preparar dados calibrados para o meta-classificador
    calibrated_meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, calibrated_classifier in calibrated_classifiers:
            proba = calibrated_classifier.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            instance_features.extend(proba.flatten())
        calibrated_meta_features.append(instance_features)

    calibrated_meta_features = np.array(calibrated_meta_features)

    # Treinar o meta-classificador (utilizando MLP como exemplo)
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(calibrated_meta_features, y_train)

    return calibrated_classifiers, meta_classifier


### Predicao do meta-classificador - (CalibrationProba)

In [53]:
@jit
def predict_with_meta_classifier(X_test, calibrated_base_models, trained_meta_classifier, wavelet=None):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste

    for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data_math(x_instance, wavelet)

        instance_features = []
        for rep, calibrated_classifier in calibrated_base_models:
            proba = calibrated_classifier.predict_proba(x_transformed[rep][0].reshape(1, -1))  # Ajuste aqui para pegar o primeiro elemento
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões

        meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista

    # Converter a lista de meta-recursos dos dados de teste em um array numpy
    meta_features_test = np.array(meta_features_test)
    return predictions


### Train/Predict (ArgmaxProba)

In [54]:
"""def combine_and_predict(X_transformed, trained_models):
    num_instances = len(next(iter(X_transformed.values())))  # Number of instances from the first transformed data
    num_classes = len(trained_models[next(iter(trained_models))].classes_)  # Number of classes from first model
    combined_probabilities = np.zeros((num_instances, num_classes))

    for transformation_type, X_trans in X_transformed.items():
        model = trained_models[transformation_type]
        proba = model.predict_proba(X_trans)  # Get probabilities for all instances
        combined_probabilities += proba

    combined_probabilities_reshaped = combined_probabilities.reshape(num_instances, -1, num_classes)
    predicted_classes = np.argmax(combined_probabilities_reshaped, axis=1) + 1  # Adding 1 to start classes from 1 instead of 0
    return predicted_classes

def train_with_meta_classifier(X_train, y_train, base_option='1nn', meta_option='rf', random_state=123, wavelet=None):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    X_train_transformed = transform_data_math(X_train, wavelet)  # Transformar todo o conjunto de treino
    loo = LeaveOneOut()

    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, colour='red', desc="Training Models"):
        model = select_model(base_option, random_state)
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
        trained_models[rep] = model  # Salvar o modelo treinado

    avg_proba = combine_and_predict(X_train_transformed, trained_models)
    # Train meta-classifier
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(avg_proba, y_train)

    return trained_models, meta_classifier

def predict_with_meta_classifier(X_test, trained_models, trained_meta_classifier, wavelet=None):
    predictions = []
    meta_features_test = []
    for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Testing Instances"):
        x_instance = X_test[i].reshape(1,-1)
        x_transformed = transform_data_math(x_instance, wavelet)
        avg_proba = combine_and_predict(x_transformed, trained_models)
        meta_feature = avg_proba
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])
        meta_features_test.append(meta_feature)
    meta_features_test = np.array(meta_features_test)
    return predictions
"""

'def combine_and_predict(X_transformed, trained_models):\n    num_instances = len(next(iter(X_transformed.values())))  # Number of instances from the first transformed data\n    num_classes = len(trained_models[next(iter(trained_models))].classes_)  # Number of classes from first model\n    combined_probabilities = np.zeros((num_instances, num_classes))\n\n    for transformation_type, X_trans in X_transformed.items():\n        model = trained_models[transformation_type]\n        proba = model.predict_proba(X_trans)  # Get probabilities for all instances\n        combined_probabilities += proba\n\n    combined_probabilities_reshaped = combined_probabilities.reshape(num_instances, -1, num_classes)\n    predicted_classes = np.argmax(combined_probabilities_reshaped, axis=1) + 1  # Adding 1 to start classes from 1 instead of 0\n    return predicted_classes\n\ndef train_with_meta_classifier(X_train, y_train, base_option=\'1nn\', meta_option=\'rf\', random_state=123, wavelet=None):\n    train

### Testando um único modelo - Random Forest como extrator e SVM como meta-classificador

In [55]:
dataset_quali_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace']
dataset_full_list = ['Worms','FaceAll','SemgHandMovementCh2','Herring','GunPointAgeSpan','SmoothSubspace','SemgHandSubjectCh2','LargeKitchenAppliances','Plane','Fish','ScreenType','PhalangesOutlinesCorrect','CricketZ','MiddlePhalanxOutlineAgeGroup','ECG5000','Chinatown','ShapeletSim','MiddlePhalanxTW','Symbols','EOGHorizontalSignal','Ham','UMD','HouseTwenty','MiddlePhalanxOutlineCorrect','Wafer','Rock','DistalPhalanxTW','CricketY','FacesUCR','FiftyWords','Mallat','Strawberry','SwedishLeaf','ProximalPhalanxOutlineAgeGroup','MixedShapesRegularTrain','SmallKitchenAppliances','GunPointOldVersusYoung','Wine','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','Yoga','CinCECGTorso','ChlorineConcentration','ArrowHead','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','InsectEPGSmallTrain','WormsTwoClass','PowerCons','InsectEPGRegularTrain','GunPointMaleVersusFemale','DistalPhalanxOutlineCorrect','ItalyPowerDemand','InsectWingbeatSound','BME','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','Meat','SemgHandGenderCh2','ToeSegmentation2','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','Earthquakes','BirdChicken','HandOutlines','BeetleFly','ACSF1','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll', 'TwoPatterns']

In [56]:
dataunique = ['Beef', 'Car', 'CBF', 'Coffee','DiatomSizeReduction']

In [66]:
for dataset_name in univariate_equal_length:
    Acc = []
    dataset_accuracies = []
    # Carregue os dados de treinamento e teste
    features_train, features_test, target_train, target_test = load_data(dataset_name)
    best_wavelet = choose_wavelet(features_train)

    trained_models, meta_classifier = train_with_meta_classifier(features_train, target_train, base_option='svm', meta_option='lr', random_state=123, wavelet=best_wavelet)
    
    predictions = predict_with_meta_classifier(features_test, trained_models, meta_classifier, wavelet=best_wavelet)
    
    test_accuracy_meta = np.mean(predictions == target_test)
    
    dataset_accuracies.append(test_accuracy_meta)
    
    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
    
    Acc.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

accuracy_df = pd.DataFrame(Acc)


Training Base Models: 100%|[31m##########[0m| 5/5 [00:03<00:00,  1.26it/s]
Testing Instances: 100%|[32m##########[0m| 60/60 [00:03<00:00, 19.87it/s]


Acurácia Car: 0.8833333333333333


Training Base Models: 100%|[31m##########[0m| 5/5 [05:39<00:00, 67.95s/it]
Testing Instances: 100%|[32m##########[0m| 276/276 [00:10<00:00, 26.42it/s]


Acurácia DistalPhalanxOutlineCorrect: 0.7391304347826086


Training Base Models: 100%|[31m##########[0m| 5/5 [00:01<00:00,  3.31it/s]
Testing Instances: 100%|[32m##########[0m| 54/54 [00:02<00:00, 25.31it/s]


Acurácia Wine: 0.6296296296296297


Training Base Models: 100%|[31m##########[0m| 5/5 [01:13<00:00, 14.72s/it]
Testing Instances: 100%|[32m##########[0m| 205/205 [00:08<00:00, 24.81it/s]


Acurácia ProximalPhalanxTW: 0.8146341463414634


Training Base Models: 100%|[31m##########[0m| 5/5 [11:28<00:00, 137.72s/it]
Testing Instances: 100%|[32m##########[0m| 390/390 [00:19<00:00, 19.57it/s]


Acurácia CricketZ: 0.441025641025641


Training Base Models: 100%|[31m##########[0m| 5/5 [01:42<00:00, 20.42s/it]
Testing Instances: 100%|[32m##########[0m| 154/154 [00:06<00:00, 24.90it/s]


Acurácia MiddlePhalanxTW: 0.6038961038961039


Training Base Models: 100%|[31m##########[0m| 5/5 [07:16<00:00, 87.35s/it]
Testing Instances: 100%|[32m##########[0m| 391/391 [00:52<00:00,  7.50it/s]


Acurácia Adiac: 0.7519181585677749


Training Base Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 12.11it/s]
Testing Instances: 100%|[32m##########[0m| 900/900 [00:34<00:00, 25.92it/s]


Acurácia CBF: 0.8822222222222222


Training Base Models: 100%|[31m##########[0m| 5/5 [1:43:54<00:00, 1246.87s/it]
Testing Instances: 100%|[32m##########[0m| 500/500 [00:30<00:00, 16.33it/s]


Acurácia EthanolLevel: 0.712


Training Base Models: 100%|[31m##########[0m| 5/5 [00:18<00:00,  3.64s/it]
Testing Instances: 100%|[32m##########[0m| 2425/2425 [02:02<00:00, 19.81it/s]


Acurácia MixedShapesSmallTrain: 0.8032989690721649


Training Base Models: 100%|[31m##########[0m| 5/5 [18:54<00:00, 226.81s/it]
Testing Instances: 100%|[32m##########[0m| 375/375 [00:17<00:00, 21.77it/s]


Acurácia SmallKitchenAppliances: 0.4826666666666667


Training Base Models: 100%|[31m##########[0m| 5/5 [00:10<00:00,  2.11s/it]
Testing Instances: 100%|[32m##########[0m| 180/180 [00:06<00:00, 26.59it/s]


Acurácia PowerCons: 1.0


Training Base Models: 100%|[31m##########[0m| 5/5 [02:29<00:00, 29.80s/it]
Testing Instances: 100%|[32m##########[0m| 208/208 [00:49<00:00,  4.18it/s]


Acurácia PigAirwayPressure: 0.18269230769230768


Training Base Models:   0%|[31m          [0m| 0/5 [00:00<?, ?it/s]

In [None]:
accuracy_df

In [None]:
accuracy_df.to_csv('model_SVM+LR+CProba.csv', index=False)