### Bibliotecas

In [None]:
"""%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras
%pip install pywavelets"""

In [98]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier, ShapeDTW, ElasticEnsemble

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt
from sklearn.calibration import CalibratedClassifierCV

from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB

from scipy.fftpack import fft
from numba import jit
from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

### Dados

In [2]:
def load_data(dataset):
    # LabelEncoder para labels alvo
    le = LabelEncoder()

    # Carregar conjunto de dados do repositório UCR
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="test")

    # Formatar o conjunto de dados para 2D
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Ajustar e transformar as labels alvo
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Função de transformação dos dados (2D)

In [3]:
def choose_wavelet(X):
    min_variance = float('inf')
    best_wavelet = None
    candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']

    for wavelet_type in candidate_wavelets:
        _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
        total_variance = np.var(coeffs_cD)

        if total_variance < min_variance:
            min_variance = total_variance
            best_wavelet = wavelet_type
    return str(best_wavelet)


@jit
def transform_data_math(X, wavelet):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
    stats_PAA = np.hstack([np.mean(X_paa, axis=1).reshape(-1,1),
                           np.std(X_paa, axis=1).reshape(-1,1),
                           np.max(X_paa, axis=1).reshape(-1,1),
                           np.min(X_paa, axis=1).reshape(-1,1),
                           ])

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
    stats_SAX = np.hstack([np.mean(X_sax, axis=1).reshape(-1,1),
                           np.std(X_sax, axis=1).reshape(-1,1),
                           np.max(X_sax, axis=1).reshape(-1,1),
                           np.min(X_sax, axis=1).reshape(-1,1),
                           ])

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    stats_X = np.hstack([np.mean(data_X, axis=1).reshape(-1,1),
                         np.std(data_X, axis=1).reshape(-1,1),
                         np.max(data_X, axis=1).reshape(-1,1),
                         np.min(data_X, axis=1).reshape(-1,1),
                         ])

    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    stats_FFT = np.hstack([np.mean(data_FFT, axis=1).reshape(-1,1),
                           np.std(data_FFT, axis=1).reshape(-1,1),
                           np.max(data_FFT, axis=1).reshape(-1,1),
                           np.min(data_FFT, axis=1).reshape(-1,1),
                           ])

    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
    stats_DWT = np.hstack([np.mean(data_DWT, axis=1).reshape(-1,1),
                           np.std(data_DWT, axis=1).reshape(-1,1),
                           np.max(data_DWT, axis=1).reshape(-1,1),
                           np.min(data_DWT, axis=1).reshape(-1,1),
                           ])

    return {
        "TS": np.hstack([data_X, stats_X]),
        "FFT": np.hstack([data_FFT, stats_FFT]),
        "DWT": np.hstack([data_DWT, stats_DWT]),
        "PAA": np.hstack([X_paa, stats_PAA]),
        "SAX": np.hstack([X_sax, stats_SAX])
    }

### Seleção do modelo extrator e modelo classificador

In [120]:
def select_model(option, random_state):
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1, n_jobs=-1)
    elif option == '3nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=3, n_jobs=-1)
    elif option == 'svm':
        return SVC(C = 1, gamma=0.1, kernel='linear', probability=True, cache_size=200, max_iter=-1, decision_function_shape='ovr', tol=1e-3)
    elif option == 'gbc':
        return GradientBoostingClassifier(n_estimators=5, random_state=random_state)
    elif option == 'nb':
        return AdaBoostClassifier(algorithm='SAMME', n_estimators=200)
    elif option == 'exrf':
        return ExtraTreesClassifier(n_estimators=200, criterion="entropy", max_features="sqrt", n_jobs=-1, random_state=None)
    elif option == 'rd':
        return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    else:
        return RandomForestClassifier(n_estimators=200, criterion="gini", max_features="sqrt", n_jobs=-1, random_state=None)


### Treino do modelos extrator e classificador - (CalibrationProba)

In [128]:
@jit
def train_with_meta_classifier(X_train, y_train, base_option='None', meta_option='None', random_state=42, wavelet=None):
    num_classes = len(np.unique(y_train))
    trained_models = {}  # Salvar modelos treinados para cada transformação
    X_train_transformed = transform_data_math(X_train, wavelet)  # Transformar todo o conjunto de treino

    loo = LeaveOneOut()

    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, colour='red', desc="Training Base Models"):
        model = select_model(base_option, random_state)
        scores = []
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
            score = model.score(X_trans[train_index], y_train[train_index])  # Score do modelo nos dados de treino
            scores.append(score)
        avg_score = np.mean(scores)
        trained_models[rep] = (model, avg_score)  # Salvar o modelo treinado e a média dos scores

    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, (model, _) in trained_models.items():
            proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            proba /= np.sum(proba)
            instance_features.extend(proba.flatten())
        meta_features.append(instance_features)

    meta_features = np.array(meta_features)

    # Calibrar as probabilidades dos classificadores base
    calibrated_classifiers = []
    for rep, (model, _) in trained_models.items():
        calibrated_classifier = CalibratedClassifierCV(model, method='isotonic', cv='prefit')
        calibrated_classifier.fit(X_train_transformed[rep], y_train)
        calibrated_classifiers.append((rep, calibrated_classifier))

    # Preparar dados calibrados para o meta-classificador
    calibrated_meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, calibrated_classifier in calibrated_classifiers:
            proba = calibrated_classifier.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            proba /= np.sum(proba)
            instance_features.extend(proba.flatten())
        calibrated_meta_features.append(instance_features)

    calibrated_meta_features = np.array(calibrated_meta_features)

    # Treinar o meta-classificador (utilizando MLP como exemplo)
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(calibrated_meta_features, y_train)

    return calibrated_classifiers, meta_classifier


### Predicao do meta-classificador - (CalibrationProba)

In [129]:
@jit
def predict_with_meta_classifier(X_test, calibrated_base_models, trained_meta_classifier, wavelet=None):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste

    for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data_math(x_instance, wavelet)

        instance_features = []
        for rep, calibrated_classifier in calibrated_base_models:
            proba = calibrated_classifier.predict_proba(x_transformed[rep][0].reshape(1, -1))  # Ajuste aqui para pegar o primeiro elemento
            proba /= np.sum(proba)
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões

        meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista

    # Converter a lista de meta-recursos dos dados de teste em um array numpy
    meta_features_test = np.array(meta_features_test)
    return predictions


### Train/Predict (MeanProba)

In [93]:
def combine_and_predict(X_transformed, trained_models):
    num_instances = len(next(iter(X_transformed.values())))  # Number of instances from the first transformed data
    num_classes = len(trained_models[next(iter(trained_models))].classes_)  # Number of classes from first model
    combined_probabilities = np.zeros((num_instances, num_classes))

    for transformation_type, X_trans in X_transformed.items():
        model = trained_models[transformation_type]
        proba = model.predict_proba(X_trans)  # Get probabilities for all instances
        combined_probabilities += proba

    combined_probabilities_reshaped = combined_probabilities.reshape(num_instances, -1, num_classes)
    predicted_classes = np.argmax(combined_probabilities_reshaped, axis=1) + 1  # Adding 1 to start classes from 1 instead of 0
    return predicted_classes

def train_with_meta_classifier(X_train, y_train, base_option='1nn', meta_option='rf', random_state=123, wavelet=None):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    X_train_transformed = transform_data_math(X_train, wavelet)  # Transformar todo o conjunto de treino
    loo = LeaveOneOut()

    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, colour='red', desc="Training Models"):
        model = select_model(base_option, random_state)
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
        trained_models[rep] = model  # Salvar o modelo treinado

    avg_proba = combine_and_predict(X_train_transformed, trained_models)
    # Train meta-classifier
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(avg_proba, y_train)

    return trained_models, meta_classifier

def predict_with_meta_classifier(X_test, trained_models, trained_meta_classifier, wavelet=None):
    predictions = []
    meta_features_test = []
    for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Testing Instances"):
        x_instance = X_test[i].reshape(1,-1)
        x_transformed = transform_data_math(x_instance, wavelet)
        avg_proba = combine_and_predict(x_transformed, trained_models)
        meta_feature = avg_proba
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])
        meta_features_test.append(meta_feature)
    meta_features_test = np.array(meta_features_test)
    return predictions


### Testando um único modelo - Random Forest como extrator e SVM como meta-classificador

In [94]:
dataset_quali_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace']
dataset_full_list = ['Worms','FaceAll','SemgHandMovementCh2','Herring','GunPointAgeSpan','SmoothSubspace','SemgHandSubjectCh2','LargeKitchenAppliances','Plane','Fish','ScreenType','PhalangesOutlinesCorrect','CricketZ','MiddlePhalanxOutlineAgeGroup','ECG5000','Chinatown','ShapeletSim','MiddlePhalanxTW','Symbols','EOGHorizontalSignal','Ham','UMD','HouseTwenty','MiddlePhalanxOutlineCorrect','Wafer','Rock','DistalPhalanxTW','CricketY','FacesUCR','FiftyWords','Mallat','Strawberry','SwedishLeaf','ProximalPhalanxOutlineAgeGroup','MixedShapesRegularTrain','SmallKitchenAppliances','GunPointOldVersusYoung','Wine','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','Yoga','CinCECGTorso','ChlorineConcentration','ArrowHead','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','InsectEPGSmallTrain','WormsTwoClass','PowerCons','InsectEPGRegularTrain','GunPointMaleVersusFemale','DistalPhalanxOutlineCorrect','ItalyPowerDemand','InsectWingbeatSound','BME','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','Meat','SemgHandGenderCh2','ToeSegmentation2','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','Earthquakes','BirdChicken','HandOutlines','BeetleFly','ACSF1','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll', 'TwoPatterns']

In [95]:
dataunique = ['Beef', 'Car', 'CBF', 'Coffee','DiatomSizeReduction']

In [133]:
from sklearn.utils import resample


features_train, features_test, target_train, target_test = load_data('CBF')

class BaggingClassifier:
    def __init__(self, base_estimator, n_estimators=10, max_samples=1.0):
        self.base_estimator = base_estimator
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.estimators = []

    def fit(self, X, y):
        for _ in range(self.n_estimators):
            # Amostragem com reposição dos dados
            X_subset, y_subset = resample(X, y, replace=True, n_samples=int(self.max_samples * len(X)))
            # Treinamento do classificador base
            estimator = self.base_estimator.fit(X_subset, y_subset)
            self.estimators.append(estimator)

    def predict(self, X):
        # Fazer previsões com cada classificador e retornar a maioria delas
        predictions = np.array([estimator.predict(X) for estimator in self.estimators])
        return np.mean(predictions, axis=0)


# Criar e treinar modelo Bagging usando árvore de decisão como base
base_estimator = select_model('exrf', random_state=123)
bagging_model = BaggingClassifier(base_estimator, n_estimators=10, max_samples=0.8)
bagging_model.fit(features_train, target_train)

# Fazer previsões
y_pred = bagging_model.predict(features_test)

# Avaliar desempenho
accuracy = accuracy_score(target_test, y_pred)
print("Accuracy:", accuracy)


Accuracy: 0.8711111111111111


In [134]:
for dataset_name in dataunique:
    Acc = []
    dataset_accuracies = []
    # Carregue os dados de treinamento e teste
    features_train, features_test, target_train, target_test = load_data(dataset_name)
    best_wavelet = choose_wavelet(features_train)

    trained_models, meta_classifier = train_with_meta_classifier(features_train, target_train, base_option='bagging_model', meta_option='rd', random_state=123, wavelet=best_wavelet)
    
    predictions = predict_with_meta_classifier(features_test, trained_models, meta_classifier, wavelet=best_wavelet)
    
    test_accuracy_meta = np.mean(predictions == target_test)
    
    dataset_accuracies.append(test_accuracy_meta)
    
    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
    
    Acc.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

accuracy_df = pd.DataFrame(Acc)


Training Base Models: 100%|[31m##########[0m| 5/5 [00:44<00:00,  8.87s/it]
Testing Instances: 100%|[32m##########[0m| 30/30 [00:04<00:00,  6.62it/s]


Acurácia Beef: 0.7


Training Base Models: 100%|[31m##########[0m| 5/5 [01:30<00:00, 18.16s/it]
Testing Instances: 100%|[32m##########[0m| 60/60 [00:09<00:00,  6.48it/s]


Acurácia Car: 0.6666666666666666


Training Base Models: 100%|[31m##########[0m| 5/5 [00:44<00:00,  8.81s/it]
Testing Instances: 100%|[32m##########[0m| 900/900 [02:14<00:00,  6.69it/s]


Acurácia CBF: 0.9011111111111111


Training Base Models: 100%|[31m##########[0m| 5/5 [00:41<00:00,  8.29s/it]
Testing Instances: 100%|[32m##########[0m| 28/28 [00:04<00:00,  6.88it/s]


Acurácia Coffee: 1.0


Training Base Models: 100%|[31m##########[0m| 5/5 [00:23<00:00,  4.73s/it]
Testing Instances: 100%|[32m##########[0m| 306/306 [00:45<00:00,  6.69it/s]

Acurácia DiatomSizeReduction: 0.9411764705882353





### Meta-Classificador - Hipótese

In [140]:
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import accuracy_score
import numpy as np

# Carregar o conjunto de dados
features_train, features_test, target_train, target_test = load_data('CBF')

# Treinar SVMs com diferentes kernels
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
svm_models = {}
for kernel in kernels:
    svm_models[kernel] = SVC(kernel=kernel)
    svm_models[kernel].fit(features_train, target_train)

# Obter previsões e pontuações de confiança dos SVMs
predictions = {}
scores = {}
for kernel in kernels:
    predictions[kernel] = svm_models[kernel].predict(features_test)
    scores[kernel] = svm_models[kernel].decision_function(features_test)

# Treinar o Ridge Classifier usando as pontuações de decisão dos modelos SVM nos dados de treinamento
ridge_classifier = RidgeClassifierCV()
ridge_classifier.fit(np.column_stack(list(scores.values())), target_train)

# Obter as pontuações de decisão dos modelos SVM nos dados de teste
scores_test = {}
for kernel in kernels:
    scores_test[kernel] = svm_models[kernel].decision_function(features_test)

# Fazer a classificação final usando as pontuações de decisão dos modelos SVM nos dados de teste
y_pred = ridge_classifier.predict(np.column_stack(list(scores_test.values())))

# Avaliar o desempenho do modelo
print('Acurácia:', accuracy_score(target_test, y_pred))



ValueError: Found input variables with inconsistent numbers of samples: [900, 30]

In [138]:
print("Número de amostras em features_train:", len(features_train))
print("Número de amostras em target_train:", len(target_train))
print("Número de amostras em features_test:", len(features_test))
print("Número de amostras em target_test:", len(target_test))

Número de amostras em features_train: 30
Número de amostras em target_train: 30
Número de amostras em features_test: 900
Número de amostras em target_test: 900
