### Bibliotecas

In [None]:
"""
%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras

"""

In [1]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier, ShapeDTW, ElasticEnsemble

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt

from sklearn.metrics import accuracy_score

from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import GaussianNB

from scipy.fftpack import fft
from numba import jit
from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

### Time Series Classifier Dynamic Series Representation

In [10]:
class TimeSeriesClassifier:
    def __init__(self, random_state=42):
        self.random_state = random_state
        self.trained_models = {}
        """_summary_

        Returns:
            _type_: _description_
        """
    @jit
    def transform_data_math(X):
        n_sax_symbols = int(X.shape[1] / 4)
        n_paa_segments = int(X.shape[1] / 4)
    
        X_fft = np.abs(fft(X, axis=1))
    
        coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
        X_dwt = np.hstack((coeffs_cA, coeffs_cD))
    
        paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
        X_paa_ = paa.inverse_transform(paa.fit_transform(X))
        X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
        stats_PAA = np.hstack([np.mean(X_paa, axis=1).reshape(-1,1),
                               np.std(X_paa, axis=1).reshape(-1,1),
                               np.max(X_paa, axis=1).reshape(-1,1),
                               np.min(X_paa, axis=1).reshape(-1,1),
                               ])
    
        sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
        X_sax_ = sax.inverse_transform(sax.fit_transform(X))
        X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
        stats_SAX = np.hstack([np.mean(X_sax, axis=1).reshape(-1,1),
                               np.std(X_sax, axis=1).reshape(-1,1),
                               np.max(X_sax, axis=1).reshape(-1,1),
                               np.min(X_sax, axis=1).reshape(-1,1),
                               ])
    
        data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
        data_X.resize(data_X.shape[0], data_X.shape[1])
        stats_X = np.hstack([np.mean(data_X, axis=1).reshape(-1,1),
                             np.std(data_X, axis=1).reshape(-1,1),
                             np.max(data_X, axis=1).reshape(-1,1),
                             np.min(data_X, axis=1).reshape(-1,1),
                             ])
    
        data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
        data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
        stats_FFT = np.hstack([np.mean(data_FFT, axis=1).reshape(-1,1),
                               np.std(data_FFT, axis=1).reshape(-1,1),
                               np.max(data_FFT, axis=1).reshape(-1,1),
                               np.min(data_FFT, axis=1).reshape(-1,1),
                               ])
    
        data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
        data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
        stats_DWT = np.hstack([np.mean(data_DWT, axis=1).reshape(-1,1),
                               np.std(data_DWT, axis=1).reshape(-1,1),
                               np.max(data_DWT, axis=1).reshape(-1,1),
                               np.min(data_DWT, axis=1).reshape(-1,1),
                               ])
    
        return {
            "TS": np.hstack([data_X, stats_X]),
            "FFT": np.hstack([data_FFT, stats_FFT]),
            "DWT": np.hstack([data_DWT, stats_DWT]),
            "PAA": np.hstack([X_paa, stats_PAA]),
            "SAX": np.hstack([X_sax, stats_SAX])
        }
    
    @jit
    def select_model(option, random_state):
        if option == '1nn':
            return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1, n_jobs=-1)
        elif option == '3nn':
            return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=3, n_jobs=-1)
        elif option == 'svm':
            return SVC(C = 100, gamma=0.01, kernel='linear', probability=True)
        elif option == 'gbc':
            return GradientBoostingClassifier(n_estimators=5, random_state=random_state)
        elif option == 'nb':
            return GaussianNB()
        elif option == 'shape':
            return ShapeDTW(n_neighbors=1)
        elif option == 'ee':
            return ElasticEnsemble(n_jobs=-1,
                                   random_state=random_state,
                                   majority_vote=True)
        elif option == 'exrf':
            return ExtraTreesClassifier(n_estimators=200,
                                        criterion="entropy",
                                        class_weight="balanced",
                                        max_features="sqrt",
                                        n_jobs=-1,
                                        random_state=None)
        elif option == 'rd':
            return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        else:
            return RandomForestClassifier(n_estimators=200,
                                          criterion="gini",
                                          class_weight="balanced_subsample",
                                          max_features="sqrt",
                                          n_jobs=-1,
                                          random_state=None)
    
    @jit
    def train_with_meta_classifier(X_train, y_train, base_option='random_forest', meta_option='1nn', random_state=42):
        trained_models = {}  # Salvar modelos treinados para cada transformação
        X_train_transformed = TimeSeriesClassifier.transform_data_math(X_train)  # Transformar todo o conjunto de treino
        loo = LeaveOneOut()
        num_classes = len(np.unique(y_train))  # Número de classes
    
        # Treinar um modelo para cada transformação e salvar no dicionário
        for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, colour='red', desc="Training Models"):
            model = TimeSeriesClassifier.select_model(base_option, random_state)
            scores = []
            for train_index, _ in loo.split(X_trans):
                model.fit(X_trans[train_index], y_train[train_index])
                score = model.score(X_trans[train_index], y_train[train_index])  # Score do modelo nos dados de treino
                scores.append(score)
            avg_score = np.mean(scores)
            trained_models[rep] = model  # Salvar o modelo treinado
    
        # Preparar dados para o meta-classificador
        meta_features = np.zeros((X_train.shape[0], num_classes))  # Inicializar um vetor para armazenar as somas de probabilidades para cada classe
        for i in range(X_train.shape[0]):
            for rep, model in trained_models.items():
                proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
                meta_features[i] += proba.flatten()  # Adicione as probabilidades para cada classe
    
        # Treinar o meta-classificador
        meta_classifier = TimeSeriesClassifier.select_model(meta_option, random_state)
        meta_classifier.fit(meta_features, y_train)
    
        return trained_models, meta_classifier
    
    @jit
    def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
        predictions = []
        meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste
    
        for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Predict"):
            x_instance = X_test[i].reshape(1, -1)
            x_transformed = TimeSeriesClassifier.transform_data_math(x_instance)
    
            instance_features = np.zeros(trained_meta_classifier.n_features_in_)  # Inicialize um vetor para armazenar as características do meta-classificador
            for rep, model in trained_base_models.items():
                proba = model.predict_proba(x_transformed[rep][0].reshape(1, -1))
                instance_features += proba.flatten()  # Adicione as probabilidades para cada classe
    
            meta_feature = np.array(instance_features).reshape(1, -1)
            predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões
    
            meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista
    
        # Converter a lista de meta-recursos dos dados de teste em um array numpy
        meta_features_test = np.array(meta_features_test)
            
        return predictions



### Testando um único modelo - Random Forest como extrator e SVM como meta-classificador

In [11]:
dataset_quali_list = [ 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']
dataset_full_list = ['MixedShapesRegularTrain','SmallKitchenAppliances','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','CinCECGTorso','ChlorineConcentration','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','WormsTwoClass','DistalPhalanxOutlineCorrect','InsectWingbeatSound','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','SemgHandGenderCh2','ToeSegmentation2','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','HandOutlines','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
rapidos = ['SwedishLeaf', 'ProximalPhalanxOutlineAgeGroup', 'GunPointOldVersusYoung', 'Wine', 'Yoga', 'ArrowHead', 'InsectEPGSmallTrain','PowerCons','InsectEPGRegularTrain','GunPointMaleVersusFemale','ItalyPowerDemand','BME','Meat','Earthquakes','BirdChicken','BeetleFly','ACSF1']
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll', 'TwoPatterns']

In [12]:
accuracy_data = []

for dataset_name in dataset_quali_list:
    X_train, y_train = load_classification(dataset_name, split="TRAIN")
    X_test, y_test = load_classification(dataset_name, split="test")
    
    # Achatando os dados para 2D, pois alguns algoritmos esperam 2D
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    dataset_accuracies = []
    trained_base_models, meta_classifier = TimeSeriesClassifier.train_with_meta_classifier(X_train_flat, y_train, base_option='1nn', meta_option='rd')
    predictions_test_meta = TimeSeriesClassifier.predict_with_meta_classifier(X_test_flat, trained_base_models, meta_classifier)
    test_accuracy_meta = np.mean(predictions_test_meta == y_test)
    dataset_accuracies.append(test_accuracy_meta)

    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

accuracy_df = pd.DataFrame(accuracy_data)

Training Models: 100%|[31m##########[0m| 5/5 [00:01<00:00,  4.83it/s]
Predict: 100%|[32m##########[0m| 30/30 [00:00<00:00, 119.96it/s]


Acurácia Beef: 0.6666666666666666


Training Models: 100%|[31m##########[0m| 5/5 [00:01<00:00,  3.53it/s]
Predict: 100%|[32m##########[0m| 60/60 [00:00<00:00, 123.08it/s]


Acurácia Car: 0.7333333333333333


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 16.79it/s]
Predict: 100%|[32m##########[0m| 900/900 [00:04<00:00, 204.67it/s]


Acurácia CBF: 0.8822222222222222


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 16.82it/s]
Predict: 100%|[32m##########[0m| 28/28 [00:00<00:00, 169.71it/s]


Acurácia Coffee: 1.0


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 44.27it/s]
Predict: 100%|[32m##########[0m| 306/306 [00:01<00:00, 162.38it/s]


Acurácia DiatomSizeReduction: 0.934640522875817


Training Models: 100%|[31m##########[0m| 5/5 [00:04<00:00,  1.00it/s]
Predict: 100%|[32m##########[0m| 100/100 [00:00<00:00, 186.53it/s]


Acurácia ECG200: 0.87


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 27.22it/s]
Predict: 100%|[32m##########[0m| 861/861 [00:04<00:00, 203.13it/s]


Acurácia ECGFiveDays: 0.8536585365853658


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 23.03it/s]
Predict: 100%|[32m##########[0m| 88/88 [00:00<00:00, 146.57it/s]


Acurácia FaceFour: 0.7954545454545454


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00,  5.90it/s]
Predict: 100%|[32m##########[0m| 150/150 [00:00<00:00, 202.94it/s]


Acurácia GunPoint: 0.9133333333333333


Training Models: 100%|[31m##########[0m| 5/5 [00:01<00:00,  3.51it/s]
Predict: 100%|[32m##########[0m| 61/61 [00:00<00:00, 117.77it/s]


Acurácia Lightning2: 0.819672131147541


Training Models: 100%|[31m##########[0m| 5/5 [00:01<00:00,  2.56it/s]
Predict: 100%|[32m##########[0m| 73/73 [00:00<00:00, 164.76it/s]


Acurácia Lightning7: 0.6027397260273972


Training Models: 100%|[31m##########[0m| 5/5 [06:27<00:00, 77.44s/it]
Predict: 100%|[32m##########[0m| 760/760 [00:11<00:00, 65.39it/s]


Acurácia MedicalImages: 0.6986842105263158


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 10.20it/s]
Predict: 100%|[32m##########[0m| 1252/1252 [00:14<00:00, 87.15it/s] 


Acurácia MoteStrain: 0.8490415335463258


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00,  6.00it/s]
Predict: 100%|[32m##########[0m| 30/30 [00:00<00:00, 46.51it/s]


Acurácia OliveOil: 0.8666666666666667


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00, 12.26it/s]
Predict: 100%|[32m##########[0m| 601/601 [00:06<00:00, 87.61it/s] 


Acurácia SonyAIBORobotSurface1: 0.7104825291181365


Training Models: 100%|[31m##########[0m| 5/5 [00:00<00:00,  7.39it/s]
Predict: 100%|[32m##########[0m| 953/953 [00:11<00:00, 82.29it/s] 


Acurácia SonyAIBORobotSurface2: 0.8730325288562435


Training Models: 100%|[31m##########[0m| 5/5 [02:05<00:00, 25.04s/it]
Predict: 100%|[32m##########[0m| 300/300 [00:01<00:00, 175.29it/s]


Acurácia SyntheticControl: 0.9633333333333334


Training Models: 100%|[31m##########[0m| 5/5 [00:05<00:00,  1.08s/it]
Predict: 100%|[32m##########[0m| 100/100 [00:00<00:00, 144.72it/s]


Acurácia Trace: 0.79


Training Models: 100%|[31m##########[0m| 5/5 [1:07:17<00:00, 807.57s/it]
Predict: 100%|[32m##########[0m| 4000/4000 [00:36<00:00, 111.07it/s]

Acurácia TwoPatterns: 0.9315





In [16]:
accuracy_df

Unnamed: 0,Dataset Name,Accuracy
0,Beef,0.666667
1,Car,0.733333
2,CBF,0.882222
3,Coffee,1.0
4,DiatomSizeReduction,0.934641
5,ECG200,0.87
6,ECGFiveDays,0.853659
7,FaceFour,0.795455
8,GunPoint,0.913333
9,Lightning2,0.819672


In [14]:
#accuracy_df.to_csv('model_votingCLF+NN.csv', index=False)

### Gráfico das diferenças de dados

In [15]:
import matplotlib.pyplot as plt

y1 = y_hat  # depois da transformação
y2 = y_test

z1 = y_hat_ #antes da transformação
z2 = y_test

#suavizar os dados do gráfico
window_size = 15
y1_smoothed = pd.Series(y1).rolling(window=window_size).mean()
y2_smoothed = pd.Series(y2).rolling(window=window_size).mean()
z1_smoothed = pd.Series(z1).rolling(window=window_size).mean()
z2_smoothed = pd.Series(z2).rolling(window=window_size).mean()

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5), layout='constrained')

# Conjunto de validação do classificador
axs[0].set_title('Antes da transformação')
axs[0].plot(z1_smoothed, label='Treino')
axs[0].plot(z2_smoothed, label='Teste')
axs[0].set_xlabel('Tempo (s)')
axs[0].set_ylabel('Treino')
axs[0].grid(True)

# Conjunto de validação do meta-classificador
axs[1].set_title('Depois da transformação')
axs[1].plot(y1_smoothed, label='Treino')
axs[1].plot(y2_smoothed, label='Teste')
axs[1].set_xlabel('Tempo (s)')
axs[1].set_ylabel('Treino')
axs[1].grid(True)

plt.legend()
plt.show()



NameError: name 'y_hat' is not defined

In [None]:
w1 = y_hat  # meta-classificador
w2 = y_hat_ #classificação

# Suavizar os dados do gráfico
window_size = 15
w1_smoothed = pd.Series(w1).rolling(window=window_size).mean()
w2_smoothed = pd.Series(w2).rolling(window=window_size).mean()

# Plotar os dados
plt.figure(figsize=(10, 6))
plt.plot(w1_smoothed, label='w1 (Classificação usando meta-caracteristicas)')
plt.plot(w2_smoothed, label='w2 (classificação utilizando dados brutos)')
plt.xlabel('Tempo (s)')
plt.ylabel('Valores suavizados')
plt.title('Comparação entre os resultados de um SVM')
plt.legend()
plt.grid(True)
plt.show()


### Treino em loop de todas as opções de classificadores disponiveis no Select Model.

In [None]:
algos = ['1nn', '3nn', 'svm', 'nb', 'gbc', 'ee', 'shape', 'rf', 'rd']
for algo in algos:

    print(f'Meta-classificador com modelo extrator {algo.upper()}')

    # Training
    try:
        trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='svm', meta_option=algo)
        # Testing
        predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
        test_accuracy_meta = np.mean(predictions_test_meta == y_test)

        print(f'Acurácia do teste usando o meta-classificador com modelo extrator {algo}: {test_accuracy_meta}')
    except Exception as e:
        print(f"Ocorreu um erro no teste com o {algo}: {e}")
    print("-------------------------------")