### Bibliotecas

In [None]:
"""
Libraries:
Python - 3.11.7
Numba - 0.58.1
Pyarrow - 14.0.0
Pandas - 1.5.3
Numpy - 1.26.3
Pywt - 1.5.0
Aeon - 0.7.0
Scipy - 1.11.4
Matplotlib - 3.8.0
Sklearn - 1.2.2
Tqdm - 4.66.1
tsfresh - 0.20.1
tslearn - 0.6.3
Feature-engine - 1.7

%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install Matplotlib
%pip install scikit-learn
%pip install pywavelets
%pip install Numba
%pip install Pyarrow
%pip install Pandas
%pip install Numpy
%pip install Scipy
%pip install Tqdm
%pip install feature-engine
"""

In [3]:
import pandas as pd
import numpy as np
import pywt

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.interval_based import SupervisedTimeSeriesForest, TimeSeriesForestClassifier, DrCIFClassifier
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
from aeon.classification.base import BaseClassifier

from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV, LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier

from feature_engine.encoding import LabelEncoder

from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

from scipy.fftpack import fft
from tqdm import tqdm

import warnings
# Globally silence every warning category for the rest of the session so the
# per-dataset loop output stays readable.
# NOTE(review): this also hides deprecation/convergence warnings from the
# modelling libraries — consider narrowing the filter while debugging.
warnings.filterwarnings("ignore")

### Função para carregar os dados do repositório UCR

In [None]:
def load_data(dataset):
    """Load the train/test splits of a dataset and return flat features plus encoded labels.

    Parameters
    ----------
    dataset : str
        Dataset name passed to ``load_classification``.

    Returns
    -------
    features_train, features_test : np.ndarray
        The loaded collections reshaped to 2-D, one row per case
        (all remaining axes are flattened into the second dimension).
    target_train, target_test : np.ndarray
        Integer-encoded class labels. The encoder is fitted on the training
        labels only and then applied to the test labels.
    """
    # Imported locally so this function always uses scikit-learn's *target*
    # encoder (fit_transform / transform on a 1-D label array), independent of
    # whatever the module-level name ``LabelEncoder`` happens to be bound to.
    from sklearn.preprocessing import LabelEncoder

    # Load both predefined splits, using the same upper-case split names.
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="TEST")

    # Flatten every case to a single row: (n_cases, ...) -> (n_cases, n_values).
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Fit the label mapping on the training labels and reuse it for the test labels.
    le = LabelEncoder()
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Função das diferentes representações de dados

In [4]:
def choose_wavelet(X, candidate_wavelets=None):
    """Pick the wavelet whose single-level DWT detail coefficients have the lowest variance.

    Parameters
    ----------
    X : array-like, 2-D
        One series per row; the DWT is applied along ``axis=1``.
    candidate_wavelets : iterable of str, optional
        Wavelet names to compare. Defaults to ``'db1'`` .. ``'db9'``.

    Returns
    -------
    str
        Name of the selected wavelet. Ties keep the earliest candidate. If no
        candidate produces a variance that compares below infinity (for
        example because ``X`` contains NaN), the first candidate is returned
        instead of the unusable string ``'None'``.

    Raises
    ------
    ValueError
        If ``candidate_wavelets`` is given but empty.
    """
    if candidate_wavelets is None:
        candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']
    candidate_wavelets = list(candidate_wavelets)
    if not candidate_wavelets:
        raise ValueError("candidate_wavelets must contain at least one wavelet name")

    # Start from the first candidate so a valid wavelet name is always returned.
    best_wavelet = candidate_wavelets[0]
    min_variance = float('inf')

    for wavelet_type in candidate_wavelets:
        # Only the detail coefficients are scored; the approximation is discarded.
        _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
        total_variance = np.var(coeffs_cD)

        # Strict comparison: NaN variances never win and ties keep the earlier wavelet.
        if total_variance < min_variance:
            min_variance = total_variance
            best_wavelet = wavelet_type

    return str(best_wavelet)


In [5]:
def _representation_frame(values, tag):
    """Flatten ``values`` to 2-D and return a DataFrame whose columns are named ``'<k>_<tag>'``."""
    flat = np.asarray(values)
    flat = flat.reshape(flat.shape[0], -1)
    return pd.DataFrame(flat).add_suffix(f"_{tag}")


def transform_data_math(X, wavelet='db1'):
    """Build one wide feature table from several representations of each series.

    For every row of ``X`` the following blocks are computed and placed side by
    side, in this order:

    * ``*_X``   – the series scaled with ``TimeSeriesScalerMeanVariance``
    * ``id``    – the row position (0 .. n_rows - 1), kept for compatibility
    * ``*_FFT`` – scaled magnitude of the FFT
    * ``*_DWT`` – scaled single-level DWT (approximation followed by detail
      coefficients, ``mode='constant'``)
    * ``*_PAA`` – PAA reconstruction of the raw series
    * ``*_SAX`` – SAX reconstruction of the raw series

    Parameters
    ----------
    X : np.ndarray, 2-D
        One series per row.
    wavelet : str, default 'db1'
        Wavelet name passed to ``pywt.dwt``.

    Returns
    -------
    pd.DataFrame
        One row per input series. Every column name is a string, so the frame
        no longer mixes integer and string labels (previously the DWT block
        kept bare integer names because its merge suffix never applied).
    """
    # Both PAA and SAX use a quarter of the series length as their size parameter.
    n_paa_segments = X.shape[1] // 4
    n_sax_symbols = X.shape[1] // 4

    def _scaled(values):
        # The scaler output is reshaped to (n_rows, n_values) with a
        # non-mutating reshape; in-place ndarray.resize can fail when the
        # array is referenced elsewhere.
        scaled = TimeSeriesScalerMeanVariance().fit_transform(values)
        return scaled.reshape(scaled.shape[0], -1)

    # FFT magnitude spectrum
    X_fft = np.abs(fft(X, axis=1))

    # Single-level DWT: approximation and detail coefficients side by side
    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    # PAA: reduce, then map back to the original length
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa = paa.inverse_transform(paa.fit_transform(X))

    # SAX: symbolise, then map back to the original length
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax = sax.inverse_transform(sax.fit_transform(X))

    df_X = _representation_frame(_scaled(X), 'X')
    df_id = pd.DataFrame({'id': range(len(df_X))})
    df_FFT = _representation_frame(_scaled(X_fft), 'FFT')
    df_DWT = _representation_frame(_scaled(X_dwt), 'DWT')
    df_PAA = _representation_frame(X_paa, 'PAA')
    df_SAX = _representation_frame(X_sax, 'SAX')

    # All blocks share the same default row index, so a column-wise concat
    # aligns them row by row without needing a join key.
    final_df = pd.concat([df_X, df_id, df_FFT, df_DWT, df_PAA, df_SAX], axis=1)

    return final_df

### Modelo de Seleção dinâmica

In [None]:
class CombinedDecisionForest:
    """Soft-voting ensemble of four forest classifiers weighted by training accuracy.

    Each base classifier is fitted on the same data. Its accuracy on that
    training data, normalised so the weights sum to one, becomes its weight in
    a weighted sum of the classifiers' ``predict_proba`` outputs.

    Parameters
    ----------
    random_state : int or None, default None
        Forwarded to the four base classifiers so runs can be made
        reproducible. ``None`` keeps each estimator's default behaviour.
    """

    def __init__(self, random_state=None):
        self.clf1 = RandomForestClassifier(random_state=random_state)
        self.clf2 = ExtraTreesClassifier(random_state=random_state)
        self.clf3 = SupervisedTimeSeriesForest(random_state=random_state)
        self.clf4 = TimeSeriesForestClassifier(random_state=random_state)
        # Not used by any method of this class; kept so the attribute stays available.
        self.meta_clf = RidgeClassifierCV(alphas=np.logspace(-3,3,10))
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.clf_weights = None  # set by fit(): one weight per base classifier
        self.classes_ = None     # set by fit(): sorted unique training labels

    def fit(self, X, y):
        """Fit every base classifier on ``(X, y)`` and derive the voting weights.

        Returns ``self`` so calls can be chained.
        """
        y = np.asarray(y)
        self.classes_ = np.unique(y)

        for clf in self.classifiers:
            clf.fit(X, y)

        # Accuracy of each classifier on the data it was just fitted on.
        accuracies = np.array(
            [accuracy_score(y, clf.predict(X)) for clf in self.classifiers],
            dtype=float,
        )

        total = accuracies.sum()
        if total > 0:
            self.clf_weights = accuracies / total
        else:
            # Every classifier scored 0: fall back to equal weights rather than dividing by zero.
            self.clf_weights = np.full(len(self.classifiers), 1.0 / len(self.classifiers))
        return self

    def predict_proba(self, X):
        """Return the weighted sum of base-classifier probabilities, renormalised per row."""
        probs = [clf.predict_proba(X) for clf in self.classifiers]
        combined_probs = np.sum(
            [prob * weight for prob, weight in zip(probs, self.clf_weights)], axis=0
        )
        return combined_probs / np.sum(combined_probs, axis=1, keepdims=True)

    def predict(self, X):
        """Return the most probable class label for each row of ``X``.

        The argmax column index is mapped through ``classes_``. This assumes
        each base classifier orders its probability columns by sorted class
        label (the scikit-learn convention) — TODO confirm for the aeon
        estimators. For labels already encoded as 0..k-1 the result equals the
        bare column index. If ``classes_`` is unset, the index is returned.
        """
        best = np.argmax(self.predict_proba(X), axis=1)
        classes = getattr(self, "classes_", None)
        if classes is None:
            return best
        return np.asarray(classes)[best]

### Treino & Predição

In [None]:
# Evaluate the two-stage pipeline on every dataset: the forest ensemble turns
# the multi-representation features into class probabilities, and a ridge
# classifier is then trained on those probabilities.
accuracy_data = []

# sorted() gives the same dataset order on every run, whatever container type
# `univariate_equal_length` is.
for dataset_name in sorted(univariate_equal_length):
    X_train, X_test, target_train, target_test = load_data(dataset_name)

    # Training: the wavelet is selected on the training split only.
    best_wl = choose_wavelet(X_train)
    # `transform_data_math` is the representation builder defined in this
    # notebook (no `transform_series` exists). Its 'id' column is only the row
    # position, so it is dropped before modelling. Converting to a NumPy array
    # avoids passing mixed-type column labels to the estimators.
    features_train = transform_data_math(X_train, best_wl).drop(columns="id").to_numpy()

    # Testing: reuse the wavelet chosen on the training split.
    features_test = transform_data_math(X_test, best_wl).drop(columns="id").to_numpy()

    feature_extractor = CombinedDecisionForest()
    feature_extractor.fit(features_train, target_train)

    # The ensemble's class probabilities become the meta-model's inputs.
    train_features = feature_extractor.predict_proba(features_train)
    test_features = feature_extractor.predict_proba(features_test)

    meta_model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    meta_model.fit(train_features, target_train)

    predictions = meta_model.predict(test_features)
    accuracy = accuracy_score(target_test, predictions)

    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': accuracy})

    print(f"Acurácia {dataset_name}: {accuracy}")

# One row per dataset with its test accuracy.
accuracy_df = pd.DataFrame(accuracy_data)