### Bibliotecas

In [13]:
"""%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras
%pip install pywavelets"""

'%pip install aeon\n%pip install tsfresh\n%pip install tslearn\n%pip install tensorflow\n%pip install keras\n%pip install pywavelets'

In [14]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.interval_based import SupervisedTimeSeriesForest, TimeSeriesForestClassifier

from tsfresh import extract_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from scipy.fftpack import fft
from numba import jit
import timeit

import warnings
warnings.filterwarnings("ignore")

### Loading data

In [15]:
def load_data(dataset):
    """Load one dataset's train/test splits and return 2-D features with encoded labels.

    The ``@staticmethod`` decorator was removed: this is a module-level
    function, and before Python 3.10 a ``staticmethod`` object is not callable
    outside a class, so calling ``load_data(...)`` raised ``TypeError``.

    Parameters
    ----------
    dataset : str
        Dataset name forwarded to ``aeon.datasets.load_classification``.

    Returns
    -------
    tuple
        ``(features_train, features_test, target_train, target_test)``.
        Features are reshaped to ``(n_cases, -1)``; targets are the integer
        codes produced by a ``LabelEncoder`` fitted on the training labels.
    """
    # Encoder for the target labels (fitted on the training split only).
    le = LabelEncoder()

    # Load both splits of the dataset.
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="test")

    # Flatten every case to a single row so downstream code sees a 2-D array.
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Fit the encoder on train labels, then reuse the same mapping for test.
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Data transformation functions

In [16]:
def choose_wavelet(X):
    """Return the name of the candidate wavelet with the smallest detail-coefficient variance.

    For each of 'db1' .. 'db9', a single-level ``pywt.dwt`` is applied along
    ``axis=1`` of ``X`` and the variance of all detail coefficients is
    measured. The first candidate with the strictly lowest variance wins.

    Returns
    -------
    str
        The selected wavelet name, passed through ``str()``.
    """
    candidates = ('db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9')

    lowest_variance = float('inf')
    selected = None
    for name in candidates:
        # pywt.dwt returns (approximation, detail); only the detail part is scored.
        detail = pywt.dwt(X, name, axis=1)[1]
        detail_variance = np.var(detail)
        if detail_variance < lowest_variance:
            lowest_variance, selected = detail_variance, name

    return str(selected)


def _frame_with_id(values):
    """Wrap a 2-D array in a DataFrame and append a sequential 'id' column."""
    frame = pd.DataFrame(values)
    frame['id'] = range(len(frame))
    return frame


def _scaled_2d(values):
    """Apply TimeSeriesScalerMeanVariance and return the result as a 2-D array."""
    scaled = TimeSeriesScalerMeanVariance().fit_transform(values)
    # reshape (not in-place ndarray.resize) so the call cannot fail when the
    # array is referenced elsewhere.
    return scaled.reshape(scaled.shape[0], scaled.shape[1])


def transform_data_math(X, wavelet):
    """Build five representations of ``X`` and extract tsfresh features from each.

    The representations are: the scaled raw series ('X'), the scaled FFT
    magnitude ('FFT'), the scaled concatenation of single-level DWT
    approximation and detail coefficients ('DWT'), the PAA reconstruction
    ('PAA') and the SAX reconstruction ('SAX').

    The ``@jit`` decorator was removed: the body calls pandas, tslearn,
    tsfresh and pywt, which numba cannot compile, so the decorator gave no
    speed-up and fails outright where ``@jit`` defaults to nopython mode.

    Parameters
    ----------
    X : array-like
        Presumably 2-D ``(n_series, series_length)``: the code reads
        ``X.shape[1]`` and transforms along ``axis=1`` -- confirm with caller.
    wavelet : str
        Wavelet name forwarded to ``pywt.dwt``.

    Returns
    -------
    dict
        Maps 'X', 'FFT', 'DWT', 'PAA', 'SAX' (in that insertion order) to the
        DataFrame returned by ``tsfresh.extract_features``.
    """
    # Both the segment count and the SAX alphabet size are a quarter of the series length.
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    # Magnitude spectrum of every series.
    X_fft = np.abs(fft(X, axis=1))

    # Single-level DWT; approximation and detail coefficients placed side by side.
    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    # PAA round trip (transform, then inverse), flattened to 2-D.
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    df_PAA = _frame_with_id(X_paa_.reshape(X_paa_.shape[0], -1))

    # SAX round trip (transform, then inverse), flattened to 2-D.
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    df_SAX = _frame_with_id(X_sax_.reshape(X_sax_.shape[0], -1))

    # Only the raw, FFT and DWT views go through the mean/variance scaler.
    df_X = _frame_with_id(_scaled_2d(X))
    df_FFT = _frame_with_id(_scaled_2d(X_fft))
    df_DWT = _frame_with_id(_scaled_2d(X_dwt))

    extracted_features_dict = {}

    # Extract the minimal tsfresh feature set from each representation.
    for name, df in [('X', df_X), ('FFT', df_FFT), ('DWT', df_DWT), ('PAA', df_PAA), ('SAX', df_SAX)]:
        features = extract_features(df, default_fc_parameters=MinimalFCParameters(), disable_progressbar=True, column_id='id')
        extracted_features_dict[name] = features
    return extracted_features_dict

### AmazonForestClassifier

In [17]:
class CombinedDecisionForest:
    """Weighted soft-voting ensemble of four forest classifiers.

    Members are a RandomForestClassifier, an ExtraTreesClassifier, a
    SupervisedTimeSeriesForest and a TimeSeriesForestClassifier. Each member's
    vote is weighted by its training accuracy raised to ``weight_power``.

    Parameters
    ----------
    weight_power : float, default=4
        Exponent applied to each member's training accuracy before the
        weights are normalized to sum to one.
    """

    def __init__(self, weight_power=4):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.weight_power = weight_power
        self.clf_weights = None
        # Sorted unique training labels; set by fit().
        self.classes_ = None

    def _classifiers(self):
        """Return the ensemble members in their fixed voting order."""
        return [self.clf1, self.clf2, self.clf3, self.clf4]

    def fit(self, X, y):
        """Fit every member on ``(X, y)`` and derive the voting weights.

        Weights are computed from accuracy on the same data used for fitting.

        Returns
        -------
        CombinedDecisionForest
            ``self``, so calls can be chained.
        """
        classifiers = self._classifiers()

        for clf in classifiers:
            clf.fit(X, y)

        # Weights are based on each member's accuracy on the training data.
        train_preds = [clf.predict(X) for clf in classifiers]
        accuracies = np.array([accuracy_score(y, preds) for preds in train_preds], dtype=float)

        weights = accuracies ** self.weight_power
        total = np.sum(weights)
        if total > 0:
            weights = weights / total  # normalize the weights
        else:
            # Every member scored zero: fall back to uniform weights rather
            # than dividing by zero and producing NaN.
            weights = np.full(len(classifiers), 1.0 / len(classifiers))

        self.clf_weights = weights
        self.classes_ = np.unique(y)
        return self

    def predict_proba(self, X):
        """Return the weight-averaged class probabilities, renormalized per row.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.clf_weights is None:
            raise RuntimeError("CombinedDecisionForest must be fitted before predicting.")

        probs = [clf.predict_proba(X) for clf in self._classifiers()]
        combined_probs = np.sum([prob * weight for prob, weight in zip(probs, self.clf_weights)], axis=0)

        return combined_probs / np.sum(combined_probs, axis=1, keepdims=True)

    def predict(self, X):
        """Return the most probable class label for every row of ``X``.

        The argmax column index is mapped through ``classes_`` so the original
        labels are returned. When the labels are already 0..k-1 this is the
        same as the raw index.
        """
        best = np.argmax(self.predict_proba(X), axis=1)
        classes = getattr(self, "classes_", None)
        if classes is None:
            # No label mapping recorded: return the column indices.
            return best
        return classes[best]

### Train/Predict

In [18]:
def train_with_meta_classifier(X_train, y_train, wavelet=None):
    """Extract features from the training series and fit a CombinedDecisionForest.

    The ``@jit`` decorator was removed: the body only orchestrates
    pandas/tsfresh/scikit-learn objects, which numba cannot compile.

    Parameters
    ----------
    X_train : array-like
        Training series, forwarded to ``transform_data_math``.
    y_train : array-like
        Training labels.
    wavelet : str, optional
        Wavelet name forwarded to ``transform_data_math``. The default ``None``
        is passed through unchanged -- presumably callers always supply a
        name; verify.

    Returns
    -------
    CombinedDecisionForest
        The fitted ensemble.
    """
    start = timeit.default_timer()
    extracted_features_dict = transform_data_math(X_train, wavelet=wavelet)
    elapsed = timeit.default_timer() - start
    print('Extraction train data (seconds): ', elapsed)

    # Place the feature tables of all representations side by side.
    meta_features = np.hstack([extracted_features.values for extracted_features in extracted_features_dict.values()])

    # Fit the ensemble on the combined features.
    meta_classifier = CombinedDecisionForest()
    meta_classifier.fit(meta_features, y_train)

    return meta_classifier


In [19]:
def predict_with_meta_classifier(X_test, trained_meta_classifier, wavelet=None):
    """Extract features from the test series and predict with a fitted ensemble.

    The ``@jit`` decorator was removed: the body only orchestrates
    pandas/tsfresh/scikit-learn objects, which numba cannot compile.

    Parameters
    ----------
    X_test : array-like
        Test series, forwarded to ``transform_data_math``.
    trained_meta_classifier : object
        Fitted classifier exposing ``predict``.
    wavelet : str, optional
        Wavelet name forwarded to ``transform_data_math``. The default ``None``
        is passed through unchanged -- presumably this should be the wavelet
        used at training time; verify.

    Returns
    -------
    The value returned by ``trained_meta_classifier.predict``.
    """
    start = timeit.default_timer()
    extracted_features_dict = transform_data_math(X_test, wavelet=wavelet)
    elapsed = timeit.default_timer() - start
    print('Extraction test data (seconds): ', elapsed)

    # Place the feature tables of all representations side by side.
    meta_features_test = np.hstack([extracted_features.values for extracted_features in extracted_features_dict.values()])

    # Predict with the already-trained ensemble (no fitting happens here).
    predictions = trained_meta_classifier.predict(meta_features_test)

    return predictions


### Validando o modelo

In [20]:
# Dataset names iterated by the evaluation loop in this notebook.
dataset_quali_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']
# Further dataset names; not referenced by any other visible cell.
dataset_full_list = ['Worms','FaceAll','SemgHandMovementCh2','Herring','GunPointAgeSpan','SmoothSubspace','SemgHandSubjectCh2','LargeKitchenAppliances','Plane','Fish','ScreenType','PhalangesOutlinesCorrect','CricketZ','MiddlePhalanxOutlineAgeGroup','ECG5000','Chinatown','ShapeletSim','MiddlePhalanxTW','Symbols','EOGHorizontalSignal','Ham','UMD','HouseTwenty','MiddlePhalanxOutlineCorrect','Wafer','Rock','DistalPhalanxTW','CricketY','FacesUCR','FiftyWords','Mallat','Strawberry','SwedishLeaf','ProximalPhalanxOutlineAgeGroup','MixedShapesRegularTrain','SmallKitchenAppliances','GunPointOldVersusYoung','Wine','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','Yoga','CinCECGTorso','ChlorineConcentration','ArrowHead','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','InsectEPGSmallTrain','WormsTwoClass','PowerCons','InsectEPGRegularTrain','GunPointMaleVersusFemale','DistalPhalanxOutlineCorrect','ItalyPowerDemand','InsectWingbeatSound','BME','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','Meat','SemgHandGenderCh2','ToeSegmentation2','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','Earthquakes','BirdChicken','HandOutlines','BeetleFly','ACSF1','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
# Names set aside as problematic (the reason is not recorded here).
# NOTE(review): 'MedicalImages' and 'TwoPatterns' also appear in dataset_quali_list -- confirm which list is intended.
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll', 'TwoPatterns']

In [21]:
# Evaluate the pipeline on every dataset in dataset_quali_list and record the
# test accuracy. Each dataset takes tens of seconds of feature extraction, so
# the result table is built in a `finally` clause: if one dataset fails or the
# run is interrupted, the accuracies already computed are still available in
# accuracy_df (the exception itself is not swallowed).
accuracy_data = []
try:
    for dataset_name in dataset_quali_list:
        # Load the training and test data.
        features_train, features_test, target_train, target_test = load_data(dataset_name)
        # The wavelet is chosen on the training split and reused for the test split.
        best_wavelet = choose_wavelet(features_train)

        meta_classifier = train_with_meta_classifier(features_train, target_train, wavelet=best_wavelet)

        predictions = predict_with_meta_classifier(features_test, meta_classifier, wavelet=best_wavelet)

        test_accuracy_meta = np.mean(predictions == target_test)

        accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

        print(f"Accuracy {dataset_name}: {test_accuracy_meta}")
finally:
    # Explicit columns keep the frame's layout stable even when no dataset completed.
    accuracy_df = pd.DataFrame(accuracy_data, columns=['Dataset Name', 'Accuracy'])

Extraction train data (seconds):  43.788181200041436
Extraction test data (seconds):  38.24764590000268
Accuracy Adiac: 0.7953964194373402
Extraction train data (seconds):  19.630803500069305
Extraction test data (seconds):  19.46398389991373
Accuracy Beef: 0.8333333333333334
Extraction train data (seconds):  26.701001000008546
Extraction test data (seconds):  26.299387399922125
Accuracy Car: 0.7333333333333333
Extraction train data (seconds):  15.778611799934879
Extraction test data (seconds):  53.63676190003753
Accuracy CBF: 0.9633333333333334
Extraction train data (seconds):  19.824045700021088
Extraction test data (seconds):  19.028037199983373
Accuracy Coffee: 1.0
Extraction train data (seconds):  18.19912780006416
Extraction test data (seconds):  51.445221600006334
Accuracy DiatomSizeReduction: 0.9509803921568627
Extraction train data (seconds):  17.75305020005908
Extraction test data (seconds):  17.657014700002037
Accuracy ECG200: 0.84
Extraction train data (seconds):  16.313168

In [24]:
# Display the per-dataset accuracy table.
accuracy_df

Unnamed: 0,Dataset Name,Accuracy
0,Adiac,0.795396
1,Beef,0.833333
2,Car,0.733333
3,CBF,0.963333
4,Coffee,1.0
5,DiatomSizeReduction,0.95098
6,ECG200,0.84
7,ECGFiveDays,0.998839
8,FaceFour,0.920455
9,GunPoint,0.946667


In [26]:
# Save the accuracy table to a CSV file in the working directory.
# No `index` argument is passed, so pandas' default of writing the row index applies.
accuracy_df.to_csv('ResultsAmazonForest.csv')