### Bibliotecas

In [1]:
"""%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras
%pip install pywavelets"""

'%pip install aeon\n%pip install tsfresh\n%pip install tslearn\n%pip install tensorflow\n%pip install keras\n%pip install pywavelets'

In [2]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.interval_based import SupervisedTimeSeriesForest, TimeSeriesForestClassifier

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from scipy.fftpack import fft
from numba import jit
import timeit

import warnings
warnings.filterwarnings("ignore")

### Loading data

In [3]:
@staticmethod
def load_data(dataset):
    # LabelEncoder para labels alvo
    le = LabelEncoder()

    # Carregar conjunto de dados do repositório UCR
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="test")

    # Formatar o conjunto de dados para 2D
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Ajustar e transformar as labels alvo
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Function transform data

In [4]:
def choose_wavelet(X):
    min_variance = float('inf')
    best_wavelet = None
    candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']

    for wavelet_type in candidate_wavelets:
        _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
        total_variance = np.var(coeffs_cD)

        if total_variance < min_variance:
            min_variance = total_variance
            best_wavelet = wavelet_type
    return str(best_wavelet)


@jit
def transform_data_math(X, wavelet):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
    df_PAA = pd.DataFrame(X_paa)
    df_PAA['id'] = range(len(df_PAA))
    
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
    df_SAX = pd.DataFrame(X_sax)
    df_SAX['id'] = range(len(df_SAX))

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    df_X = pd.DataFrame(data_X)
    df_X['id'] = range(len(df_X))

    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    df_FFT = pd.DataFrame(data_FFT)
    df_FFT['id'] = range(len(df_FFT))

    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
    df_DWT = pd.DataFrame(data_DWT)
    df_DWT['id'] = range(len(df_DWT))
    
    extracted_features_dict = {}

    # Loop through each transformed DataFrame and extract features
    for name, df in [('X', df_X), ('FFT', df_FFT), ('DWT', df_DWT), ('PAA', df_PAA), ('SAX', df_SAX)]:
        features = extract_features(df, default_fc_parameters=MinimalFCParameters(), disable_progressbar=True, column_id='id')
        extracted_features_dict[name] = features
    return extracted_features_dict

In [5]:
features_train, features_test, target_train, target_test = load_data('Adiac')
wavelet = choose_wavelet(features_train)

laika = transform_data_math(features_train, wavelet)

### AmazonForestClassifier

In [5]:
class CombinedDecisionForest:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.clf_weights = None

    def fit(self, X, y):
        for clf in self.classifiers:
            clf.fit(X, y)

        train_preds = [clf.predict(X) for clf in self.classifiers]
        accuracies = [accuracy_score(y, preds) for preds in train_preds]

        self.clf_weights = np.array(accuracies)
        self.clf_weights /= np.sum(self.clf_weights)

    def predict_proba(self, X):
        probs = [clf.predict_proba(X) for clf in self.classifiers]
        combined_probs = np.sum([prob * weight for prob, weight in zip(probs, self.clf_weights)], axis=0)
        return combined_probs / np.sum(combined_probs, axis=1, keepdims=True)

    def predict(self, X):
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

In [6]:
"""class CombinedDecisionForest:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.clf_weights = None

    def fit(self, X, y):
        classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]  # Lista de classificadores

        for clf in classifiers:
            clf.fit(X, y) # Calculando os pesos com base na acurácia
        train_preds = [clf.predict(X) for clf in classifiers]
        accuracies = [accuracy_score(y, preds) for preds in train_preds]

        self.clf_weights = np.array(accuracies) ** 4
        self.clf_weights /= np.sum(self.clf_weights) # Normalização dos pesos

    def predict_proba(self, X):
        classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]  # Lista de classificadores

        probs = [clf.predict_proba(X) for clf in classifiers]
        combined_probs = np.sum([prob * weight for prob, weight in zip(probs, self.clf_weights)], axis=0)

        return combined_probs / np.sum(combined_probs, axis=1, keepdims=True)

    def predict(self, X):
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)"""

'class CombinedDecisionForest:\n    def __init__(self):\n        self.clf1 = RandomForestClassifier()\n        self.clf2 = ExtraTreesClassifier()\n        self.clf3 = SupervisedTimeSeriesForest()\n        self.clf4 = TimeSeriesForestClassifier()\n        self.clf_weights = None\n\n    def fit(self, X, y):\n        classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]  # Lista de classificadores\n\n        for clf in classifiers:\n            clf.fit(X, y) # Calculando os pesos com base na acurácia\n        train_preds = [clf.predict(X) for clf in classifiers]\n        accuracies = [accuracy_score(y, preds) for preds in train_preds]\n\n        self.clf_weights = np.array(accuracies) ** 4\n        self.clf_weights /= np.sum(self.clf_weights) # Normalização dos pesos\n\n    def predict_proba(self, X):\n        classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]  # Lista de classificadores\n\n        probs = [clf.predict_proba(X) for clf in classifiers]\n        combined_probs 

### Train/Predict

In [7]:
@jit
def train_with_meta_classifier(X_train, y_train, wavelet=None):
    total_time = 0
    start = timeit.default_timer()
    extracted_features_dict = transform_data_math(X_train, wavelet=wavelet)  # Get extracted features
    stop = timeit.default_timer()
    total_time += stop - start
    print('Extraction train data (seconds): ', total_time) # Get extracted features
    # Prepare data for the meta-classifier
    meta_features = np.hstack([extracted_features.values for extracted_features in extracted_features_dict.values()])
    # Train a meta-classifier
    meta_classifier = CombinedDecisionForest()
    meta_classifier.fit(meta_features, y_train)

    return meta_classifier


In [8]:
@jit
def predict_with_meta_classifier(X_test, trained_meta_classifier, wavelet=None):
    total_time = 0
    start = timeit.default_timer()
    extracted_features_dict = transform_data_math(X_test, wavelet=wavelet)  # Get extracted features
    stop = timeit.default_timer()
    total_time += stop - start
    print('Extraction test data (seconds): ', total_time)
    # Prepare data for the meta-classifier
    meta_features_test = np.hstack([extracted_features.values for extracted_features in extracted_features_dict.values()])
    # Train a meta-classifier
    predictions = trained_meta_classifier.predict(meta_features_test)

    return predictions


### Validando o modelo

In [9]:
dataset_quali_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']
dataset_full_list = ['Worms','FaceAll','SemgHandMovementCh2','Herring','GunPointAgeSpan','SmoothSubspace','SemgHandSubjectCh2','LargeKitchenAppliances','Plane','Fish','ScreenType','PhalangesOutlinesCorrect','CricketZ','MiddlePhalanxOutlineAgeGroup','ECG5000','Chinatown','ShapeletSim','MiddlePhalanxTW','Symbols','EOGHorizontalSignal','Ham','UMD','HouseTwenty','MiddlePhalanxOutlineCorrect','Wafer','Rock','DistalPhalanxTW','CricketY','FacesUCR','FiftyWords','Mallat','Strawberry','SwedishLeaf','ProximalPhalanxOutlineAgeGroup','MixedShapesRegularTrain','SmallKitchenAppliances','GunPointOldVersusYoung','Wine','ProximalPhalanxOutlineCorrect','WordSynonyms', 'RefrigerationDevices','Yoga','CinCECGTorso','ChlorineConcentration','ArrowHead','ToeSegmentation1','TwoLeadECG','ProximalPhalanxTW','InsectEPGSmallTrain','WormsTwoClass','PowerCons','InsectEPGRegularTrain','GunPointMaleVersusFemale','DistalPhalanxOutlineCorrect','ItalyPowerDemand','InsectWingbeatSound','BME','NonInvasiveFetalECGThorax2','CricketX','Haptics','EOGVerticalSignal','MixedShapesSmallTrain','Meat','SemgHandGenderCh2','ToeSegmentation2','NonInvasiveFetalECGThorax1','FreezerSmallTrain','OSULeaf','Earthquakes','BirdChicken','HandOutlines','BeetleFly','ACSF1','DistalPhalanxOutlineAgeGroup','FreezerRegularTrain']
problematicos = ['Crop','EthanolLevel','ElectricDevices','FordB','ShapesAll','StarLightCurves','Phoneme', 'Computers','InlineSkate','PigAirwayPressure', 'PigCVP','FordA','MedicalImages','PigArtPressure', 'UWaveGestureLibraryX','UWaveGestureLibraryY', 'UWaveGestureLibraryZ', 'UWaveGestureLibraryAll', 'TwoPatterns']

In [10]:
"""accuracy_data = []
for dataset_name in dataset_quali_list:
    # Carregue os dados de treinamento e teste
    features_train, features_test, target_train, target_test = load_data(dataset_name)
    best_wavelet = choose_wavelet(features_train)

    meta_classifier = train_with_meta_classifier(features_train, target_train, wavelet=best_wavelet)
    
    predictions = predict_with_meta_classifier(features_test, meta_classifier, wavelet=best_wavelet)
    
    test_accuracy_meta = np.mean(predictions == target_test)
        
    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})
    
    print(f"Accuracy {dataset_name}: {test_accuracy_meta}")
    
accuracy_df = pd.DataFrame(accuracy_data)"""

'accuracy_data = []\nfor dataset_name in dataset_quali_list:\n    # Carregue os dados de treinamento e teste\n    features_train, features_test, target_train, target_test = load_data(dataset_name)\n    best_wavelet = choose_wavelet(features_train)\n\n    meta_classifier = train_with_meta_classifier(features_train, target_train, wavelet=best_wavelet)\n    \n    predictions = predict_with_meta_classifier(features_test, meta_classifier, wavelet=best_wavelet)\n    \n    test_accuracy_meta = np.mean(predictions == target_test)\n        \n    accuracy_data.append({\'Dataset Name\': dataset_name, \'Accuracy\': test_accuracy_meta})\n    \n    print(f"Accuracy {dataset_name}: {test_accuracy_meta}")\n    \naccuracy_df = pd.DataFrame(accuracy_data)'

In [11]:
#accuracy_df

In [12]:
#accuracy_df.to_csv('ResultsAmazonForest_corrigido.csv')

### DynamicAmazonClassifier

In [16]:
from sklearn.linear_model import RidgeClassifierCV

accuracy_data = []
for dataset_name in univariate_equal_length:
    # Carregue os dados de treinamento e teste
    features_train, features_test, target_train, target_test = load_data(dataset_name)
    best_wavelet = choose_wavelet(features_train)

    feature_extractor = CombinedDecisionForest()
    feature_extractor.fit(features_train, target_train)
    
    train_features = feature_extractor.predict_proba(features_train)
    test_features = feature_extractor.predict_proba(features_test)
    
    meta_model = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    
    meta_model.fit(train_features, target_train)
    
    predictions = meta_model.predict(test_features)
    
    accuracy = accuracy_score(target_test, predictions)
        
    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': accuracy})
    
    print(f"Accuracy {dataset_name}: {accuracy}")
    
accuracy_df = pd.DataFrame(accuracy_data)

Accuracy UMD: 0.9861111111111112
Accuracy ToeSegmentation2: 0.8846153846153846
Accuracy FordB: 0.7876543209876543
Accuracy ProximalPhalanxTW: 0.8146341463414634
Accuracy Plane: 1.0
Accuracy Rock: 0.8
Accuracy PigArtPressure: 0.4326923076923077
Accuracy Wine: 0.7222222222222222
Accuracy EOGVerticalSignal: 0.5138121546961326
Accuracy FordA: 0.9416666666666667
Accuracy SwedishLeaf: 0.9392
Accuracy Ham: 0.6952380952380952
Accuracy FiftyWords: 0.756043956043956
Accuracy EOGHorizontalSignal: 0.5303867403314917
Accuracy GunPointAgeSpan: 0.990506329113924
Accuracy Meat: 0.9333333333333333
Accuracy NonInvasiveFetalECGThorax2: 0.9297709923664123
Accuracy Computers: 0.764
Accuracy PowerCons: 1.0
Accuracy SemgHandGenderCh2: 0.9666666666666667
Accuracy FaceFour: 0.9772727272727273
Accuracy StarLightCurves: 0.9734094220495386
Accuracy Worms: 0.6493506493506493
Accuracy FreezerRegularTrain: 0.9898245614035087
Accuracy GunPoint: 0.96
Accuracy ChlorineConcentration: 0.74140625
Accuracy ItalyPowerDemand

In [24]:
accuracy_df

Unnamed: 0,Dataset Name,Accuracy
0,UMD,0.986111
1,ToeSegmentation2,0.884615
2,FordB,0.787654
3,ProximalPhalanxTW,0.814634
4,Plane,1.000000
...,...,...
107,WormsTwoClass,0.714286
108,CricketZ,0.738462
109,Phoneme,0.268987
110,Adiac,0.762148


In [19]:
accuracy_df.to_csv('ModelAM_Full_Test.csv')