### Bibliotecas

In [None]:
"""
%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras
%pip install pywavelets
"""

In [2]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.interval_based import SupervisedTimeSeriesForest, TimeSeriesForestClassifier

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt
from sklearn import pipeline
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from scipy.fftpack import fft
from numba import jit
import timeit
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

### Loading data

In [3]:
def load_data(dataset):
    # LabelEncoder para labels alvo
    le = LabelEncoder()

    # Carregar conjunto de dados do repositório UCR
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="test")

    # Formatar o conjunto de dados para 2D
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Ajustar e transformar as labels alvo
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Function transform data

In [4]:
def choose_wavelet(X):
    min_variance = float('inf')
    best_wavelet = None
    candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']

    for wavelet_type in candidate_wavelets:
        _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
        total_variance = np.var(coeffs_cD)

        if total_variance < min_variance:
            min_variance = total_variance
            best_wavelet = wavelet_type
    return str(best_wavelet)


In [5]:
def transform_data_math(X, wavelet):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    # FFT Transformation
    X_fft = np.abs(fft(X, axis=1))
    
    # DWT Transformation
    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    # PAA Transformation
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
    df_PAA = pd.DataFrame(X_paa)
    
    # SAX Transformation
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
    df_SAX = pd.DataFrame(X_sax)

    # Original Data
    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    df_X = pd.DataFrame(data_X)

    # FFT Data
    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    df_FFT = pd.DataFrame(data_FFT)

    # DWT Data
    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
    df_DWT = pd.DataFrame(data_DWT)

    # Adding IDs to DataFrames
    df_X['id'] = df_FFT['id'] = df_DWT['id'] = df_PAA['id'] = df_SAX['id'] = range(len(df_X))
    
    # Merging all DataFrames on 'id'
    final_df = df_X.merge(df_FFT, on='id', suffixes=('_X', '_FFT'))
    final_df = final_df.merge(df_DWT, on='id', suffixes=('', '_DWT'))
    final_df = final_df.merge(df_PAA, on='id', suffixes=('', '_PAA'))
    final_df = final_df.merge(df_SAX, on='id', suffixes=('', '_SAX'))
    
    
    return final_df

### AmazonForestClassifier

In [6]:
class CombinedDecisionForest:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.meta_clf = RidgeClassifierCV(alphas=np.logspace(-3,3,10))
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.clf_weights = None

    def fit(self, X, y):
        for clf in self.classifiers:
            clf.fit(X, y)

        train_preds = [clf.predict(X) for clf in self.classifiers]
        accuracies = [accuracy_score(y, preds) for preds in train_preds]

        self.clf_weights = np.array(accuracies)
        self.clf_weights /= np.sum(self.clf_weights)

    def predict_proba(self, X):
        probs = [clf.predict_proba(X) for clf in self.classifiers]
        combined_probs = np.sum([prob * weight for prob, weight in zip(probs, self.clf_weights)], axis=0)
        return combined_probs / np.sum(combined_probs, axis=1, keepdims=True)

    def predict(self, X):
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

### Train/Predict

In [7]:
def train_classifier(X_train, y_train, wavelet=None):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    X_train_transformed = transform_data_math(X_train, wavelet=wavelet)  # Transformar todo o conjunto de treino
    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in X_train_transformed.items():
        model = CombinedDecisionForest()
        model.fit(X_trans, y_train)
        trained_models[rep] = model
        
    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, model in trained_models.items():
            proba = model.predict_proba(np.array(X_train_transformed[rep].iloc[i,:]).reshape(1, -1))
            instance_features.extend(proba.flatten())
        meta_features.append(instance_features)
    
    meta_features = np.array(meta_features)
    
    meta_classifier = RidgeClassifierCV(np.logspace(-3,3,10))
    meta_classifier.fit(meta_features, y_train)
    
    return trained_models, meta_classifier

def predict_classifier(X_test, trained_base_model, trained_meta_classifier, wavelet=None):
    predictions = []
    meta_features_test = []

    for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Testing"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data_math(x_instance, wavelet=wavelet)

        instance_features = []
        for rep, X_trans in x_transformed.items():
            proba = trained_base_model[rep].predict_proba(X_trans.values.reshape(1, -1))
            instance_features.extend(proba.flatten())

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])

        meta_features_test.append(meta_feature.flatten())

    meta_features_test = np.array(meta_features_test)

    return predictions


# AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA

In [43]:
train_X, teste_X, y_train, y_test = load_data("Beef")

In [44]:
from tsfresh import extract_features

def extract_tsfresh_features(df):
    extracted_features = extract_features(df, default_fc_parameters=MinimalFCParameters(), column_id='id')
    
    return extracted_features

In [45]:
#Training
best_wl = choose_wavelet(train_X)
features_train = transform_data_math(train_X, best_wl)
meta_X = extract_tsfresh_features(features_train)

Feature Extraction: 100%|██████████| 30/30 [00:13<00:00,  2.18it/s]


In [46]:
#Testing
features_test = transform_data_math(teste_X, best_wl)
meta_ts = extract_tsfresh_features(features_test)

Feature Extraction: 100%|██████████| 30/30 [00:12<00:00,  2.50it/s]


In [68]:
meta_X.columns.tolist()

['0_X__sum_values',
 '0_X__median',
 '0_X__mean',
 '0_X__length',
 '0_X__standard_deviation',
 '0_X__variance',
 '0_X__root_mean_square',
 '0_X__maximum',
 '0_X__absolute_maximum',
 '0_X__minimum',
 '1_X__sum_values',
 '1_X__median',
 '1_X__mean',
 '1_X__length',
 '1_X__standard_deviation',
 '1_X__variance',
 '1_X__root_mean_square',
 '1_X__maximum',
 '1_X__absolute_maximum',
 '1_X__minimum',
 '2_X__sum_values',
 '2_X__median',
 '2_X__mean',
 '2_X__length',
 '2_X__standard_deviation',
 '2_X__variance',
 '2_X__root_mean_square',
 '2_X__maximum',
 '2_X__absolute_maximum',
 '2_X__minimum',
 '3_X__sum_values',
 '3_X__median',
 '3_X__mean',
 '3_X__length',
 '3_X__standard_deviation',
 '3_X__variance',
 '3_X__root_mean_square',
 '3_X__maximum',
 '3_X__absolute_maximum',
 '3_X__minimum',
 '4_X__sum_values',
 '4_X__median',
 '4_X__mean',
 '4_X__length',
 '4_X__standard_deviation',
 '4_X__variance',
 '4_X__root_mean_square',
 '4_X__maximum',
 '4_X__absolute_maximum',
 '4_X__minimum',
 '5_X__sum

In [47]:
#Original
clf = CombinedDecisionForest()
clf.fit(meta_X, y_train)
y_hat = clf.predict(meta_ts)
accuracy2 = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy2}")

KeyboardInterrupt: 

### Meta-Classificador

In [37]:
from sklearn.metrics import accuracy_score

class CombinedMetaClassifier:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.meta_clf = RandomForestClassifier()
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]

    def fit(self, X, y):
        # Treinar classificadores base
        base_probabilities = []
        for clf in self.classifiers:
            clf.fit(X, y)
            if hasattr(clf, "predict_proba"):
                probs = clf.predict_proba(X)
            else:
                # Converter previsões para probabilidades (caso o classificador não suporte predict_proba)
                preds = clf.predict(X)
                probs = np.zeros((preds.size, len(np.unique(y))))
                probs[np.arange(preds.size), preds] = 1
            base_probabilities.append(probs)
        
        # Stack probabilities para criar meta features
        meta_features = np.hstack(base_probabilities)
        
        # Treinar meta-classificador
        self.meta_clf.fit(meta_features, y)

    def predict(self, X):
        # Obter previsões probabilísticas dos classificadores base
        base_probabilities = []
        for clf in self.classifiers:
            if hasattr(clf, "predict_proba"):
                probs = clf.predict_proba(X)
            else:
                # Converter previsões para probabilidades (caso o classificador não suporte predict_proba)
                preds = clf.predict(X)
                probs = np.zeros((preds.size, clf.n_classes_))
                probs[np.arange(preds.size), preds] = 1
            base_probabilities.append(probs)
        
        # Stack probabilities para criar meta features
        meta_features = np.hstack(base_probabilities)
        
        # Previsão final usando o meta-classificador
        return self.meta_clf.predict(meta_features)
    
    def predict_proba(self, X):
        # Obter previsões probabilísticas dos classificadores base
        base_probabilities = []
        for clf in self.classifiers:
            if hasattr(clf, "predict_proba"):
                probs = clf.predict_proba(X)
            else:
                # Converter previsões para probabilidades (caso o classificador não suporte predict_proba)
                preds = clf.predict(X)
                probs = np.zeros((preds.size, clf.n_classes_))
                probs[np.arange(preds.size), preds] = 1
            base_probabilities.append(probs)
        
        # Stack probabilities para criar meta features
        meta_features = np.hstack(base_probabilities)
        
        # Previsão probabilística final usando o meta-classificador
        return self.meta_clf.predict_proba(meta_features)


In [38]:
model_classifier = CombinedMetaClassifier()
model_classifier.fit(meta_X, y_train)
y_hat = model_classifier.predict(meta_ts)
accuracy = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy}")

Accuracy: 0.6956521739130435


### Meta-Learning

In [39]:
from sklearn.model_selection import cross_val_predict

class CombinedMetaLearning:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.meta_clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.clf_weights = None

    def fit(self, X, y):
        # Treinar classificadores base
        base_predictions = []
        accuracies = []
        for clf in self.classifiers:
            clf.fit(X, y)
            preds = cross_val_predict(clf, X, y, cv=3, method='predict')
            base_predictions.append(preds.reshape(-1, 1))
            accuracies.append(accuracy_score(y, preds))
        
        # Stack predictions para criar meta features
        meta_features = np.hstack(base_predictions)
        
        # Calcular pesos com base na acurácia
        self.clf_weights = np.array(accuracies)
        self.clf_weights /= np.sum(self.clf_weights)
        
        # Treinar meta-classificador
        self.meta_clf.fit(meta_features, y)

    def predict(self, X):
        # Obter previsões dos classificadores base
        base_predictions = []
        for clf in self.classifiers:
            preds = clf.predict(X)
            base_predictions.append(preds.reshape(-1, 1))
        
        # Stack predictions para criar meta features
        meta_features = np.hstack(base_predictions)
        
        # Previsão final usando o meta-classificador
        return self.meta_clf.predict(meta_features)
    
    def predict_proba(self, X):
        # Obter previsões probabilísticas dos classificadores base
        base_probabilities = []
        for clf in self.classifiers:
            if hasattr(clf, "predict_proba"):
                probs = clf.predict_proba(X)
                base_probabilities.append(probs)
            else:
                preds = clf.predict(X)
                probs = np.zeros((preds.size, len(np.unique(preds))))
                probs[np.arange(preds.size), preds] = 1
                base_probabilities.append(probs)
        
        # Média ponderada das probabilidades dos classificadores base
        combined_probs = np.sum([prob * weight for prob, weight in zip(base_probabilities, self.clf_weights)], axis=0)
        
        return combined_probs

    def predict_with_argmax(self, X):
        combined_probs = self.predict_proba(X)
        return np.argmax(combined_probs, axis=1)

In [None]:
model_learning = CombinedMetaLearning()
model_learning.fit(meta_X, y_train)
y_hat = model_learning.predict(meta_ts)
accuracy = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy}")

In [66]:
class CombinedDecisionForest:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.base_classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.meta_clf = TimeSeriesForestClassifier()

    def fit(self, X, y):
        # Fit base classifiers
        for clf in self.base_classifiers:
            clf.fit(X, y)
        
        # Generate predictions for the training set
        meta_features = np.column_stack([cross_val_predict(clf, X, y, method='predict_proba') for clf in self.base_classifiers])
        
        # Fit meta-classifier
        self.meta_clf.fit(meta_features, y)
    
    def predict_proba(self, X):
        # Generate predictions for the test set
        meta_features = np.column_stack([clf.predict_proba(X) for clf in self.base_classifiers])
        
        # Predict probabilities using the meta-classifier
        return self.meta_clf.predict_proba(meta_features)
    
    def predict(self, X):
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

In [67]:
# Exemplo de uso:
model_learning = CombinedDecisionForest()
model_learning.fit(meta_X, y_train)
y_hat = model_learning.predict(meta_ts)
accuracy = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy}")

Accuracy: 0.6666666666666666


In [52]:
def visualize_ensemble_prediction(X, model):
    # Generate meta-features for visualization
    meta_features = np.column_stack([clf.predict_proba(X) for clf in model.base_classifiers])
    meta_predictions = model.meta_clf.predict_proba(meta_features)
    
    # Criar um DataFrame para visualização
    df_meta = pd.DataFrame(meta_features, columns=[f'Prob Class {i+1} from Clf{j+1}' 
                                                   for j in range(len(model.base_classifiers)) 
                                                   for i in range(meta_features.shape[1] // len(model.base_classifiers))])
    df_meta['Meta Prediction Prob Class 1'] = meta_predictions[:, 0]
    df_meta['Meta Prediction Prob Class 2'] = meta_predictions[:, 1]
    df_meta['Meta Prediction'] = np.argmax(meta_predictions, axis=1) + 1  # Adicionar 1 para coincidir com a classe 1-indexada
    
    display(df_meta)

In [53]:
# Exemplo de uso:
visualize_ensemble_prediction(meta_ts, model_learning)

Unnamed: 0,Prob Class 1 from Clf1,Prob Class 2 from Clf1,Prob Class 3 from Clf1,Prob Class 4 from Clf1,Prob Class 5 from Clf1,Prob Class 1 from Clf2,Prob Class 2 from Clf2,Prob Class 3 from Clf2,Prob Class 4 from Clf2,Prob Class 5 from Clf2,...,Prob Class 4 from Clf3,Prob Class 5 from Clf3,Prob Class 1 from Clf4,Prob Class 2 from Clf4,Prob Class 3 from Clf4,Prob Class 4 from Clf4,Prob Class 5 from Clf4,Meta Prediction Prob Class 1,Meta Prediction Prob Class 2,Meta Prediction
0,0.01,0.16,0.15,0.59,0.09,0.0,0.16,0.25,0.51,0.08,...,0.77,0.02,0.01,0.015,0.165,0.795,0.015,0.03,0.03,4
1,0.72,0.04,0.06,0.12,0.06,0.9,0.03,0.02,0.03,0.02,...,0.105,0.015,0.97,0.0,0.015,0.015,0.0,0.94,0.0,1
2,0.82,0.02,0.0,0.13,0.03,0.99,0.0,0.01,0.0,0.0,...,0.005,0.0,0.995,0.0,0.0,0.005,0.0,0.95,0.0,1
3,0.7,0.01,0.0,0.27,0.02,0.97,0.0,0.0,0.03,0.0,...,0.01,0.005,0.955,0.005,0.005,0.035,0.0,0.96,0.0,1
4,0.78,0.04,0.04,0.1,0.04,0.95,0.01,0.02,0.0,0.02,...,0.06,0.02,1.0,0.0,0.0,0.0,0.0,0.93,0.0,1
5,0.55,0.09,0.17,0.0,0.19,0.65,0.05,0.17,0.0,0.13,...,0.025,0.125,0.525,0.1,0.155,0.015,0.205,0.88,0.04,1
6,0.02,0.45,0.2,0.01,0.32,0.01,0.61,0.13,0.0,0.25,...,0.0,0.205,0.005,0.62,0.23,0.005,0.14,0.02,0.48,2
7,0.0,0.57,0.15,0.01,0.27,0.0,0.6,0.1,0.07,0.23,...,0.0,0.305,0.01,0.52,0.12,0.015,0.335,0.01,0.53,2
8,0.0,0.41,0.26,0.03,0.3,0.01,0.49,0.2,0.01,0.29,...,0.01,0.34,0.01,0.385,0.135,0.005,0.465,0.0,0.46,5
9,0.45,0.24,0.17,0.01,0.13,0.4,0.32,0.12,0.02,0.14,...,0.095,0.2,0.48,0.285,0.02,0.07,0.145,0.64,0.18,1
