### Bibliotecas

In [5]:
"""
%pip install aeon
%pip install tsfresh
%pip install tslearn
%pip install tensorflow
%pip install keras
%pip install pywavelets"""


'\n%pip install aeon\n%pip install tsfresh\n%pip install tslearn\n%pip install tensorflow\n%pip install keras\n%pip install pywavelets'

In [83]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.interval_based import SupervisedTimeSeriesForest, TimeSeriesForestClassifier

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import pywt
from sklearn import pipeline
from sklearn.linear_model import RidgeClassifierCV
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from scipy.fftpack import fft
from numba import jit
import timeit
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

### Loading data

In [7]:
def load_data(dataset):
    # LabelEncoder para labels alvo
    le = LabelEncoder()

    # Carregar conjunto de dados do repositório UCR
    X_train, y_train = load_classification(dataset, split="TRAIN")
    X_test, y_test = load_classification(dataset, split="test")

    # Formatar o conjunto de dados para 2D
    features_train = X_train.reshape(X_train.shape[0], -1)
    features_test = X_test.reshape(X_test.shape[0], -1)

    # Ajustar e transformar as labels alvo
    target_train = le.fit_transform(y_train)
    target_test = le.transform(y_test)

    return features_train, features_test, target_train, target_test


### Function transform data

In [8]:
def choose_wavelet(X):
    min_variance = float('inf')
    best_wavelet = None
    candidate_wavelets = ['db1', 'db2', 'db3', 'db4', 'db5', 'db6', 'db7', 'db8', 'db9']

    for wavelet_type in candidate_wavelets:
        _, coeffs_cD = pywt.dwt(X, wavelet_type, axis=1)
        total_variance = np.var(coeffs_cD)

        if total_variance < min_variance:
            min_variance = total_variance
            best_wavelet = wavelet_type
    return str(best_wavelet)


In [36]:
def transform_data_math(X, wavelet):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)

    # FFT Transformation
    X_fft = np.abs(fft(X, axis=1))
    
    # DWT Transformation
    coeffs_cA, coeffs_cD = pywt.dwt(X, wavelet=wavelet, axis=1, mode='constant')
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    # PAA Transformation
    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)
    df_PAA = pd.DataFrame(X_paa)
    
    # SAX Transformation
    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)
    df_SAX = pd.DataFrame(X_sax)

    # Original Data
    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    df_X = pd.DataFrame(data_X)

    # FFT Data
    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    df_FFT = pd.DataFrame(data_FFT)

    # DWT Data
    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])
    df_DWT = pd.DataFrame(data_DWT)

    # Adding IDs to DataFrames
    df_X['id'] = df_FFT['id'] = df_DWT['id'] = df_PAA['id'] = df_SAX['id'] = range(len(df_X))
    
    # Merging all DataFrames on 'id'
    final_df = df_X.merge(df_FFT, on='id', suffixes=('_X', '_FFT'))
    final_df = final_df.merge(df_DWT, on='id', suffixes=('', '_DWT'))
    final_df = final_df.merge(df_PAA, on='id', suffixes=('', '_PAA'))
    final_df = final_df.merge(df_SAX, on='id', suffixes=('', '_SAX'))
    
    
    return final_df

### AmazonForestClassifier

In [9]:
class CombinedDecisionForest:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.meta_clf = RidgeClassifierCV(alphas=np.logspace(-3,3,10))
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.clf_weights = None

    def fit(self, X, y):
        for clf in self.classifiers:
            clf.fit(X, y)

        train_preds = [clf.predict(X) for clf in self.classifiers]
        accuracies = [accuracy_score(y, preds) for preds in train_preds]

        self.clf_weights = np.array(accuracies)
        self.clf_weights /= np.sum(self.clf_weights)

    def predict_proba(self, X):
        probs = [clf.predict_proba(X) for clf in self.classifiers]
        combined_probs = np.sum([prob * weight for prob, weight in zip(probs, self.clf_weights)], axis=0)
        return combined_probs / np.sum(combined_probs, axis=1, keepdims=True)

    def predict(self, X):
        proba = self.predict_proba(X)
        return np.argmax(proba, axis=1)

### Train/Predict

In [42]:
def train_classifier(X_train, y_train, wavelet=None):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    X_train_transformed = transform_data_math(X_train, wavelet=wavelet)  # Transformar todo o conjunto de treino
    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in X_train_transformed.items():
        model = CombinedDecisionForest()
        model.fit(X_trans, y_train)
        trained_models[rep] = model
        
    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, model in trained_models.items():
            proba = model.predict_proba(np.array(X_train_transformed[rep].iloc[i,:]).reshape(1, -1))
            instance_features.extend(proba.flatten())
        meta_features.append(instance_features)
    
    meta_features = np.array(meta_features)
    
    meta_classifier = RidgeClassifierCV(np.logspace(-3,3,10))
    meta_classifier.fit(meta_features, y_train)
    
    return trained_models, meta_classifier

def predict_classifier(X_test, trained_base_model, trained_meta_classifier, wavelet=None):
    predictions = []
    meta_features_test = []

    for i in tqdm(range(len(X_test)), ascii=True, colour='green', desc="Testing"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data_math(x_instance, wavelet=wavelet)

        instance_features = []
        for rep, X_trans in x_transformed.items():
            proba = trained_base_model[rep].predict_proba(X_trans.values.reshape(1, -1))
            instance_features.extend(proba.flatten())

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])

        meta_features_test.append(meta_feature.flatten())

    meta_features_test = np.array(meta_features_test)

    return predictions


# AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA

In [118]:
train_X, teste_X, y_train, y_test = load_data("Beef")

In [119]:
from tsfresh import extract_features

def extract_tsfresh_features(df):
    extracted_features = extract_features(df, default_fc_parameters=MinimalFCParameters(), column_id='id')
    
    return extracted_features

In [120]:
#Training
best_wl = choose_wavelet(train_X)
features_train = transform_data_math(train_X, best_wl)
meta_X = extract_tsfresh_features(features_train)

Feature Extraction: 100%|██████████| 30/30 [00:33<00:00,  1.12s/it]


In [121]:
#Testing
features_test = transform_data_math(teste_X, best_wl)
meta_ts = extract_tsfresh_features(features_test)

Feature Extraction: 100%|██████████| 30/30 [00:08<00:00,  3.58it/s]


In [122]:
#Original
clf = CombinedDecisionForest()
clf.fit(meta_X, y_train)
y_hat = clf.predict(meta_ts)
accuracy2 = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy2}")

Accuracy: 0.9


### Meta-Classificador

In [146]:
class CombinedMetaClassifier:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.meta_clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]

    def fit(self, X, y):
        # Treinar classificadores base
        base_predictions = []
        for clf in self.classifiers:
            clf.fit(X, y)
            preds = clf.predict(X)
            base_predictions.append(preds.reshape(-1, 1))
        
        # Stack predictions para criar meta features
        meta_features = np.hstack(base_predictions)
        
        # Treinar meta-classificador
        self.meta_clf.fit(meta_features, y)

    def predict(self, X):
        # Obter previsões dos classificadores base
        base_predictions = []
        for clf in self.classifiers:
            preds = clf.predict(X)
            base_predictions.append(preds.reshape(-1, 1))
        
        # Stack predictions para criar meta features
        meta_features = np.hstack(base_predictions)
        
        # Previsão final usando o meta-classificador
        return self.meta_clf.predict(meta_features)
    
    def predict_proba(self, X):
        # Obter previsões probabilísticas dos classificadores base
        base_probabilities = []
        for clf in self.classifiers:
            if hasattr(clf, "predict_proba"):
                probs = clf.predict_proba(X)
                base_probabilities.append(probs)
            else:
                # Converter previsões para probabilidades (caso o classificador não suporte predict_proba)
                preds = clf.predict(X)
                probs = np.zeros((preds.size, len(np.unique(preds))))
                probs[np.arange(preds.size), preds] = 1
                base_probabilities.append(probs)
        
        return np.argmax(base_probabilities, axis=1)

In [None]:
model_classifier = CombinedMetaClassifier()
model_classifier.fit(meta_X, y_train)
y_hat = model_classifier.predict_proba(meta_ts)
accuracy = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy}")

### Meta-Learning

In [149]:
class CombinedMetaLearning:
    def __init__(self):
        self.clf1 = RandomForestClassifier()
        self.clf2 = ExtraTreesClassifier()
        self.clf3 = SupervisedTimeSeriesForest()
        self.clf4 = TimeSeriesForestClassifier()
        self.meta_clf = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
        self.classifiers = [self.clf1, self.clf2, self.clf3, self.clf4]
        self.clf_weights = None

    def fit(self, X, y):
        # Treinar classificadores base
        base_predictions = []
        accuracies = []
        for clf in self.classifiers:
            clf.fit(X, y)
            preds = cross_val_predict(clf, X, y, cv=5, method='predict')
            base_predictions.append(preds.reshape(-1, 1))
            accuracies.append(accuracy_score(y, preds))
        
        # Stack predictions para criar meta features
        meta_features = np.hstack(base_predictions)
        
        # Calcular pesos com base na acurácia
        self.clf_weights = np.array(accuracies)
        self.clf_weights /= np.sum(self.clf_weights)
        
        # Treinar meta-classificador
        self.meta_clf.fit(meta_features, y)

    def predict(self, X):
        # Obter previsões dos classificadores base
        base_predictions = []
        for clf in self.classifiers:
            preds = clf.predict(X)
            base_predictions.append(preds.reshape(-1, 1))
        
        # Stack predictions para criar meta features
        meta_features = np.hstack(base_predictions)
        
        # Previsão final usando o meta-classificador
        return self.meta_clf.predict(meta_features)
    
    def predict_proba(self, X):
        # Obter previsões probabilísticas dos classificadores base
        base_probabilities = []
        for clf in self.classifiers:
            if hasattr(clf, "predict_proba"):
                probs = clf.predict_proba(X)
                base_probabilities.append(probs)
            else:
                preds = clf.predict(X)
                probs = np.zeros((preds.size, len(np.unique(preds))))
                probs[np.arange(preds.size), preds] = 1
                base_probabilities.append(probs)
        
        # Média ponderada das probabilidades dos classificadores base
        combined_probs = np.sum([prob * weight for prob, weight in zip(base_probabilities, self.clf_weights)], axis=0)
        
        return combined_probs

    def predict_with_argmax(self, X):
        combined_probs = self.predict_proba(X)
        return np.argmax(combined_probs, axis=1)

In [150]:
model_learning = CombinedMetaLearning()
model_learning.fit(meta_X, y_train)
y_hat = model_learning.predict_proba(meta_ts)
accuracy = accuracy_score(y_test, y_hat)
print(f"Accuracy: {accuracy}")

ValueError: Classification metrics can't handle a mix of multiclass and continuous-multioutput targets