In [None]:
!pip install aeon
!pip install sktime
!pip install tsfresh
!pip install tslearn
!pip install PyWavelets

### To Do list


*   Comparar os resultados do 1NN contra o SVM+RF
*   Comparar os resultados dos classificadores Feature Based com o SVM+RF
*   Comparar os resultados do MetaClf_Conc contra o MetaClf_Dict



In [2]:
import pandas as pd
import numpy as np

import aeon
from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
#from aeon.classification.convolution_based import RocketClassifier

import tsfresh
from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

import tslearn
from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import os
import math
import pywt

from sklearn.linear_model import RidgeClassifierCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut, train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from scipy.fftpack import fft
from scipy.stats import norm

from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

In [4]:
# Transform data using TimeSeriesScalerMeanVariance and concatenate all transformed data
def transform_data(X, num_features=10):
    n_sax_symbols = int(X.shape[1] / num_features)
    n_paa_segments = int(X.shape[1] / num_features)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)

    data = np.concatenate((X, X_fft, X_dwt, X_paa, X_sax), axis=1)
    data_concat = TimeSeriesScalerMeanVariance().fit_transform(data)
    data_concat.resize(data.shape[0], data.shape[1])

    return data_concat

In [138]:
def select_model(option, random_state):
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='euclidean', n_neighbors=1)
    elif option == '5nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=3)
    elif option == 'svm':
        return SVC(C=100, gamma=0.01, kernel='linear', probability=True)
    elif option == 'rd':
        return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    else:
        return RandomForestClassifier(n_estimators=200, n_jobs = -1, random_state=random_state)

### Agrupamento de todas as probabilidades

In [139]:
from sklearn.model_selection import cross_val_predict
def train_with_meta_classifier(X_train, y_train, base_options=['3nn'], meta_option='random_forest', random_state=42):
    X_train_transformed = transform_data(X_train)

    loo = LeaveOneOut()
    loo.get_n_splits(X_train_transformed)

    meta_features = []
    base_predictions = []
    for train_index, test_index in tqdm(loo.split(X_train_transformed), ascii=True, desc="Training Instances"):
        X_train_fold, X_test_fold = X_train_transformed[train_index], X_train_transformed[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

        base_models = []
        for option in base_options:
            model = select_model(option, random_state)
            model.fit(X_train_fold, y_train_fold)
            base_models.append(model)

        base_preds_fold = []
        for model in base_models:
            # Predict probabilities for each class using cross-validation
            proba = model.predict_proba(X_test_fold)
            base_preds_fold.append(proba)

        # Stack the predicted probabilities along the columns to form meta-features
        base_preds_fold = np.column_stack(base_preds_fold)
        meta_features.append(base_preds_fold)

        # Predict classes using the ensemble of base models
        ensemble_proba = np.mean(base_preds_fold, axis=1)
        ensemble_class = np.argmax(ensemble_proba)
        base_predictions.append(ensemble_class)

    # Train meta-classifier using meta-features and base predictions
    meta_features = np.concatenate(meta_features, axis=0)
    base_predictions = np.array(base_predictions)
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, base_predictions)

    return meta_classifier



### Agrupamento de todas as probabilidades com redução de dimensionalidade usando PCA

In [74]:
from sklearn.decomposition import PCA


def train_with_meta_classifier(X_train, y_train, base_options='1nn', meta_option='random_forest', random_state=42, n_components=3):
    X_train_transformed = transform_data(X_train)
    loo = LeaveOneOut()
    loo.get_n_splits(X_train_transformed)

    meta_features = []
    for train_index, test_index in tqdm(loo.split(X_train_transformed), ascii=True, desc="Training Instances"):
        X_train_fold, X_test_fold = X_train_transformed[train_index], X_train_transformed[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

        base_models = []
        base_models_scores = []
        for option in base_options:
            model = select_model(option, random_state)
            model.fit(X_train_fold, y_train_fold)
            base_models.append(model)
            score = model.score(X_test_fold, y_test_fold)
            base_models_scores.append(score)

        best_model_index = np.argmax(base_models_scores)
        best_model = base_models[best_model_index]
        proba = best_model.predict_proba(X_test_fold.reshape(1, -1))[0]  # Get predicted probabilities for the best model

        meta_features.append(proba)

    meta_features = np.array(meta_features)

    # Redução de dimensionalidade com PCA
    pca = PCA(n_components=n_components, random_state=random_state)
    meta_features_pca = pca.fit_transform(meta_features)

    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features_pca, y_train)

    return base_models, meta_classifier


### Seleção das melhores caracteristicas

In [99]:
def train_with_meta_classifier(X_train, y_train, base_options=['1nn', '3nn', 'knn', 'svm', 'rf'], meta_option='random_forest', random_state=42, n_features_per_model=10):
    X_train_transformed = transform_data(X_train)

    loo = LeaveOneOut()
    loo.get_n_splits(X_train_transformed)

    meta_features = []
    for train_index, test_index in tqdm(loo.split(X_train_transformed), ascii=True, desc="Training Instances"):
        X_train_fold, X_test_fold = X_train_transformed[train_index], X_train_transformed[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

        base_models = []
        base_models_scores = []
        base_models_features = []  # Lista para armazenar as características selecionadas de cada modelo

        for option in base_options:
            model = select_model(option, random_state)
            model.fit(X_train_fold, y_train_fold)
            base_models.append(model)

            # Calcular a importância das características (se aplicável)
            if hasattr(model, 'feature_importances_'):
                feature_importances = model.feature_importances_
                # Selecionar as características mais importantes
                top_features_indices = np.argsort(feature_importances)[::-1][:n_features_per_model]
                base_models_features.append(top_features_indices)
            else:
                # Se o modelo não tiver atributo de importância das características, selecionar aleatoriamente
                n_features = X_train_fold.shape[1]
                top_features_indices = np.random.choice(n_features, n_features_per_model, replace=False)
                base_models_features.append(top_features_indices)

            score = model.score(X_test_fold, y_test_fold)
            base_models_scores.append(score)

        best_model_index = np.argmax(base_models_scores)
        best_model_features = base_models_features[best_model_index]

        # Combinar as características selecionadas de todos os modelos
        meta_features_instance = np.concatenate(base_models_features, axis=1)
        meta_features.append(meta_features_instance)

        meta_features.append(meta_features_instance)

    meta_features = np.array(meta_features)

    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)

    return base_models, meta_classifier


### Teste

In [140]:
def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste

    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data(x_instance)

        instance_features = []
        for rep, model in trained_base_models.items():
            proba = model.predict_proba(x_transformed[rep][0].reshape(1, -1))
            instance_features.extend(proba.flatten())

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])

        meta_features_test.append(meta_feature.flatten())

    meta_features_test = np.array(meta_features_test)

    return predictions


In [147]:
from sklearn.model_selection import LeaveOneOut
from sklearn.model_selection import cross_val_predict
import numpy as np
from tqdm import tqdm

def train_with_meta_classifier(X_train, y_train, base_options=['3nn'], meta_option='random_forest', random_state=42):
    X_train_transformed = transform_data(X_train)

    loo = LeaveOneOut()
    loo.get_n_splits(X_train_transformed)

    meta_features = []
    base_models = []  # Lista para armazenar os classificadores base treinados

    for train_index, test_index in tqdm(loo.split(X_train_transformed), ascii=True, desc="Training Instances"):
        X_train_fold, X_test_fold = X_train_transformed[train_index], X_train_transformed[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]

        fold_base_models = []  # Lista para armazenar os classificadores base desta dobra

        for option in base_options:
            model = select_model(option, random_state)
            model.fit(X_train_fold, y_train_fold)
            fold_base_models.append(model)

        base_models.append(fold_base_models)  # Adicione os classificadores base desta dobra à lista geral

        base_preds_fold = []
        for model in fold_base_models:
            # Predict probabilities for each class using cross-validation
            proba = model.predict_proba(X_test_fold)
            base_preds_fold.append(proba)

        # Stack the predicted probabilities along the columns to form meta-features
        base_preds_fold = np.column_stack(base_preds_fold)
        meta_features.append(base_preds_fold)

    # Train meta-classifier using meta-features
    meta_features = np.concatenate(meta_features, axis=0)
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)

    return base_models, meta_classifier


def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste

    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data(x_instance)  # Aplicar a mesma transformação que foi aplicada aos dados de treinamento

        instance_features = []
        for fold_models in trained_base_models:
            for model in fold_models:
                proba = model.predict_proba(x_transformed)
                instance_features.extend(proba.flatten())

        # Se os dados de teste tiverem menos características do que os de treinamento,
        # podemos preencher as dimensões ausentes com zeros
        num_missing_features = X_train.shape[1] - x_transformed.shape[1]
        if num_missing_features > 0:
            instance_features.extend([0] * num_missing_features)

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])

        meta_features_test.append(meta_feature.flatten())

    meta_features_test = np.array(meta_features_test)

    return predictions



In [92]:
univariate_list = list(univariate_equal_length)
univariate_list.sort()

In [93]:
"""for name in univariate_equal_length:
    train, train_labels = load_classification(name, split='TRAIN')
    test, test_labels = load_classification(name, split='test')
    print(f'''{name}
    Train: {train.shape}
    Train Labels: {train_labels.shape}
    Teste: {test.shape}
    True Labels: {test_labels.shape}
    ''')"""


"for name in univariate_equal_length:\n    train, train_labels = load_classification(name, split='TRAIN')\n    test, test_labels = load_classification(name, split='test')\n    print(f'''{name}\n    Train: {train.shape}\n    Train Labels: {train_labels.shape}\n    Teste: {test.shape}\n    True Labels: {test_labels.shape}\n    ''')"

In [207]:
accuracy_data = []
algos = ['1nn', '3nn','svm', 'rd', 'random_forest']
dataset_quali_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour','GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']
teste = ['Beef']
for dataset_name in teste:
    train, train_labels = load_classification(dataset_name, split='TRAIN')
    test, test_labels = load_classification(dataset_name, split='test')

    xtrain = train.reshape(train.shape[0], -1)
    xtest = test.reshape(test.shape[0], -1)

    #data_train = transform_data(xtrain)
    #data_test = transform_data(xtest)

    for algo in algos:
      # Treino
      trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, train_labels, meta_option='random_forest', random_state=42)
      # Teste
      predictions_test_meta = predict_with_meta_classifier(xtest, test_labels, trained_base_models, meta_classifier)
      # Resultado
      test_accuracy_meta = np.mean(predictions_test_meta == test_labels)

    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

accuracy_df = pd.DataFrame(accuracy_data)


ValueError: operands could not be broadcast together with shapes (30,5) (30,30) 

In [None]:
num_classes, num_features = X_train.shape

In [206]:
from sklearn.metrics import accuracy_score

def train_with_meta_classifier(X_train, y_train, base_options=['svm', 'random_forest'], meta_option='random_forest', random_state=42):
    # Treinamento dos modelos base
    num_classes, num_features = X_train.shape
    trained_base_models = []
    for option in base_options:
        model = select_model(option, random_state)
        model.fit(X_train, y_train)
        trained_base_models.append(model)

    # Calcular a acurácia de cada modelo base
    accuracies = [accuracy_score(y_train, model.predict(X_train)) for model in trained_base_models]

    # Calcular as probabilidades ponderadas para cada classe
    weighted_probabilities = np.zeros((X_train.shape[0], num_classes))  # Inicializar matriz para armazenar as probabilidades ponderadas
    for model, accuracy in zip(trained_base_models, accuracies):
        probas = model.predict_proba(X_train)
        accuracy_vector = np.ones((X_train.shape[0], num_classes)) * accuracy  # Criar matriz de acurácia para cada classe
        weighted_probabilities += probas * accuracy_vector  # Multiplicação elemento a elemento

    # Treinamento do meta-classificador usando as probabilidades ponderadas
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(weighted_probabilities, y_train)

    return trained_base_models, meta_classifier

def predict_with_meta_classifier(X_test, y_train, trained_base_models, trained_meta_classifier):
    # Calculando as probabilidades ponderadas para os dados de teste
    num_classes, num_features = X_test.shape
    weighted_probabilities = np.zeros((X_test.shape[0], num_classes))
    for model in trained_base_models:
        probas = model.predict_proba(X_test)
        accuracies = accuracy_score(y_test, model.predict(X_test))  # Calcular acurácia do modelo para os dados de teste
        accuracy_vector = np.ones((X_test.shape[0], num_classes)) * accuracies  # Criar matriz de acurácia para cada classe
        weighted_probabilities += probas * accuracy_vector  # Multiplicação elemento a elemento

    # Previsões usando o meta-classificador
    predictions = trained_meta_classifier.predict(weighted_probabilities)

    return predictions


In [193]:
from sklearn.metrics import accuracy_score

def train_with_meta_classifier(X_train, y_train, base_options=['svm', 'random_forest', 'knn', 'gbc'], meta_option='random_forest', random_state=42):
    num_classes = X_train.shape
    # Treinamento dos modelos base
    trained_base_models = []
    for option in base_options:
        model = select_model(option, random_state)
        model.fit(X_train, y_train)
        trained_base_models.append(model)

    # Calcular a acurácia de cada modelo base
    accuracies = [accuracy_score(y_train, model.predict(X_train)) for model in trained_base_models]

    # Calcular as probabilidades ponderadas para cada classe
    weighted_probabilities = np.zeros((X_train.shape[0], num_classes))  # Inicializar matriz para armazenar as probabilidades ponderadas
    for model, accuracy in zip(trained_base_models, accuracies):
        probas = model.predict_proba(X_train)
        accuracy_vector = np.ones((probas.shape[1],)) * accuracy  # Criar vetor de acurácia para cada classe
        weighted_probabilities += probas * accuracy_vector.reshape(1, -1)  # Multiplicação elemento a elemento

    # Treinamento do meta-classificador usando as probabilidades ponderadas
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(weighted_probabilities, y_train)

    return trained_base_models, meta_classifier

def predict_with_meta_classifier(X_test, y_train, trained_base_models, trained_meta_classifier):
    num_classes = X_test.shape
    # Calculando as probabilidades ponderadas para os dados de teste
    weighted_probabilities = np.zeros((X_test.shape[0], num_classes))
    for model in trained_base_models:
        probas = model.predict_proba(X_test)
        accuracies = accuracy_score(y_test, model.predict(X_test))  # Calcular acurácia do modelo para os dados de teste
        accuracy_vector = np.ones((probas.shape[1],)) * accuracies  # Criar vetor de acurácia para cada classe
        weighted_probabilities += probas * accuracy_vector.reshape(1, -1)  # Multiplicação elemento a elemento

    # Previsões usando o meta-classificador
    predictions = trained_meta_classifier.predict(weighted_probabilities)

    return predictions


In [183]:
from sklearn.metrics import accuracy_score

def train_with_meta_classifier(X_train, y_train, base_options=['svm', 'random_forest'], meta_option='random_forest', random_state=42):
    # Treinamento dos modelos base
    n_classes, n_features = X_train.shape

    trained_base_models = []
    for option in base_options:
        model = select_model(option, random_state)
        model.fit(X_train, y_train)
        trained_base_models.append(model)

    # Calcular a acurácia de cada modelo base
    accuracies = [accuracy_score(y_train, model.predict(X_train)) for model in trained_base_models]

    # Calcular as probabilidades ponderadas para cada classe
    weighted_probabilities = np.zeros((len(X_train), n_classes))  # Inicializar matriz para armazenar as probabilidades ponderadas
    for model, accuracy in zip(trained_base_models, accuracies):
        probas = model.predict_proba(X_train)
        weighted_probabilities += probas * accuracy

    # Treinamento do meta-classificador usando as probabilidades ponderadas
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(weighted_probabilities, y_train)

    return trained_base_models, meta_classifier

def predict_with_meta_classifier(X_test, y_test, trained_base_models, trained_meta_classifier):
    # Calculando as probabilidades ponderadas para os dados de teste
    n_classes, n_features = X_test.shape
    weighted_probabilities = np.zeros((len(X_test), n_classes))
    for model in trained_base_models:
        probas = model.predict_proba(X_test)
        accuracy = accuracy_score(y_test, model.predict(X_test))
        weighted_probabilities += probas * accuracy

    # Previsões usando o meta-classificador
    predictions = trained_meta_classifier.predict(weighted_probabilities)

    return predictions


In [157]:
accuracy_df

Unnamed: 0,Dataset Name,Accuracy
0,Beef,0.5


In [None]:
accuracy_df.to_parquet('model_acc_Hipotese_TDV3.parquet', index=False)