### Bibliotecas

In [52]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier

from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import os
import math
import pywt

from sklearn.metrics import accuracy_score

from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.linear_model import RidgeClassifierCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

from scipy.fftpack import fft
from scipy.stats import norm

from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

### Dados

In [14]:
try:
    train_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\Car_TRAIN.parquet')
    test_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\Car_TRAIN.parquet')
except FileNotFoundError:
    print("Ensure the Parquet files are in the correct path.")
    raise
    
    
X_train = train_data.drop('target', axis=1).values
y_train = train_data['target'].values

X_test = test_data.drop('target', axis=1).values
y_test = test_data['target'].values


### Função de transformação dos dados.

In [79]:
# Transform data atualizado
def transform_data(X):
    n_sax_symbols = int(X.shape[1] / 4)
    n_paa_segments = int(X.shape[1] / 4)
    
    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)

    data_X = TimeSeriesScalerMeanVariance().fit_transform(X)
    data_X.resize(data_X.shape[0], data_X.shape[1])
    
    data_FFT = TimeSeriesScalerMeanVariance().fit_transform(X_fft)
    data_FFT.resize(data_FFT.shape[0], data_FFT.shape[1])
    
    data_DWT = TimeSeriesScalerMeanVariance().fit_transform(X_dwt)
    data_DWT.resize(data_DWT.shape[0], data_DWT.shape[1])

    return {
        "TS": data_X,
        "FFT": data_FFT,
        "DWT": data_DWT,
        "PAA": X_paa,
        "SAX": X_sax
    }

### Seleção do modelo extrator e modelo classificador

In [84]:
def select_model(option, random_state):
    if option == '1nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=1)
    elif option == '3nn':
        return KNeighborsTimeSeriesClassifier(distance='dtw', n_neighbors=2)
    elif option == 'svm':
        return SVC(C = 1000, gamma=0.01, kernel='rbf', probability=True)
    elif option == 'gbc':
        return GradientBoostingClassifier(n_estimators=100, random_state=random_state)
    elif option == 'nb':
        return GaussianNB()
    elif option == 'shape':
        return ShapeDTW(n_neighbors=1)
    elif option == 'ee':
        return ElasticEnsemble(proportion_of_param_options= 0.5,
                               proportion_train_in_param_finding= 0.5,
                               proportion_train_for_test=0.5,
                               n_jobs=-1,
                               random_state=random_state,
                               majority_vote=True)
    elif option == 'rd':
        return RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
    else:
        return RandomForestClassifier(n_estimators=200,
                                    n_jobs=-1,
                                    random_state=random_state)
        #return RandomForestClassifier(n_estimators=100,random_state=random_state)

### Treino do modelos extrator e classificador

In [85]:
def train_with_meta_classifier(X_train, y_train, base_option='random_forest', meta_option='1nn', random_state=42):
    trained_models = {}  # Salvar modelos treinados para cada transformação
    
    X_train_transformed = transform_data(X_train)  # Transformar todo o conjunto de treino
    
    loo = LeaveOneOut()
    
    # Treinar um modelo para cada transformação e salvar no dicionário
    for rep, X_trans in tqdm(X_train_transformed.items(), ascii=True, desc="Training Base Models"):
        model = select_model(base_option, random_state)
        scores = []
        for train_index, _ in loo.split(X_trans):
            model.fit(X_trans[train_index], y_train[train_index])
            score = model.score(X_trans[train_index], y_train[train_index])  # Score do modelo nos dados de treino
            scores.append(score)
        avg_score = np.mean(scores)
        trained_models[rep] = (model, avg_score)  # Salvar o modelo treinado e a média dos scores
        
    # Preparar dados para o meta-classificador
    meta_features = []
    for i in range(X_train.shape[0]):
        instance_features = []
        for rep, (model, _) in trained_models.items():
            proba = model.predict_proba(X_train_transformed[rep][i].reshape(1, -1))
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades
        meta_features.append(instance_features)
    
    meta_features = np.array(meta_features)
    np.savetxt("meta-features-train.csv", meta_features, delimiter=",")
    
    # Treinar o meta-classificador
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)
    
    return trained_models, meta_classifier


### Predicao do meta-classificador

In [86]:
def predict_with_meta_classifier(X_test, trained_base_models, trained_meta_classifier):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste
    
    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data(x_instance)
        
        instance_features = []
        for rep, (model, _) in trained_base_models.items():  # Ajuste para percorrer os modelos treinados e os scores médios
            proba = model.predict_proba(x_transformed[rep][0].reshape(1, -1))  # Ajuste aqui para pegar o primeiro elemento
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades
        
        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões
        
        meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista
    
    # Converter a lista de meta-recursos dos dados de teste em um array numpy
    meta_features_test = np.array(meta_features_test)

    # Salvar todos os meta-recursos dos dados de teste em um arquivo CSV
    np.savetxt("meta-features-test.csv", meta_features_test, delimiter=",")
    
    return predictions


### Testando um único modelo - Random Forest como extrator e SVM como meta-classificador

In [89]:
dataset_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour',
                'GunPoint', 'Lightning2', 'Lightning7', 'MoteStrain', 'OliveOil','MedicalImages', 'Trace', 'TwoPatterns', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl']
#'

# Para cada conjunto de dados na lista
for dataset_name in dataset_list:
    # Carregue os dados de treinamento e teste
    X_train, y_train = load_classification(dataset_name, split="TRAIN")
    X_test, y_test = load_classification(dataset_name, split="test")
    
    # Achatando os dados para 2D, pois alguns algoritmos esperam 2D
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    dataset_accuracies = []
    trained_base_models, meta_classifier = train_with_meta_classifier(X_train_flat, y_train, base_option='svm', meta_option='rd')
    predictions_test_meta = predict_with_meta_classifier(X_test_flat, trained_base_models, meta_classifier)
    test_accuracy_meta = np.mean(predictions_test_meta == y_test)
    dataset_accuracies.append(test_accuracy_meta)
        
    print(f"Acurácia {dataset_name}: {test_accuracy_meta}")
        
#np.savetxt("Results_MSLOO_.csv", dataset_accuracies, delimiter=",")

Training Base Models: 100%|##########| 5/5 [07:45<00:00, 93.05s/it]
Testing Instances: 100%|##########| 391/391 [00:02<00:00, 148.70it/s]


Acurácia Adiac: 0.7774936061381074


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  5.92it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 92.85it/s]


Acurácia Beef: 0.8666666666666667


Training Base Models: 100%|##########| 5/5 [00:04<00:00,  1.03it/s]
Testing Instances: 100%|##########| 60/60 [00:00<00:00, 92.15it/s]


Acurácia Car: 0.8833333333333333


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 13.29it/s]
Testing Instances: 100%|##########| 900/900 [00:04<00:00, 190.81it/s]


Acurácia CBF: 0.9044444444444445


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 18.06it/s]
Testing Instances: 100%|##########| 28/28 [00:00<00:00, 145.32it/s]


Acurácia Coffee: 0.9642857142857143


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 33.98it/s]
Testing Instances: 100%|##########| 306/306 [00:02<00:00, 124.45it/s]


Acurácia DiatomSizeReduction: 0.934640522875817


Training Base Models: 100%|##########| 5/5 [00:02<00:00,  1.94it/s]
Testing Instances: 100%|##########| 100/100 [00:00<00:00, 213.07it/s]


Acurácia ECG200: 0.91


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 24.43it/s]
Testing Instances: 100%|##########| 861/861 [00:04<00:00, 187.91it/s]


Acurácia ECGFiveDays: 0.9872241579558653


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 12.36it/s]
Testing Instances: 100%|##########| 88/88 [00:00<00:00, 119.23it/s]


Acurácia FaceFour: 0.8295454545454546


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  7.35it/s]
Testing Instances: 100%|##########| 150/150 [00:00<00:00, 189.18it/s]


Acurácia GunPoint: 0.9333333333333333


Training Base Models: 100%|##########| 5/5 [00:04<00:00,  1.12it/s]
Testing Instances: 100%|##########| 61/61 [00:00<00:00, 88.69it/s]


Acurácia Lightning2: 0.7704918032786885


Training Base Models: 100%|##########| 5/5 [00:06<00:00,  1.37s/it]
Testing Instances: 100%|##########| 73/73 [00:00<00:00, 113.80it/s]


Acurácia Lightning7: 0.6164383561643836


Training Base Models: 100%|##########| 5/5 [00:00<00:00, 33.40it/s]
Testing Instances: 100%|##########| 1252/1252 [00:05<00:00, 215.30it/s]


Acurácia MoteStrain: 0.8642172523961661


Training Base Models: 100%|##########| 5/5 [00:00<00:00,  7.32it/s]
Testing Instances: 100%|##########| 30/30 [00:00<00:00, 88.38it/s]


Acurácia OliveOil: 0.8666666666666667


Training Base Models: 100%|##########| 5/5 [04:43<00:00, 56.76s/it]
Testing Instances: 100%|##########| 760/760 [00:03<00:00, 194.29it/s]


Acurácia MedicalImages: 0.7144736842105263


Training Base Models: 100%|##########| 5/5 [00:09<00:00,  1.91s/it]
Testing Instances: 100%|##########| 100/100 [00:00<00:00, 138.41it/s]


Acurácia Trace: 0.89


Training Base Models:   0%|          | 0/5 [00:00<?, ?it/s]

In [None]:
total_time = 0
algos = ['1nn', '3nn', 'svm', 'nb', 'gbc', 'random_forest']
    # Carregue os dados de treinamento e teste
try:
    train_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\GunPoint_TRAIN.parquet')
    test_data = pd.read_parquet('D:\_MESTRADO\_Meta_Learning\MSC\CSV_Parquet\GunPoint_TEST.parquet')
except FileNotFoundError:
    print("Ensure the Parquet files are in the correct path.")
    raise

X_train = train_data.drop('target', axis=1).values
y_train = train_data['target'].values

X_test = test_data.drop('target', axis=1).values
y_test = test_data['target'].values

In [None]:
trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='svm', meta_option='rd')
predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
test_accuracy_meta = np.mean(predictions_test_meta == y_test)
print(f"Accuracy: {test_accuracy_meta}")

Training Base Models: 100%|##########| 5/5 [00:00<00:00,  7.09it/s]
Testing Instances: 100%|##########| 150/150 [00:00<00:00, 177.86it/s]

Accuracy: 0.9533333333333334





In [None]:
# Treino
trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, train_labels, base_option='svm', meta_option='rd', random_state=42)

# Teste
predictions_test_meta = predict_with_meta_classifier(xtest, trained_base_models, meta_classifier)

# Resultado
test_accuracy_meta = accuracy_score(test_labels, predictions_test_meta)

print(f'Accuracy: {test_accuracy_meta}')

Training Base Models: 100%|##########| 5/5 [00:04<00:00,  1.20it/s]
Testing Instances: 100%|##########| 60/60 [00:02<00:00, 28.46it/s]

Accuracy: 0.8833333333333333





### Testando um único modelo - SVM como extrator e meta-classificador

In [None]:
# Treino
trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, train_labels, base_option='random_forest', meta_option='svm', random_state=42)

# Teste
predictions_test_meta = predict_with_meta_classifier(xtest, trained_base_models, meta_classifier)

# Resultado
test_accuracy_meta = accuracy_score(test_labels, predictions_test_meta)

print(f'Accuracy: {test_accuracy_meta}')

In [None]:
#Teste utilizando o classificador SVM
meta_attrib_train = np.loadtxt("meta-features-train.csv", delimiter=",")
meta_attrib_test = np.loadtxt("meta-features-test.csv", delimiter=",")
from sklearn.metrics import accuracy_score

clf = SVC(probability=True)
clf.fit(meta_attrib_train, y_train)
y_hat = clf.predict(meta_attrib_test)
test_accuracy_meta = accuracy_score(y_hat, y_test)
print(f"Accuracy: {test_accuracy_meta}")

In [None]:
clf_2 = SVC(probability=True)
clf_2.fit(X_train, y_train)
y_hat_ = clf_2.predict(X_test)
test_accuracy_meta_2 = accuracy_score(y_hat_,y_test)
print(f"Accuracy: {test_accuracy_meta_2}")

### Gráfico das diferenças de dados

In [None]:
import matplotlib.pyplot as plt

y1 = y_hat  # depois da transformação
y2 = y_test  

z1 = y_hat_ #antes da transformação
z2 = y_test

#suavizar os dados do gráfico
window_size = 15
y1_smoothed = pd.Series(y1).rolling(window=window_size).mean()
y2_smoothed = pd.Series(y2).rolling(window=window_size).mean()
z1_smoothed = pd.Series(z1).rolling(window=window_size).mean()
z2_smoothed = pd.Series(z2).rolling(window=window_size).mean()

fig, axs = plt.subplots(nrows=1, ncols=2, figsize=(15, 5), layout='constrained')

# Conjunto de validação do classificador
axs[0].set_title('Antes da transformação')
axs[0].plot(z1_smoothed, label='Treino')
axs[0].plot(z2_smoothed, label='Teste')
axs[0].set_xlabel('Tempo (s)')
axs[0].set_ylabel('Treino')
axs[0].grid(True)

# Conjunto de validação do meta-classificador
axs[1].set_title('Depois da transformação')
axs[1].plot(y1_smoothed, label='Treino')
axs[1].plot(y2_smoothed, label='Teste')
axs[1].set_xlabel('Tempo (s)')
axs[1].set_ylabel('Treino')
axs[1].grid(True)

plt.legend()
plt.show()



In [None]:
w1 = y_hat  # meta-classificador
w2 = y_hat_ #classificação

# Suavizar os dados do gráfico
window_size = 15
w1_smoothed = pd.Series(w1).rolling(window=window_size).mean()
w2_smoothed = pd.Series(w2).rolling(window=window_size).mean()

# Plotar os dados
plt.figure(figsize=(10, 6))
plt.plot(w1_smoothed, label='w1 (Classificação usando meta-caracteristicas)')
plt.plot(w2_smoothed, label='w2 (classificação utilizando dados brutos)')
plt.xlabel('Tempo (s)')
plt.ylabel('Valores suavizados')
plt.title('Comparação entre os resultados de um SVM')
plt.legend()
plt.grid(True)
plt.show()


### Treino em loop de todas as opções de classificadores disponiveis no Select Model.

In [None]:
algos = ['1nn', '3nn', 'svm', 'nb', 'gbc', 'ee', 'shape', 'rf', 'rd']
for algo in algos:
    
    print(f'Meta-classificador com modelo extrator {algo.upper()}')
    
    # Training
    try:
        trained_base_models, meta_classifier = train_with_meta_classifier(X_train, y_train, base_option='svm', meta_option=algo)
        # Testing
        predictions_test_meta = predict_with_meta_classifier(X_test, trained_base_models, meta_classifier)
        test_accuracy_meta = np.mean(predictions_test_meta == y_test)
        
        print(f'Acurácia do teste usando o meta-classificador com modelo extrator {algo}: {test_accuracy_meta}')
    except Exception as e:
        print(f"Ocorreu um erro no teste com o {algo}: {e}")
    print("-------------------------------")