In [None]:
"""!pip install aeon
!pip install sktime
!pip install tsfresh
!pip install tslearn
!pip install PyWavelets"""

### To Do list


*   Comparar os resultados do 1NN contra o SVM+RF
*   Comparar os resultados dos classificadores Feature Based com o SVM+RF
*   Comparar os resultados do MetaClf_Conc contra o MetaClf_Dict



In [18]:
import pandas as pd
import numpy as np

from aeon.datasets import load_classification
from aeon.datasets.tsc_data_lists import univariate_equal_length
from aeon.classification.distance_based import KNeighborsTimeSeriesClassifier
from aeon.classification.convolution_based import RocketClassifier

import tsfresh
from tsfresh import extract_features, select_features
from tsfresh.feature_extraction import MinimalFCParameters

from tslearn.preprocessing import TimeSeriesScalerMeanVariance
from tslearn.piecewise import PiecewiseAggregateApproximation, SymbolicAggregateApproximation

import os
import math
import pywt

from sklearn.metrics import accuracy_score
from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

from scipy.fftpack import fft
from scipy.stats import norm

from numba import jit, njit

from tqdm import tqdm
import timeit
from datetime import timedelta

import warnings
warnings.filterwarnings("ignore")

In [19]:
"""# Transform data original
def sax_transform(series, w, a):
    paa = [series[i:i + w].mean() for i in range(0, len(series), w)]

    if np.std(paa) != 0:
        paa = (paa - np.mean(paa)) / np.std(paa)
    else:
        paa = paa - np.mean(paa)

    breakpoints = norm.ppf(np.linspace(0, 1, a+1)[1:-1])
    sax_symbols = np.array(range(a))
    sax_representation = sax_symbols[np.digitize(paa, breakpoints)]

    return sax_representation

def transform_data(X, num_features=10):
    a = 5
    w = int(X.shape[1] / num_features)  # Ajuste do tamanho da janela baseado no número de características desejado

    X_sax = np.array([sax_transform(row, w, a) for row in X])
    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    X_paa = np.column_stack([X[:, i:i+2].mean(axis=1) for i in range(0, X.shape[1], 2)])

    return {
        "TS": X,
        "FFT": X_fft,
        "DWT": X_dwt,
        "PAA": X_paa,
        "SAX": X_sax
    }"""

'# Transform data original\ndef sax_transform(series, w, a):\n    paa = [series[i:i + w].mean() for i in range(0, len(series), w)]\n\n    if np.std(paa) != 0:\n        paa = (paa - np.mean(paa)) / np.std(paa)\n    else:\n        paa = paa - np.mean(paa)\n\n    breakpoints = norm.ppf(np.linspace(0, 1, a+1)[1:-1])\n    sax_symbols = np.array(range(a))\n    sax_representation = sax_symbols[np.digitize(paa, breakpoints)]\n\n    return sax_representation\n\ndef transform_data(X, num_features=10):\n    a = 5\n    w = int(X.shape[1] / num_features)  # Ajuste do tamanho da janela baseado no número de características desejado\n\n    X_sax = np.array([sax_transform(row, w, a) for row in X])\n    X_fft = np.abs(fft(X, axis=1))\n\n    coeffs_cA, coeffs_cD = pywt.dwt(X, \'db1\', axis=1)\n    X_dwt = np.hstack((coeffs_cA, coeffs_cD))\n\n    X_paa = np.column_stack([X[:, i:i+2].mean(axis=1) for i in range(0, X.shape[1], 2)])\n\n    return {\n        "TS": X,\n        "FFT": X_fft,\n        "DWT": X_d

In [20]:
# Transform data using TimeSeriesScalerMeanVariance and concatenate all transformed data
def transform_data(X, num_features=10):
    n_sax_symbols = int(X.shape[1] / num_features)
    n_paa_segments = int(X.shape[1] / num_features)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)

    data = np.concatenate((X, X_fft, X_dwt, X_paa, X_sax), axis=1)
    data_concat = TimeSeriesScalerMeanVariance().fit_transform(data)
    data_concat.resize(data.shape[0], data.shape[1])

    return data_concat

In [21]:
"""# Transform data using Extract and Select features
def transform_data(X, num_features=10):
    n_sax_symbols = int(X.shape[1] / num_features)
    n_paa_segments = int(X.shape[1] / num_features)

    X_fft = np.abs(fft(X, axis=1))

    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)
    X_dwt = np.hstack((coeffs_cA, coeffs_cD))

    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)
    X_paa_ = paa.inverse_transform(paa.fit_transform(X))
    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)

    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)
    X_sax_ = sax.inverse_transform(sax.fit_transform(X))
    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)

    data = np.concatenate((X, X_fft, X_dwt, X_paa, X_sax), axis=1)
    data_concat = TimeSeriesScalerMeanVariance().fit_transform(data)
    data_concat.resize(data.shape[0], data.shape[1])

    transformed_data = pd.DataFrame(data_concat)


    return transformed_data"""

"# Transform data using Extract and Select features\ndef transform_data(X, num_features=10):\n    n_sax_symbols = int(X.shape[1] / num_features)\n    n_paa_segments = int(X.shape[1] / num_features)\n\n    X_fft = np.abs(fft(X, axis=1))\n\n    coeffs_cA, coeffs_cD = pywt.dwt(X, 'db1', axis=1)\n    X_dwt = np.hstack((coeffs_cA, coeffs_cD))\n\n    paa = PiecewiseAggregateApproximation(n_segments=n_paa_segments)\n    X_paa_ = paa.inverse_transform(paa.fit_transform(X))\n    X_paa = X_paa_.reshape(X_paa_.shape[0], -1)\n\n    sax = SymbolicAggregateApproximation(n_segments=n_paa_segments, alphabet_size_avg=n_sax_symbols)\n    X_sax_ = sax.inverse_transform(sax.fit_transform(X))\n    X_sax = X_sax_.reshape(X_sax_.shape[0], -1)\n\n    data = np.concatenate((X, X_fft, X_dwt, X_paa, X_sax), axis=1)\n    data_concat = TimeSeriesScalerMeanVariance().fit_transform(data)\n    data_concat.resize(data.shape[0], data.shape[1])\n\n    transformed_data = pd.DataFrame(data_concat)\n\n\n    return transfor

In [22]:
def select_model(option, random_state):
    if option == 'svm':
        return SVC(C = 100, gamma=0.01, kernel='rbf', probability=True)
    else:
        return RandomForestClassifier(n_estimators=200, n_jobs=-1, random_state=random_state)

In [36]:
def train_with_meta_classifier(X_train, y_train, base_option='svm', meta_option='random_forest', random_state=42):
    X_train_transformed = transform_data(X_train)

    loo = LeaveOneOut()
    loo.get_n_splits(X_train_transformed)

    # Treinar um modelo para todos os dados transformados
    model = select_model(base_option, random_state)
    scores = []
    for train_index, test_index in tqdm(loo.split(X_train_transformed), ascii=True, desc="Traning Instances"):
        X_train_fold, X_test_fold = X_train_transformed[train_index], X_train_transformed[test_index]
        y_train_fold, y_test_fold = y_train[train_index], y_train[test_index]
        model.fit(X_train_fold, y_train_fold)
        score = model.score(X_test_fold, y_test_fold)
        scores.append(score)
    avg_score = np.mean(scores)

    # Preparar dados para o meta-classificador
    meta_features = []
    for X_trans in X_train_transformed:
        instance_features = []
        proba = model.predict_proba(X_trans.reshape(1, -1)) # Reshape para compatibilidade com predict_proba
        instance_features.extend(proba.flatten())
        meta_features.append(instance_features)

    meta_features = np.array(meta_features)

    # Treinar o meta-classificador
    meta_classifier = select_model(meta_option, random_state)
    meta_classifier.fit(meta_features, y_train)

    return model, meta_classifier


In [37]:
from tqdm import tqdm  # Importar a biblioteca tqdm

def predict_with_meta_classifier(X_test, trained_base_model, trained_meta_classifier):
    predictions = []
    meta_features_test = []  # Inicialize uma lista para armazenar todos os meta-recursos dos dados de teste

    for i in tqdm(range(len(X_test)), ascii=True, desc="Testing Instances"):
        x_instance = X_test[i].reshape(1, -1)
        x_transformed = transform_data(x_instance)

        instance_features = []
        for X_trans in x_transformed:  # Iterar sobre as diferentes transformações
            proba = trained_base_model.predict_proba(X_trans.reshape(1, -1))
            instance_features.extend(proba.flatten())  # Estender a lista com todas as probabilidades

        meta_feature = np.array(instance_features).reshape(1, -1)
        predictions.append(trained_meta_classifier.predict(meta_feature)[0])  # Adicionar a previsão à lista de previsões

        meta_features_test.append(meta_feature.flatten())  # Adicionar meta-recursos da instância atual à lista

    # Converter a lista de meta-recursos dos dados de teste em um array numpy
    meta_features_test = np.array(meta_features_test)

    # Salvar todos os meta-recursos dos dados de teste em um arquivo CSV
    # np.savetxt("meta-features-test.csv", meta_features_test, delimiter=",")

    return predictions


In [38]:
univariate_list = list(univariate_equal_length)
univariate_list.sort()

In [39]:
accuracy_data = []
dataset_list = ['Adiac', 'Beef', 'Car', 'CBF', 'Coffee', 'DiatomSizeReduction', 'ECG200', 'ECGFiveDays', 'FaceFour',
'GunPoint', 'Lightning2', 'Lightning7', 'MedicalImages', 'MoteStrain', 'OliveOil', 'SonyAIBORobotSurface1','SonyAIBORobotSurface2', 'SyntheticControl', 'Trace', 'TwoPatterns']

for dataset_name in dataset_list:
    train, train_labels = load_classification(dataset_name, split='TRAIN')
    test, test_labels = load_classification(dataset_name, split='test')

    xtrain = train.reshape(train.shape[0], -1)
    xtest = test.reshape(test.shape[0], -1)

    #data_train = transform_data(xtrain)
    #data_test = transform_data(xtest)

    # Treino
    trained_base_models, meta_classifier = train_with_meta_classifier(xtrain, train_labels, base_option='svm', meta_option='random_forest', random_state=42)
    # Teste
    predictions_test_meta = predict_with_meta_classifier(xtest, trained_base_models, meta_classifier)
    # Resultado
    test_accuracy_meta = np.mean(predictions_test_meta == test_labels)

    accuracy_data.append({'Dataset Name': dataset_name, 'Accuracy': test_accuracy_meta})

accuracy_df = pd.DataFrame(accuracy_data)


Traning Instances: 390it [04:01,  1.61it/s]
Testing Instances: 100%|##########| 391/391 [00:14<00:00, 26.67it/s]
Traning Instances: 30it [00:00, 49.87it/s]
Testing Instances: 100%|##########| 30/30 [00:01<00:00, 25.57it/s]
Traning Instances: 60it [00:03, 15.32it/s]
Testing Instances: 100%|##########| 60/60 [00:02<00:00, 24.04it/s]
Traning Instances: 30it [00:00, 148.87it/s]
Testing Instances: 100%|##########| 900/900 [00:32<00:00, 27.39it/s]
Traning Instances: 28it [00:00, 223.56it/s]
Testing Instances: 100%|##########| 28/28 [00:01<00:00, 26.92it/s]
Traning Instances: 16it [00:00, 167.17it/s]
Testing Instances: 100%|##########| 306/306 [00:11<00:00, 26.45it/s]
Traning Instances: 100it [00:01, 68.80it/s]
Testing Instances: 100%|##########| 100/100 [00:03<00:00, 28.98it/s]
Traning Instances: 23it [00:00, 279.24it/s]
Testing Instances: 100%|##########| 861/861 [00:31<00:00, 27.59it/s]
Traning Instances: 24it [00:00, 90.68it/s]
Testing Instances: 100%|##########| 88/88 [00:02<00:00, 29.43

In [40]:
accuracy_df

Unnamed: 0,Dataset Name,Accuracy
0,Adiac,0.790281
1,Beef,0.833333
2,Car,0.683333
3,CBF,0.874444
4,Coffee,1.0
5,DiatomSizeReduction,0.924837
6,ECG200,0.91
7,ECGFiveDays,0.994193
8,FaceFour,0.534091
9,GunPoint,0.98


In [43]:
accuracy_df.to_csv('model_acc_SVM+RF+TDV1.csv', index=False)