In [3]:
import requests
import os

# List of UEA datasets to download
datasets = [
    'EthanolConcentration', 'FaceDetection', 'Handwriting', 'Heartbeat',
    'JapaneseVowels', 'PEMS-SF', 'SelfRegulationSCP1', 'SelfRegulationSCP2',
    'SpokenArabicDigits', 'UWaveGestureLibrary'
]

# Base URL for downloading the datasets
base_url = "https://www.timeseriesclassification.com/aeon-toolkit/"

# Create a directory for the downloaded datasets if it doesn't exist
os.makedirs('datasets', exist_ok=True)

# Download each dataset
for dataset in datasets:
    # Construct the URL for the dataset
    dataset_url = f"{base_url}{dataset}.zip"
    
    # Make the request to download the dataset
    response = requests.get(dataset_url, verify=False)
    
    # Check if the request was successful
    if response.status_code == 200:
        # Save the dataset to a file
        filename = os.path.join('UEA_datasets', f"{dataset}.zip")
        with open(filename, 'wb') as f:
            f.write(response.content)
        print(f"Downloaded {dataset}")
    else:
        print(f"Failed to download {dataset}: Status code {response.status_code}")

# Note: This code assumes that the datasets are available at the given URL pattern.
# If the URL pattern is incorrect, the downloads will fail.



Error downloading EthanolConcentration: File is not a zip file
Error downloading FaceDetection: File is not a zip file
Error downloading Handwriting: File is not a zip file
Error downloading Heartbeat: File is not a zip file
Error downloading JapaneseVowels: File is not a zip file
Error downloading PEMS-SF: File is not a zip file
Error downloading SelfRegulationSCP1: File is not a zip file
Error downloading SelfRegulationSCP2: File is not a zip file
Error downloading SpokenArabicDigits: File is not a zip file
Error downloading UWaveGestureLibrary: File is not a zip file


In [2]:
from sktime.datasets import load_from_tsfile_to_dataframe
import numpy as np
import pandas as pd

def load_from_tsfile(file_path, return_y=True):
    X, y = load_from_tsfile_to_dataframe(file_path)
    X = pd.DataFrame({i: pd.Series(x) for i, x in enumerate(X.iloc[:, 0])})
    if return_y:
        return X, y
    else:
        return X


# Test the function
file_path = './datasets/JapaneseVowels/JapaneseVowels_TRAIN.ts'
X, y = load_from_tsfile(file_path)
print(X.shape, y.shape)



(26, 270) (270,)


In [12]:
file_path = './datasets/JapaneseVowels/JapaneseVowels_TEST.ts'
X_test, y_test = load_from_tsfile(file_path)
print(X_test.shape, y_test.shape)

(29, 370) (370,)


In [6]:
y

array(['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2',
       '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2',
       '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '3', '3', '3',
       '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3',
       '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '4',
       '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4',
       '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4',
       '4', '4', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',
       '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',
       '5', '5', '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6',
       '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6',
       '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '7

In [9]:
X_train  = X.transpose()

In [5]:
y

array(['1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1', '1',
       '1', '1', '1', '1', '2', '2', '2', '2', '2', '2', '2', '2', '2',
       '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2', '2',
       '2', '2', '2', '2', '2', '2', '2', '2', '3', '3', '3', '3', '3',
       '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3',
       '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '3', '4',
       '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4',
       '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4', '4',
       '4', '4', '4', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',
       '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5', '5',
       '5', '5', '5', '5', '5', '5', '5', '6', '6', '6', '6', '6', '6',
       '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6',
       '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '6', '7

In [15]:
X_test = X_test.transpose()

In [18]:
X_test = X_test.iloc[:, :26]



In [23]:
import lightgbm as lgb
import optuna
from sklearn.metrics import accuracy_score

def objective(trial):
    param = {
        'objective': 'multiclass',  # or 'multiclass' for multi-class classification
        'metric': 'multi_logloss',  # or 'multi_logloss' for multi-class classification
        'verbosity': -1,
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'learning_rate': trial.suggest_float('learning_rate', 1e-8, 1.0, log=True),
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000),
    }

    gbm = lgb.LGBMClassifier(**param)
    gbm.fit(X_train, y)
    preds = gbm.predict(X_test)
    accuracy = accuracy_score(y_test, preds)
    return accuracy



In [24]:

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

print('Number of finished trials:', len(study.trials))
print('Best trial:', study.best_trial.params)



[32m[I 2024-02-08 17:25:21,275][0m A new study created in memory with name: no-name-299c7f46-f756-4e55-b031-743f11daab37[0m
[32m[I 2024-02-08 17:25:22,561][0m Trial 0 finished with value: 0.5243243243243243 and parameters: {'num_leaves': 118, 'learning_rate': 1.9073863075115233e-06, 'n_estimators': 569}. Best is trial 0 with value: 0.5243243243243243.[0m
[32m[I 2024-02-08 17:25:23,621][0m Trial 1 finished with value: 0.572972972972973 and parameters: {'num_leaves': 142, 'learning_rate': 0.0001681096073458359, 'n_estimators': 555}. Best is trial 1 with value: 0.572972972972973.[0m
[32m[I 2024-02-08 17:25:25,493][0m Trial 2 finished with value: 0.5702702702702702 and parameters: {'num_leaves': 238, 'learning_rate': 0.0001463594221872099, 'n_estimators': 978}. Best is trial 1 with value: 0.572972972972973.[0m
[32m[I 2024-02-08 17:25:26,204][0m Trial 3 finished with value: 0.518918918918919 and parameters: {'num_leaves': 3, 'learning_rate': 1.4236015354497582e-07, 'n_estimato

KeyboardInterrupt: 