# ROCKET playground Tuning

In [1]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sktime.transformations.panel.rocket import MiniRocket
from sktime.transformations.panel.rocket import MiniRocketMultivariate
from sktime.transformations.panel.rocket import MiniRocketMultivariateVariable
from sktime.transformations.panel.rocket import Rocket


## Load Datasets

In [2]:
#loading our preprocessed datasets
X = np.load('/Users/kirafriedrichs/neuefische/Capstone_Project_SignMeUp/data/X-data_sub2834.npy')
y = np.load('/Users/kirafriedrichs/neuefische/Capstone_Project_SignMeUp/data/y-data_sub2834.npy')

#make y 1-Dimensional because this is what SKTIME wants
y = np.argmax(y, axis=1)

#defining signs --> edit for specific subset of data
#actions = np.array ( ['alligator', 'radio', 'moon', 'sleep', 'grandpa', 'tiger', 'pencil', 'sleepy', 'grandma', 'chocolate'])

### Split train and test data

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
# visualize the first univariate time series
#X_train.iloc[0, 0].plot()

### Tuning: GridSearch

In [10]:
# Define parameter grid for ROCKET and logistic regression

# Here, we define a dictionary param_grid that specifies the values to be searched for the num_kernels parameter of ROCKET and the C parameter of logistic regression. 
# We try different values for each parameter to find the best combination.

param_grid = {
    'rocket__num_kernels': [1000, 5000, 10000],
    'clf__C': [0.01, 0.1, 1, 10]
}


Apply ROCKET and logistic regression with grid search:

In [5]:
# Apply ROCKET and logistic regression with grid search

# Here, we create a Rocket object and a LogisticRegression object, and combine them into a pipeline using Pipeline from sklearn.pipeline. 
# We then use GridSearchCV to perform a grid search over the parameter grid defined earlier. 
# We use 5-fold cross-validation to evaluate the performance of each parameter combination and accuracy as the scoring metric. 
# We also set n_jobs=-1 to use all available CPUs for parallel processing.

rocket = Rocket(random_state=123)
clf = LogisticRegression(random_state=123)
pipeline = Pipeline([('rocket', rocket), ('clf', clf)])
grid = GridSearchCV(pipeline, param_grid, cv=5, scoring='accuracy', n_jobs=-1)
grid.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

GridSearchCV(cv=5,
             estimator=Pipeline(steps=[('rocket', Rocket(random_state=123)),
                                       ('clf',
                                        LogisticRegression(random_state=123))]),
             n_jobs=-1,
             param_grid={'clf__C': [0.01, 0.1, 1, 10],
                         'rocket__num_kernels': [1000, 5000, 10000]},
             scoring='accuracy')

Print best parameters and test accuracy:

In [10]:
# Print best parameters and test accuracy

# Here, we print the best combination of hyperparameters found by grid search, and use the predict method to make predictions on the test data. 
# We then compute the accuracy score and print it.

print('Best parameters:', grid.best_params_)
y_pred = grid.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Test accuracy:', accuracy)

#Best parameters: {'clf__C': 0.1, 'rocket__num_kernels': 5000}

Best parameters: {'clf__C': 0.1, 'rocket__num_kernels': 5000}


In [9]:
y_pred = grid.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Test accuracy:', accuracy)

Test accuracy: 0.015267175572519083


### Now test with the dataset from Franziska's preprocessing notebook

In [2]:
#loading our preprocessed datasets
X = np.load('/Users/kirafriedrichs/neuefische/Capstone_Project_SignMeUp/data/feature_data.npy')
y = np.load('/Users/kirafriedrichs/neuefische/Capstone_Project_SignMeUp/data/feature_labels.npy')

#make y 1-Dimensional because this is what SKTIME wants
#y = np.argmax(y, axis=1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

In [9]:
# Define the MiniROCKET model
from sktime.transformations.panel.rocket import MiniRocket
model = MiniRocket(num_kernels=5000, random_state=42)

# Fit the model
model.fit(X_train, y_train)

# Extract features
X_train_features = model.transform(X_train)

# Train the classifier
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(C = 0.1, random_state=42)
classifier.fit(X_train_features, y_train)

# Evaluate the model
X_test_features = model.transform(X_test)
y_pred = classifier.predict(X_test_features)

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)


: 

: 