In [10]:
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import time

sys.path.append('../code')
from minirocket import fit, transform
from aeon.datasets import load_classification
from aeon.transformations.collection.convolution_based import Rocket
from sklearn.linear_model import RidgeClassifierCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [11]:
# Load the published reference results (one row per dataset) and keep the
# dataset names; later cells iterate over `dataset_names`.
published = pd.read_csv('../results/results_ucr109_mean.csv')

# Quick sanity check of the schema and the first few rows.
print(list(published.columns))
print(published.head(5))

dataset_names = published['dataset'].to_list()
n_datasets = len(dataset_names)
print(f"\n{n_datasets} datasets")

['dataset', 'accuracy', 'time_training_seconds', 'time_test_seconds']
     dataset  accuracy  time_training_seconds  time_test_seconds
0      ACSF1  0.822333                   0.49               0.25
1      Adiac  0.801705                   0.45               0.23
2  ArrowHead  0.880952                   0.13               0.14
3        BME  0.992222                   0.11               0.07
4       Beef  0.761111                   0.17               0.04

109 datasets


In [None]:
from aeon.transformations.collection.convolution_based import Rocket
from sklearn.linear_model import RidgeClassifierCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import time

# Benchmark Rocket + ridge classifier on every dataset in `dataset_names`,
# recording test accuracy and the time spent fitting/applying the transform.
# Results go to `rocket_df`; datasets that raise are collected in
# `failed_datasets` so they are not lost silently.
minirocket_df = pd.read_csv('../results/minirocket_results.csv')
rocket_results = []
failed_datasets = []  # (dataset name, error message) for each dataset that raised

for name in dataset_names:
    try:
        X_train, y_train = load_classification(name, split="train")
        X_test, y_test = load_classification(name, split="test")

        # NOTE(review): Rocket is expected to receive 3D input
        # (n_samples, n_channels, length); the loaded arrays are passed through
        # unchanged, so this relies on load_classification's default layout.

        # Timed section: kernel fitting plus transforming both splits.
        # perf_counter is monotonic, so the interval is unaffected by
        # system clock adjustments.
        t0 = time.perf_counter()
        rocket = Rocket(n_kernels=10000, random_state=42)
        rocket.fit(X_train)
        X_train_tf = rocket.transform(X_train)
        X_test_tf = rocket.transform(X_test)
        transform_time = time.perf_counter() - t0

        # Classifier fitting/scoring is deliberately outside the timed section.
        clf = make_pipeline(StandardScaler(), RidgeClassifierCV(alphas=np.logspace(-3, 3, 10)))
        clf.fit(X_train_tf, y_train)
        acc = clf.score(X_test_tf, y_test)

        rocket_results.append({
            'dataset': name,
            'accuracy_rocket': acc,
            'transform_time_rocket': transform_time
        })
        print(f"Ok {name}: {acc:.4f} ({transform_time:.2f}s)")
    except Exception as e:
        # Best effort: one failing dataset must not abort the whole benchmark,
        # but remember it so the gap is visible in the final report.
        failed_datasets.append((name, str(e)))
        print(f"Wrong {name}: {e}")

# Explicit columns keep the frame mergeable on 'dataset' even when every
# dataset failed and `rocket_results` is empty.
rocket_df = pd.DataFrame(
    rocket_results,
    columns=['dataset', 'accuracy_rocket', 'transform_time_rocket'],
)

if failed_datasets:
    failed_names = [failed_name for failed_name, _ in failed_datasets]
    print(f"\n{len(failed_names)} of {len(dataset_names)} datasets failed: {failed_names}")

Ok ACSF1: 0.8900 (20.81s)
Ok Adiac: 0.7724 (6.64s)
Ok ArrowHead: 0.8057 (2.46s)
Ok BME: 1.0000 (1.16s)
Ok Beef: 0.8000 (1.37s)
Ok BeetleFly: 0.9000 (0.96s)
Ok BirdChicken: 0.9000 (0.96s)
Ok CBF: 1.0000 (5.47s)
Ok Car: 0.9167 (3.13s)
Ok Chinatown: 0.9825 (0.50s)
Ok ChlorineConcentration: 0.8096 (32.62s)
Ok CinCECGTorso: 0.8304 (105.25s)
Ok Coffee: 1.0000 (0.75s)
Ok Computers: 0.7680 (16.21s)
Ok CricketX: 0.8231 (10.56s)
Ok CricketY: 0.8538 (10.55s)
Ok CricketZ: 0.8564 (10.56s)
Ok Crop: 0.7572 (55.16s)
Ok DiatomSizeReduction: 0.9739 (5.20s)
Ok DistalPhalanxOutlineAgeGroup: 0.7554 (2.08s)
Ok DistalPhalanxOutlineCorrect: 0.7717 (3.31s)
Ok DistalPhalanxTW: 0.6763 (2.06s)
Ok ECG200: 0.9200 (0.93s)
Ok ECG5000: 0.9473 (32.11s)
Ok ECGFiveDays: 1.0000 (5.53s)
Ok EOGHorizontalSignal: 0.6381 (40.85s)
Ok EOGVerticalSignal: 0.5414 (41.87s)
Ok Earthquakes: 0.7554 (10.83s)
Ok ElectricDevices: 0.7260 (74.86s)
Ok EthanolLevel: 0.5920 (80.32s)
Ok FaceAll: 0.9432 (14.13s)
Ok FaceFour: 0.9773 (1.94s)
Ok Fa

In [None]:
# Join the MiniRocket and Rocket result tables on the dataset name and save
# the combined table. The default inner join keeps only datasets present in
# BOTH tables, so report anything that is excluded: the comparison cells
# further down would otherwise run on a silently reduced set.
combined = minirocket_df.merge(rocket_df, on='dataset')

excluded = sorted(
    set(minirocket_df['dataset']).symmetric_difference(rocket_df['dataset'])
)
if excluded:
    print(f"Warning: {len(excluded)} dataset(s) missing from one table and excluded: {excluded}")

combined.to_csv('../results/combined_results.csv', index=False)
print(combined.head())

In [None]:
# Accuracy scatter plot (reproducing Figure 4)
# One point per dataset: Rocket accuracy on x, MiniRocket accuracy on y.
# Points above the dashed diagonal are datasets where MiniRocket scores higher.
fig, ax = plt.subplots(figsize=(8, 8))

acc_rocket = combined['accuracy_rocket']
acc_minirocket = combined['accuracy_minirocket']
marker_style = dict(alpha=0.6, edgecolors='black', linewidths=0.5, s=50)
ax.scatter(acc_rocket, acc_minirocket, **marker_style)

# Reference line y = x.
diagonal = [0, 1]
ax.plot(diagonal, diagonal, 'r--', label='Equal accuracy')

ax.set_xlabel('Rocket Accuracy', fontsize=12)
ax.set_ylabel('MiniRocket Accuracy', fontsize=12)
ax.set_title('Accuracy: MiniRocket vs Rocket', fontsize=14)
ax.legend()

# Same range on both axes plus equal aspect keeps the diagonal at 45 degrees.
axis_range = (0.3, 1.02)
ax.set_xlim(*axis_range)
ax.set_ylim(*axis_range)
ax.set_aspect('equal')

plt.tight_layout()
plt.savefig('../results/accuracy_minirocket_vs_rocket.png', dpi=150)
plt.show()

In [None]:
# Timing scatter plot (reproducing Figure 2)
# One point per dataset on log-log axes: Rocket time on x, MiniRocket time on y.
# Points below the dashed reference lines are datasets where MiniRocket is faster.

# The Rocket benchmark loop stores its timing as 'transform_time_rocket';
# indexing 'time_rocket' raised KeyError unless the MiniRocket CSV supplied it.
# Prefer 'time_rocket' when it exists, otherwise fall back to the loop's column.
rocket_time_col = 'time_rocket' if 'time_rocket' in combined.columns else 'transform_time_rocket'
# NOTE(review): 'time_minirocket' is assumed to come from minirocket_results.csv — confirm.
minirocket_time_col = 'time_minirocket'

fig, ax = plt.subplots(figsize=(8, 8))

ax.scatter(combined[rocket_time_col], combined[minirocket_time_col],
           alpha=0.6, edgecolors='black', linewidths=0.5, s=50)

# Reference lines: equal time, 10x and 100x speed-up for MiniRocket.
x_ref = [0.01, 1000]
reference_lines = [
    ([0.01, 1000], 'r--', {'label': 'Equal time'}),
    ([0.001, 100], 'g--', {'alpha': 0.5, 'label': 'MiniRocket 10x faster'}),
    ([0.0001, 10], 'b--', {'alpha': 0.5, 'label': 'MiniRocket 100x faster'}),
]
for y_ref, line_fmt, line_kwargs in reference_lines:
    ax.plot(x_ref, y_ref, line_fmt, **line_kwargs)

ax.set_xlabel('Rocket Transform Time (s)', fontsize=12)
ax.set_ylabel('MiniRocket Transform Time (s)', fontsize=12)
ax.set_title('Transform Time: MiniRocket vs Rocket', fontsize=14)
ax.set_xscale('log')
ax.set_yscale('log')
ax.legend()
plt.tight_layout()
plt.savefig('../results/timing_minirocket_vs_rocket.png', dpi=150)
plt.show()

In [None]:
# Summary statistics
# Win/draw/loss counts and mean accuracies for MiniRocket vs Rocket, plus the
# mean and median per-dataset speed-up (Rocket time / MiniRocket time).

# MiniRocket accuracies are loaded from CSV while Rocket accuracies are
# computed in memory; a CSV round trip can perturb the last bits of a float,
# so exact `==` may miss genuine draws. Compare with a tolerance far below
# the accuracy resolution of any test set.
ACC_TOL = 1e-9
acc_diff = combined['accuracy_minirocket'] - combined['accuracy_rocket']
mr_wins = (acc_diff > ACC_TOL).sum()
rk_wins = (acc_diff < -ACC_TOL).sum()
draws = (acc_diff.abs() <= ACC_TOL).sum()

# The Rocket benchmark loop stores its timing as 'transform_time_rocket';
# indexing 'time_rocket' raised KeyError unless the MiniRocket CSV supplied it.
rocket_time_col = 'time_rocket' if 'time_rocket' in combined.columns else 'transform_time_rocket'
# NOTE(review): 'time_minirocket' is assumed to come from minirocket_results.csv — confirm.
speedup_ratio = combined[rocket_time_col] / combined['time_minirocket']
speedup = speedup_ratio.mean()

print("=== Accuracy ===")
print(f"MiniRocket mean: {combined['accuracy_minirocket'].mean():.4f}")
print(f"Rocket mean:     {combined['accuracy_rocket'].mean():.4f}")
print(f"MiniRocket wins: {mr_wins}")
print(f"Draws:           {draws}")
print(f"Rocket wins:     {rk_wins}")
print("(Paper reports W/D/L = 61/3/45)")
print()
print("=== Speed ===")
print(f"Average speedup: {speedup:.1f}x")
print(f"Median speedup:  {speedup_ratio.median():.1f}x")
print("(Paper reports ~30x average speedup)")

In [None]:
# Biggest accuracy differences
# Adds a 'diff' column (MiniRocket accuracy minus Rocket accuracy) to `combined`
# and prints the five datasets at each extreme.
combined['diff'] = combined['accuracy_minirocket'].sub(combined['accuracy_rocket'])
report_cols = ['dataset', 'accuracy_minirocket', 'accuracy_rocket', 'diff']

top_minirocket = combined.nlargest(5, 'diff')
top_rocket = combined.nsmallest(5, 'diff')

print("MiniRocket much better:")
print(top_minirocket[report_cols])
print()
print("Rocket much better:")
print(top_rocket[report_cols])