# sklearn-Style Classifier


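This notebook demonstrates the sklearn-style API (`tinycudann_sklearn.MLPClassifier`) on three
datasets of increasing size: Iris (150 samples), Digits (1,797 samples), and a synthetic
50,000-sample, 10-class problem that is much larger than Iris.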


In [1]:
import time
import warnings
from pathlib import Path

import numpy as np
from sklearn.datasets import load_digits, load_iris, make_classification
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tinycudann_sklearn import MLPClassifier

def find_repo_root(start: Path) -> Path:
    """Return the closest directory, from ``start`` upward, that looks like the repo root.

    A directory qualifies when it contains both a ``README.md`` and a
    ``CMakeLists.txt`` entry. ``start`` itself is checked first, then each of
    its parents in order, so the nearest match wins.

    Args:
        start: Directory at which the upward search begins.

    Returns:
        The first directory that contains both marker entries.

    Raises:
        RuntimeError: If neither ``start`` nor any of its parents qualifies.
    """
    markers = ("README.md", "CMakeLists.txt")
    for directory in (start, *start.parents):
        # all() short-circuits, so the second marker is only probed when the first exists.
        if all((directory / marker).exists() for marker in markers):
            return directory
    raise RuntimeError("Could not find tiny-cuda-nn repository root.")

# Notebook-wide configuration.
# NOTE(review): this silences *every* warning category for the rest of the session;
# consider narrowing the filter if specific warnings should stay visible.
warnings.filterwarnings("ignore")
# Seed NumPy's legacy global RNG so NumPy-based randomness is repeatable.
np.random.seed(42)

# Locate the repository checkout by walking upward from the current working directory.
ROOT = find_repo_root(
    Path.cwd().resolve()
)
print("Repository root:", ROOT)

def run_experiment(
    name,
    X,
    y,
    *,
    max_iter,
    batch_size,
    hidden=(64, 64),
    lr=5e-3,
    test_size=0.25,
    random_state=42,
):
    """Train and evaluate an ``MLPClassifier`` on one dataset and report the outcome.

    The data is cast to ``float32`` features / ``int64`` labels, split into a
    stratified train/test partition, standardized with statistics fitted on the
    training portion only, and then used to fit and score the classifier. A
    one-line summary is printed and the same information is returned as a dict.

    Args:
        name: Label used for the dataset in the printed line and the result dict.
        X: Feature matrix (array-like); must be 2-D because ``X.shape[1]`` is read.
        y: Class labels (array-like), one per row of ``X``.
        max_iter: Forwarded to ``MLPClassifier(max_iter=...)``.
        batch_size: Forwarded to ``MLPClassifier(batch_size=...)``.
        hidden: Forwarded to ``MLPClassifier(hidden_layer_sizes=...)``.
        lr: Forwarded to ``MLPClassifier(learning_rate_init=...)``.
        test_size: Forwarded to ``train_test_split(test_size=...)``. Defaults to
            0.25, the value previously hard-coded here.
        random_state: Seed passed to both ``train_test_split`` and
            ``MLPClassifier``. Defaults to 42, the value previously hard-coded
            here, so existing calls behave exactly as before.

    Returns:
        dict with keys ``dataset``, ``n_samples``, ``n_features``, ``n_classes``,
        ``fit_seconds``, ``accuracy``, ``n_iter`` and ``using_tcnn_backend``.
        ``n_samples``/``n_features``/``n_classes`` describe the full input (before
        splitting); ``accuracy`` is ``clf.score`` on the held-out test split.
    """
    X = np.asarray(X, dtype=np.float32)
    y = np.asarray(y, dtype=np.int64)

    # Stratify so every class keeps its proportion in both partitions.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state, stratify=y
    )

    # Fit the scaler on the training rows only, then reuse it for the test rows,
    # so no test-set statistics leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train).astype(np.float32)
    X_test = scaler.transform(X_test).astype(np.float32)

    clf = MLPClassifier(
        hidden_layer_sizes=hidden,
        max_iter=max_iter,
        learning_rate_init=lr,
        batch_size=batch_size,
        random_state=random_state,
        early_stopping=True,
        n_iter_no_change=8,
        validation_fraction=0.1,
    )

    # Wall-clock duration of the whole fit() call, including any one-time setup
    # that fit() itself performs.
    t0 = time.perf_counter()
    clf.fit(X_train, y_train)
    fit_seconds = time.perf_counter() - t0
    accuracy = float(clf.score(X_test, y_test))

    result = {
        "dataset": name,
        "n_samples": int(X.shape[0]),
        "n_features": int(X.shape[1]),
        "n_classes": int(len(np.unique(y))),
        "fit_seconds": float(fit_seconds),
        "accuracy": accuracy,
        "n_iter": int(clf.n_iter_),
        # Reads a private estimator attribute; reported as False when it is absent.
        # NOTE(review): presumably set by the estimator when the tiny-cuda-nn
        # backend is active — confirm against tinycudann_sklearn.
        "using_tcnn_backend": bool(getattr(clf, "_using_tcnn", False)),
    }

    print(
        f"{name:14s} | samples={result['n_samples']:6d} | features={result['n_features']:3d} | "
        f"classes={result['n_classes']:2d} | iter={result['n_iter']:3d} | "
        f"fit={result['fit_seconds']:.2f}s | acc={result['accuracy']:.4f} | "
        f"tinycudann={result['using_tcnn_backend']}"
    )

    return result



A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.4.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "/home/tunguz/.local/lib/python3.12/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/home/tunguz/.local/lib/python3.12/site-packages/traitlets/config/application.py", line 1075, in launch_instance
    app.start()
  File "/home/tunguz/.local/lib/python3.12/site-packages/ipykernel/kernelapp.py", line 739, in start
    self.io_loop.start()
  File "/home/tunguz/.local/

ImportError: 
A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.4.2 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.



Repository root: /media/tunguz/3139-3535/tiny-cuda-nn


In [2]:
# --- Load the three datasets, smallest first --------------------------------
iris_X, iris_y = load_iris(return_X_y=True)
digits_X, digits_y = load_digits(return_X_y=True)
synth_X, synth_y = make_classification(
    n_samples=50_000,
    n_features=64,
    n_informative=32,
    n_redundant=8,
    n_classes=10,
    class_sep=1.5,
    flip_y=0.01,
    random_state=42,
)

# --- One (name, features, labels, training options) entry per experiment ----
experiments = [
    ("iris", iris_X, iris_y, {"max_iter": 60, "batch_size": 32}),
    ("digits", digits_X, digits_y, {"max_iter": 80, "batch_size": 128}),
    ("synthetic_50k", synth_X, synth_y, {"max_iter": 20, "batch_size": 2048}),
]

# Experiments run in list order; each call prints its own one-line summary.
results = [
    run_experiment(label, features, labels, **options)
    for label, features, labels, options in experiments
]

print()
print("Summary:")
for r in results:
    print(r)

# Sanity check: every dataset after Iris must be strictly larger than Iris.
iris_n = results[0]["n_samples"]
assert all(later["n_samples"] > iris_n for later in results[1:])

# Minimum acceptable test accuracy per experiment, in the same order as `results`.
accuracy_floors = [
    (0.80, "Iris accuracy is unexpectedly low."),
    (0.90, "Digits accuracy is unexpectedly low."),
    (0.70, "Synthetic dataset accuracy is unexpectedly low."),
]
for r, (min_accuracy, message) in zip(results, accuracy_floors):
    assert r["accuracy"] >= min_accuracy, message


iris           | samples=   150 | features=  4 | classes= 3 | iter= 57 | fit=1.23s | acc=0.9474 | tinycudann=True


digits         | samples=  1797 | features= 64 | classes=10 | iter= 20 | fit=0.24s | acc=0.9667 | tinycudann=True


synthetic_50k  | samples= 50000 | features= 64 | classes=10 | iter= 20 | fit=0.39s | acc=0.8958 | tinycudann=True

Summary:
{'dataset': 'iris', 'n_samples': 150, 'n_features': 4, 'n_classes': 3, 'fit_seconds': 1.2321980119995715, 'accuracy': 0.9473684210526315, 'n_iter': 57, 'using_tcnn_backend': True}
{'dataset': 'digits', 'n_samples': 1797, 'n_features': 64, 'n_classes': 10, 'fit_seconds': 0.24166929999955755, 'accuracy': 0.9666666666666667, 'n_iter': 20, 'using_tcnn_backend': True}
{'dataset': 'synthetic_50k', 'n_samples': 50000, 'n_features': 64, 'n_classes': 10, 'fit_seconds': 0.39364483799909067, 'accuracy': 0.89584, 'n_iter': 20, 'using_tcnn_backend': True}
