### Imports

In [1]:
import fast_stats
from sklearn.metrics import (
    precision_score, 
    recall_score, 
    f1_score, 
    confusion_matrix
)
import numpy as np

### Settings

In [2]:
SIZE = (10, 512, 512)  # shape of every randomly generated benchmark array
NUM_CATS = 20          # number of categories used by the confusion-matrix benchmarks
SEED = 0               # fixed seed so Restart & Run All regenerates the same data

# Seed NumPy's legacy global RNG; every np.random.randint call in this
# notebook draws from it.
np.random.seed(SEED)

### Binary compared to scikit-learn

In [3]:
# Random boolean ground truth and predictions, flattened to 1-D
# before being handed to the scikit-learn metrics.
y_true, y_pred = (
    np.random.randint(low=0, high=2, size=SIZE).astype(bool).flatten()
    for _ in range(2)
)

In [4]:
%%timeit
# scikit-learn baseline: precision on the flattened boolean arrays
_ = precision_score(y_true, y_pred)

568 ms ± 705 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [5]:
%%timeit
# scikit-learn baseline: recall on the flattened boolean arrays
_ = recall_score(y_true, y_pred)

576 ms ± 8.66 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
%%timeit
# scikit-learn baseline: F1 score on the flattened boolean arrays
_ = f1_score(y_true, y_pred)

577 ms ± 4.02 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [7]:
# fast-stats accepts the arrays unflattened, so the original
# SIZE-shaped arrays are kept as they are
y_true, y_pred = (
    np.random.randint(low=0, high=2, size=SIZE).astype(bool)
    for _ in range(2)
)

In [8]:
%%timeit
# fast_stats precision, called directly on the unflattened arrays
_ = fast_stats.binary_precision(y_true, y_pred)

3.91 ms ± 30.8 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
%%timeit
# fast_stats recall, called directly on the unflattened arrays
_ = fast_stats.binary_recall(y_true, y_pred)

3.9 ms ± 22.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
%%timeit
# fast_stats F1 score, called directly on the unflattened arrays
_ = fast_stats.binary_f1_score(y_true, y_pred)

5.15 ms ± 97.3 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
# Sanity check: each fast_stats binary metric must agree with its
# scikit-learn counterpart (scikit-learn is fed the flattened arrays).
for fast_metric, sklearn_metric in (
    (fast_stats.binary_precision, precision_score),
    (fast_stats.binary_recall, recall_score),
    (fast_stats.binary_f1_score, f1_score),
):
    assert np.allclose(
        fast_metric(y_true, y_pred),
        sklearn_metric(y_true.flatten(), y_pred.flatten()),
    )

### Binary compared to numpy

In [12]:
# Make sure both arrays are boolean before the NumPy comparison.
y_true = y_true.astype(bool)
y_pred = y_pred.astype(bool)

In [13]:
%%timeit
# pure-NumPy precision: number of positions where both arrays are True,
# divided by the number of predicted positives
_ = np.logical_and(y_true, y_pred).sum() / y_pred.sum()

5.75 ms ± 45 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [14]:
%%timeit
# fast_stats precision on the same arrays, for comparison with the NumPy version
_ = fast_stats.binary_precision(y_true, y_pred)

3.96 ms ± 93.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Confusion matrix compared to scikit-learn

In [15]:
# Random category ids in [0, NUM_CATS), flattened to 1-D
# before being handed to scikit-learn.
y_true, y_pred = (
    np.random.randint(low=0, high=NUM_CATS, size=SIZE).flatten()
    for _ in range(2)
)

In [16]:
%%timeit
# scikit-learn baseline without the optional `labels` argument
_ = confusion_matrix(y_true, y_pred)

330 ms ± 2.29 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit
# `labels` is an optional argument, but providing it
# leads to a significant speedup because the labels
# then do not have to be inferred
_ = confusion_matrix(y_true, y_pred, labels = list(range(NUM_CATS)))

174 ms ± 702 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [18]:
# fast-stats accepts the arrays unflattened; skipping the flatten
# can be a further speedup, depending on the use case
y_true, y_pred = (
    np.random.randint(low=0, high=NUM_CATS, size=SIZE)
    for _ in range(2)
)

In [19]:
%%timeit
# fast_stats on the unflattened arrays, without the optional `labels` argument
_ = fast_stats.confusion_matrix(y_true, y_pred)

141 ms ± 2.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [20]:
%%timeit
# `labels` is an optional argument, but providing it
# leads to a significant speedup because the labels
# then do not have to be inferred
_ = fast_stats.confusion_matrix(y_true, y_pred, labels = list(range(NUM_CATS)))

72.4 ms ± 1.67 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [21]:
# Validate fast_stats against scikit-learn, which is fed the flattened arrays.
# Both fast_stats call styles benchmarked above are checked: labels inferred
# from the data, and labels passed explicitly.
all_labels = list(range(NUM_CATS))
sklearn_matrix = confusion_matrix(y_true.flatten(), y_pred.flatten(), labels = all_labels)

# NOTE(review): the inferred-labels comparison assumes every one of the
# NUM_CATS categories occurs in the random data; confirm if SIZE is shrunk.
assert np.allclose(
    sklearn_matrix,
    fast_stats.confusion_matrix(y_true, y_pred)
)
assert np.allclose(
    sklearn_matrix,
    fast_stats.confusion_matrix(y_true, y_pred, labels = all_labels)
)

### Effect of flattening or reshaping for scikit-learn

In [22]:
# Random category array used to time the two ways of producing a 1-D array.
mat = np.random.randint(low=0, high=NUM_CATS, size=SIZE)

In [23]:
%%timeit
# flatten() always copies the data into a new 1-D array (per the NumPy docs)
_ = mat.flatten()

1.83 ms ± 55.2 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [24]:
%%timeit
# reshape(-1) returns a view when possible, avoiding the copy (per the NumPy docs)
_ = mat.reshape(-1)

321 ns ± 1.95 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)
