# Benchmark pyfoamalgo - Statistics

Author: Jun Zhu

In [None]:
import random
import numpy as np

import pyfoamalgo
print("pyfoamalgo version: ", pyfoamalgo.__version__)

import multiprocessing as mp
print("Number of cores: ", mp.cpu_count())

from pyfoamalgo import nansum, nanmean, nanmin, nanmax, nanstd, nanvar, nanmean_image_data
from pyfoamalgo import histogram1d

---
## Initialize image data

In [None]:
# Number of images in each generated stack (length of the first array axis).
NUM_IMAGES = 128
# Shape of one image; unpacked into the trailing axes of the generated arrays.
IMAGE_SHAPE = (1200, 1124)
# Element type the generated random data is cast to.
DTYPE = np.float32

In [None]:
def generate_image_array(n, with_nan=True):
    """Build a stack of ``n`` random images for the benchmarks.

    The values are drawn with ``np.random.randn`` and then cast to ``DTYPE``;
    the result has shape ``(n, *IMAGE_SHAPE)``.

    Parameters
    ----------
    n : int
        Number of images in the stack (length of the first axis).
    with_nan : bool
        When true, NaN is written to every element whose indices along the
        second and third axes are both even, in every image of the stack.

    Returns
    -------
    numpy.ndarray
        The generated stack.
    """
    stack_shape = (n, *IMAGE_SHAPE)
    images = np.random.randn(*stack_shape).astype(DTYPE)
    if not with_nan:
        return images

    # Same NaN pattern in every image: every other element along both axes.
    even = slice(None, None, 2)
    images[:, even, even] = np.nan
    return images


def generate_image(with_nan=True):
    """Build a single random image for the benchmarks.

    The values are drawn with ``np.random.randn`` and then cast to ``DTYPE``;
    the result has shape ``IMAGE_SHAPE``.

    Parameters
    ----------
    with_nan : bool
        When true, NaN is written to every element whose indices along the
        first two axes are both even.

    Returns
    -------
    numpy.ndarray
        The generated image.
    """
    image = np.random.randn(*IMAGE_SHAPE).astype(DTYPE)
    if not with_nan:
        return image

    # Every other element along both axes becomes NaN.
    even = slice(None, None, 2)
    image[even, even] = np.nan
    return image

---
## Statistics with nan

### nanmean / nanmean_image_data (image array)

In [None]:
# NumPy baseline: NaN-ignoring mean over axis 0 (the image axis) of the stack.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit np.nanmean(imgs, axis=0)

In [None]:
# pyfoamalgo's nanmean on a fresh NaN-containing image stack, reducing over axis 0.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit nanmean(imgs, axis=0)

In [None]:
# pyfoamalgo's nanmean_image_data on the whole stack; no axis argument is passed.
# NOTE(review): presumably reduces over the image (first) axis - confirm against the pyfoamalgo docs.
imgs = generate_image_array(NUM_IMAGES)
%timeit nanmean_image_data(imgs)

A common use case is to apply `nanmean` to a list of selected images.

In [None]:
# Pick 120 distinct image indices out of the NUM_IMAGES (128) available.
# random.sample draws without replacement, and the indices come back in random order.
selected = random.sample(range(NUM_IMAGES), 120)

In [None]:
# NumPy is slower here than when averaging all images: indexing with the list
# 'selected' is advanced (fancy) indexing, which copies the selected images
# before the reduction. That copy is part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit np.nanmean(imgs[selected], axis=0)

In [None]:
# pyfoamalgo's nanmean_image_data with the index list passed via `kept`.
# NOTE(review): presumably only the images listed in `kept` enter the mean - confirm against the pyfoamalgo docs.
imgs = generate_image_array(NUM_IMAGES)
%timeit nanmean_image_data(imgs, kept=selected)

### nanmean (single image)

In [None]:
# NumPy baseline: NaN-ignoring mean over all elements of one image (no axis given).
img = generate_image()
%timeit np.nanmean(img)

In [None]:
# pyfoamalgo's nanmean on one NaN-containing image, with no axis argument.
img = generate_image()
%timeit nanmean(img)

### nansum (image array) - without parallelization

In [None]:
# NumPy baseline: NaN-ignoring sum over axis 0 (the image axis) of the stack.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit np.nansum(imgs, axis=0)

In [None]:
# pyfoamalgo's nansum on a fresh NaN-containing image stack, reducing over axis 0.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit nansum(imgs, axis=0)

### nansum (single image)

In [None]:
# NumPy baseline: NaN-ignoring sum over all elements of one image (no axis given).
img = generate_image()
%timeit np.nansum(img)

In [None]:
# pyfoamalgo's nansum on one NaN-containing image, with no axis argument.
img = generate_image()
%timeit nansum(img)

### nanmin (image array) - without parallelization [FIXME]

In [None]:
# NumPy baseline: NaN-ignoring minimum over axis 0 (the image axis) of the stack.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit np.nanmin(imgs, axis=0)

In [None]:
# pyfoamalgo's nanmin on a fresh NaN-containing image stack, reducing over axis 0.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit nanmin(imgs, axis=0)

### nanmin (single image) [FIXME]

In [None]:
# NumPy baseline: NaN-ignoring minimum over all elements of one image (no axis given).
img = generate_image()
%timeit np.nanmin(img)

In [None]:
# pyfoamalgo's nanmin on one NaN-containing image, with no axis argument.
img = generate_image()
%timeit nanmin(img)

### nanmax (image array) - without parallelization [FIXME]

In [None]:
# NumPy baseline: NaN-ignoring maximum over axis 0 (the image axis) of the stack.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit np.nanmax(imgs, axis=0)

In [None]:
# pyfoamalgo's nanmax on a fresh NaN-containing image stack, reducing over axis 0.
# The stack is created on its own line, so it is not part of the timed statement.
imgs = generate_image_array(NUM_IMAGES)
%timeit nanmax(imgs, axis=0)

### nanmax (single image) [FIXME]

In [None]:
# NumPy baseline: NaN-ignoring maximum over all elements of one image (no axis given).
img = generate_image()
%timeit np.nanmax(img)

In [None]:
# pyfoamalgo's nanmax on one NaN-containing image, with no axis argument.
img = generate_image()
%timeit nanmax(img)

### nanstd  (single image) [FIXME]

In [None]:
# img = generate_image()
# %timeit np.nanstd(img)

In [None]:
# img = generate_image()
# %timeit nanstd(img)

### nanvar  (single image) [FIXME]

In [None]:
# img = generate_image()
# %timeit np.nanvar(img)

In [None]:
# img = generate_image()
# %timeit nanvar(img)

---
## Statistics without nan

### histogram1d (single image)

In [None]:
# NumPy baseline: 120-bin histogram of one image generated without NaNs (with_nan=False).
img = generate_image(with_nan=False)
%timeit np.histogram(img, bins=120)

In [None]:
# pyfoamalgo's histogram1d with 120 bins on one image generated without NaNs (with_nan=False).
img = generate_image(with_nan=False)
%timeit histogram1d(img, bins=120)