# `Accumulator` test


Quite often, we need to aggregate a number of matrices by taking their average, min/max, etc. For example, one can write:

```python
my_matrices = []
for _ in range(N):
    my_matrices.append(new_matrix)
stacked = np.stack(my_matrices, axis=0)
aggregated = np.nanmean(stacked, axis=0)
```
Unfortunately, that requires storing all `N` matrices in memory at the same time, which may be undesirable. On the other hand, we all know that to compute the mean we can just sum all the matrices and divide by their number. The `Accumulator` class wraps this logic for multiple aggregation methods and allows sequentially updating a container with matrices to get the desired aggregate, for example:

```python
accumulator = Accumulator('mean')
for _ in range(N):
    accumulator.update(new_matrix)
aggregated = accumulator.get()
```
Note that while the second approach uses less memory, it is (at least in some cases) slower.

In this notebook, we test that results obtained by both approaches are the same, and provide some benchmarks.

In [1]:
# Necessary imports
import sys
from time import sleep

import numpy as np
try:
    import cupy as cp
    CUPY_AVAILABLE = True
except ImportError:
    cp = np
    CUPY_AVAILABLE = False

sys.path.insert(0, '../../..')
from seismiqb import Accumulator, timer
from seismiqb.batchflow import MemoryMonitor

# Assertion test:
## Make sure that the result of `Accumulator` aggregation is the same as that of naive `NumPy`/`cupy` usage

In [2]:
ASSERT_SIZE = (100, 500, 500)  # (number of matrices, matrix height, matrix width)

# When cupy is not installed, `cp` is just an alias of `np` (see the imports cell):
# iterate over distinct backends only, so that NumPy is not checked twice.
modules = [np, cp] if CUPY_AVAILABLE else [np]

for module in modules:
    print(f'Module ::: {module.__name__:>5}')

    for op in ['nanmean', 'nanstd', 'nanmin', 'nanmax', 'argmin', 'argmax', 'nanargmin', 'nanargmax']:
        # Two accumulators per operation: one is told the number of updates in advance
        # (`total`), the other one is created with `amortize=True`.
        acc = Accumulator(op, total=ASSERT_SIZE[0])
        acc_amortized = Accumulator(op, amortize=True)
        # The reference ("naive") implementation comes from the backend under test,
        # the same way it is resolved in the benchmark cells.
        module_func = getattr(module, op)

        # Feed identical matrices to both accumulators and keep them stacked for the reference
        stacked = module.empty(ASSERT_SIZE)
        for i in range(ASSERT_SIZE[0]):
            matrix = module.random.random(size=ASSERT_SIZE[-2:])
            matrix[matrix > 0.999] = module.nan  # inject NaNs to exercise nan-aware reductions
            acc.update(matrix)
            acc_amortized.update(matrix)
            stacked[i, :, :] = matrix

        result1 = acc.get(final=False)            # accumulator with known `total`
        result2 = acc_amortized.get(final=False)  # amortized accumulator
        result3 = module_func(stacked, axis=0)    # reference reduction over the stacking axis

        if np.isclose(result1, result3, equal_nan=True).all() and np.isclose(result2, result3, equal_nan=True).all():
            print(f'    {op:7} : OK')
        else:
            # On mismatch report the mean absolute deviation from the reference, in this order:
            # `total` accumulator and amortized accumulator with NaNs ignored (`nanmean`),
            # then the same pair with NaNs propagated (`mean`).
            print()
            print(f'    {op:7} : {np.nanmean(np.abs(result1 - result3))}')
            print(f'    {op:7} : {np.nanmean(np.abs(result2 - result3))}')
            print(f'    {op:7} : {np.mean(np.abs(result1 - result3))}')
            print(f'    {op:7} : {np.mean(np.abs(result2 - result3))}')
            print()

Module ::: numpy
    nanmean : OK
    nanstd  : OK
    nanmin  : OK
    nanmax  : OK



invalid value encountered in less




    argmin  : 0.0
    argmin  : 3.217604
    argmin  : 0.0
    argmin  : 3.217604

    argmax  : 0.0
    argmax  : 49.46956
    argmax  : 0.0
    argmax  : 49.46956
    nanargmin : OK

    nanargmax : 0.0
    nanargmax : 49.606156
    nanargmax : 0.0
    nanargmax : 49.606156
Module :::  cupy
    nanmean : OK
    nanstd  : OK
    nanmin  : OK
    nanmax  : OK

    argmin  : 0.0
    argmin  : 3.238324
    argmin  : 0.0
    argmin  : 3.238324

    argmax  : 0.0
    argmax  : 49.52176
    argmax  : 0.0
    argmax  : 49.52176
    nanargmin : OK

    nanargmax : 0.0
    nanargmax : 49.497068
    nanargmax : 0.0
    nanargmax : 49.497068


# Speed benchmark:
## Compare `NumPy`/`cupy` against both amortized and non-amortized `Accumulator`s

In [3]:
BENCHMARK_SIZE = (100, 2000, 2000) # usual size of the data along horizon

# When cupy is not installed, `cp` is just an alias of `np` (see the imports cell):
# benchmark distinct backends only, so that NumPy is not timed twice.
modules = [np, cp] if CUPY_AVAILABLE else [np]

# NOTE(review): CuPy launches GPU work asynchronously; unless `timer` or `Accumulator.get`
# synchronizes the device, the `cupy` timings below may not include pending kernels — confirm.
for module in modules:
    print(f'Module ::: {module.__name__}')

    for op in ['nanmean', 'nanstd', 'nanmin', 'nanmax', 'argmin', 'argmax']:

        # Baseline: store every matrix, then reduce over the stacking axis in one call.
        # Matrix generation is inside the timed region for all three variants.
        module_func = getattr(module, op)
        with timer(f'    naive         {op}'):
            stacked = module.empty(BENCHMARK_SIZE)
            for i in range(BENCHMARK_SIZE[0]):
                matrix = module.random.random(size=BENCHMARK_SIZE[-2:])
                stacked[i, :, :] = matrix
            result = module_func(stacked, axis=0)

        # Accumulator that is told the number of updates in advance (`total`)
        acc = Accumulator(op, total=BENCHMARK_SIZE[0])
        with timer(f'    acc stacking  {op}'):
            for _ in range(BENCHMARK_SIZE[0]):
                matrix = module.random.random(size=BENCHMARK_SIZE[-2:])
                acc.update(matrix)
            result = acc.get(final=True)

        # Accumulator created with `amortize=True`
        acc = Accumulator(op, amortize=True)
        with timer(f'    acc amortized {op}'):
            for _ in range(BENCHMARK_SIZE[0]):
                matrix = module.random.random(size=BENCHMARK_SIZE[-2:])
                acc.update(matrix)
            result = acc.get(final=True)

        print()

Module ::: numpy
    naive         nanmean evaluated in 5.8100 seconds
    acc stacking  nanmean evaluated in 5.5468 seconds
    acc amortized nanmean evaluated in 7.5932 seconds

    naive         nanstd evaluated in 6.7273 seconds
    acc stacking  nanstd evaluated in 6.7457 seconds
    acc amortized nanstd evaluated in 10.6772 seconds

    naive         nanmin evaluated in 3.6750 seconds
    acc stacking  nanmin evaluated in 3.7111 seconds
    acc amortized nanmin evaluated in 6.6303 seconds

    naive         nanmax evaluated in 3.6992 seconds
    acc stacking  nanmax evaluated in 3.7031 seconds
    acc amortized nanmax evaluated in 6.6747 seconds

    naive         argmin evaluated in 5.0597 seconds
    acc stacking  argmin evaluated in 5.0737 seconds
    acc amortized argmin evaluated in 4.4951 seconds

    naive         argmax evaluated in 5.0593 seconds
    acc stacking  argmax evaluated in 5.0670 seconds
    acc amortized argmax evaluated in 13.6282 seconds

Module ::: cupy
  

# Memory benchmark:
## Compare `NumPy` against both amortized and non-amortized `Accumulator`s

In [4]:
# Memory benchmark: for every operation, run the naive stacking approach and both
# `Accumulator` variants under a `MemoryMonitor` and print the spread (max - min) of
# the values it recorded. Units of `monitor.data` are not visible here — presumably GB, TODO confirm.
BENCHMARK_SIZE = (100, 2000, 2000) # usual size of the data along horizon
# Passed to `MemoryMonitor` as `frequency`; presumably the sampling period in seconds — TODO confirm
FREQUENCY = 0.01

# Only NumPy is measured in this cell
module = np 

for op in ['nanmean', 'nanstd', 'nanmin', 'nanmax', 'argmin', 'argmax']:
    
    # Baseline: store every matrix, then reduce over the stacking axis in one call
    module_func = getattr(module, op)
    with MemoryMonitor(frequency=FREQUENCY) as monitor:
        stacked = module.empty(BENCHMARK_SIZE)
        for i in range(BENCHMARK_SIZE[0]):
            matrix = module.random.random(size=BENCHMARK_SIZE[-2:])
            stacked[i, :, :] = matrix
        result = module_func(stacked, axis=0)
    print(f'naive         {op}  {np.max(monitor.data) - np.min(monitor.data):.4f}')
#     monitor.visualize()

    # Accumulator that is told the number of updates in advance (`total`).
    # NOTE: `stacked` from the baseline above is still referenced while this runs.
    acc = Accumulator(op, total=BENCHMARK_SIZE[0])
    with MemoryMonitor(frequency=FREQUENCY) as monitor:
        for _ in range(BENCHMARK_SIZE[0]):
            matrix = module.random.random(size=BENCHMARK_SIZE[-2:])
            acc.update(matrix)
        result = acc.get(final=True)
    print(f'acc stacking  {op}  {np.max(monitor.data) - np.min(monitor.data):.4f}')
#     monitor.visualize()
    
    # Accumulator created with `amortize=True`
    acc = Accumulator(op, amortize=True)
    with MemoryMonitor(frequency=FREQUENCY) as monitor:
        for _ in range(BENCHMARK_SIZE[0]):
            matrix = module.random.random(size=BENCHMARK_SIZE[-2:])
            acc.update(matrix)
        result = acc.get(final=True)
    print(f'acc amortized {op}  {np.max(monitor.data) - np.min(monitor.data):.4f}')
#     monitor.visualize()

    print()

naive         nanmean  6.7814
acc stacking  nanmean  6.7827
acc amortized nanmean  0.1629

naive         nanstd  6.7831
acc stacking  nanstd  6.8124
acc amortized nanstd  0.2197

naive         nanmin  3.0616
acc stacking  nanmin  3.0932
acc amortized nanmin  0.1659

naive         nanmax  3.0516
acc stacking  nanmax  3.0465
acc amortized nanmax  0.1604

naive         argmin  6.0576
acc stacking  argmin  6.0322
acc amortized argmin  0.1538

naive         argmax  6.0373
acc stacking  argmax  6.0429
acc amortized argmax  0.1656

