# Vectorization

This notebook demonstrates the advantage of NumPy's built-in vectorization (and other optimizations), using the calculation of a dot product as the example.

In [3]:
# Dot product not using numpy
import array

# Typed arrays of signed 8-byte integers (typecode 'q'), built directly from
# ranges instead of appending element by element:
#   a = 0 .. 99999,  b = 100000 .. 199999
a = array.array('q', range(100000))
b = array.array('q', range(100000, 200000))

# classic dot product of vectors implementation: an explicit Python-level
# loop over the indices (this is the loop that gets timed in the next cell).
# The accumulator starts as a float, so the printed result is a float.
dot = 0.0
for i in range(len(a)):
    dot += a[i] * b[i]

print(dot)


833323333350000.0


In [4]:
%%timeit
# Time the pure-Python index loop over a and b (the array.array objects
# built in the preceding cell)
dot = 0.0
for i in range(len(a)): 
    dot += a[i] * b[i]

10.6 ms ± 532 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [5]:
import numpy as np
# Print numpy's build configuration: the BLAS/LAPACK libraries it was built
# against and, depending on the numpy version, the supported SIMD extensions
np.show_config()

blas_armpl_info:
  NOT AVAILABLE
blas_mkl_info:
  NOT AVAILABLE
blis_info:
  NOT AVAILABLE
openblas_info:
  NOT AVAILABLE
accelerate_info:
  NOT AVAILABLE
atlas_3_10_blas_threads_info:
  NOT AVAILABLE
atlas_3_10_blas_info:
  NOT AVAILABLE
atlas_blas_threads_info:
  NOT AVAILABLE
atlas_blas_info:
  NOT AVAILABLE
blas_info:
    libraries = ['blas', 'blas']
    library_dirs = ['/usr/lib/x86_64-linux-gnu']
    include_dirs = ['/usr/local/include', '/usr/include']
    language = c
    define_macros = [('HAVE_CBLAS', None)]
blas_opt_info:
    define_macros = [('NO_ATLAS_INFO', 1), ('HAVE_CBLAS', None)]
    libraries = ['blas', 'blas']
    library_dirs = ['/usr/lib/x86_64-linux-gnu']
    include_dirs = ['/usr/local/include', '/usr/include']
    language = c
lapack_armpl_info:
  NOT AVAILABLE
lapack_mkl_info:
  NOT AVAILABLE
openblas_lapack_info:
  NOT AVAILABLE
openblas_clapack_info:
  NOT AVAILABLE
flame_info:
  NOT AVAILABLE
atlas_3_10_threads_info:
  NOT AVAILABLE
atlas_3_10_info:
  NOT AV

In [6]:
## using numpy arrays

# NOTE: these arrays hold 10,000 elements each, whereas the array.array
# example at the top of the notebook uses 100,000 elements, so the two loop
# timings are not directly comparable without accounting for the 10x
# difference in size.
a = np.arange(0, 10000)
b = np.arange(10000, 20000)

# Same explicit Python-level index loop as before, now over numpy arrays:
# each a[i] / b[i] is an individual element lookup, so nothing is vectorized.
dot = 0.0
for i in range(len(a)):
    dot += a[i] * b[i]

print(dot)


833233335000.0


In [7]:
%%timeit
# Time the same explicit index loop, now indexing into the numpy arrays a and b
dot = 0.0 
for i in range(len(a)): 
      dot += a[i] * b[i] 

2.81 ms ± 396 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [8]:
## Vectorize function
def mydot(a, b):
    """Return the product ``a * b``.

    Despite its name, this is only the per-element multiplication step of the
    dot product; the summation is done separately with ``np.sum`` below.
    """
    return a * b


# np.vectorize wraps the function so it can be called on whole arrays; it is
# a convenience wrapper rather than a fast path.
myvecdot = np.vectorize(mydot)
print(np.sum(myvecdot(a, b)))

833233335000


In [9]:
%%timeit
# Time the np.vectorize-wrapped elementwise product followed by np.sum
np.sum(myvecdot(a,b))

1.15 ms ± 82.8 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


## BUT: 
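The `np.vectorize` function is provided primarily for convenience, not for performance. Its implementation is essentially a for loop, which is why it is only modestly faster than the explicit loop above and far slower than the native NumPy operations below.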

See: https://numpy.org/doc/stable/reference/generated/numpy.vectorize.html

In [10]:
## just using numpy's smart sum function
# a*b is the elementwise product computed by numpy; np.sum then reduces it
# along axis 0, the only axis of these 1-D arrays.

print(np.sum(a*b, axis=0))

833233335000


In [11]:
%%timeit
# Time the elementwise product a*b followed by the np.sum reduction
np.sum(a*b, axis=0)

8.91 µs ± 352 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [12]:
## numpy dot function which is vectorized
# A single library call replaces the explicit multiply-and-accumulate loop.
dot = np.dot(a, b)  

print(dot) 

833233335000


In [13]:
%%timeit
# Time the single np.dot call on the two numpy arrays
dot = np.dot(a, b)  

7.61 µs ± 371 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [14]:
## numpy inner
# Inner product of a and b; for these 1-D arrays it prints the same value as
# np.dot above.
print(np.inner( a, b))

833233335000


In [15]:
%%timeit
# Time the np.inner call on the two numpy arrays
np.inner(a, b)

7.81 µs ± 480 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


In [16]:
## numpy einsum
# 'i,i' gives both operands the same index and names no output index, so the
# elementwise products are summed over i, i.e. the dot product.
print(np.einsum('i,i', a, b))

833233335000


In [17]:
%%timeit
# Time the einsum formulation of the dot product
np.einsum('i,i', a, b)

5.75 µs ± 595 ns per loop (mean ± std. dev. of 7 runs, 100,000 loops each)


## SCIPY

Example: the `vectorized` parameter of `scipy.integrate.solve_ivp`:

> vectorized: bool, optional

>Whether fun can be called in a vectorized fashion. Default is False.
If vectorized is False, fun will always be called with y of shape (n,), where n = len(y0).
If vectorized is True, fun may be called with y of shape (n, k), where k is an integer. In this case, fun must behave such that fun(t, y)[:, i] == fun(t, y[:, i]) (i.e. each column of the returned array is the time derivative of the state corresponding with a column of y).
Setting vectorized=True allows for faster finite difference approximation of the Jacobian by methods ‘Radau’ and ‘BDF’, but will result in slower execution for other methods and for ‘Radau’ and ‘BDF’ in some circumstances (e.g. small len(y0)).