In [5]:
import numpy as np
import numpy.ma as ma
from random import sample

## What are masks?

We can apply a mask to an array to "clean" the data in it. For this, we are going to use the ``numpy.ma`` module.

In [3]:
# Sample data with one invalid (NaN) reading at index 3; the NaN forces a float array.
dados_invalidos = np.array([1.0, 2.0, 3.0, float("nan"), 5.0])

In [4]:
# Build the mask by hand: True hides an entry, False keeps it.
# Only index 3 (the NaN) is hidden here.
dados_mascarados = ma.masked_array(
    data=dados_invalidos,
    mask=[False, False, False, True, False],
)
dados_mascarados

masked_array(data=[1.0, 2.0, 3.0, --, 5.0],
             mask=[False, False, False,  True, False],
       fill_value=1e+20)

## Masking an Array

In the previous example, we could mask the NaN value only because we knew its position in the array. Now, we are going to learn **how to mask a NumPy array knowing only the value to mask**.

In [6]:
# Build 0..19 and overwrite five distinct, randomly chosen positions with the
# -999 sentinel that the following cells will mask.
array_invalido = np.arange(20)

# Draw *positions* (not values from the array's raw buffer) so the selection
# stays correct even if the array contents stop matching their indices.
# The generator is seeded so Restart & Run All reproduces the same positions.
rng = np.random.default_rng(42)
indices = rng.choice(array_invalido.size, size=5, replace=False)

array_invalido[indices] = -999
print(array_invalido)

[-999 -999    2    3    4 -999    6    7    8    9   10   11   12 -999
   14   15 -999   17   18   19]


In [7]:
# Mask every entry equal to the -999 sentinel; the condition is computed from
# the values themselves, so no positions have to be known in advance.
array_mascarado = ma.masked_where(
    condition=np.equal(array_invalido, -999),
    a=array_invalido,
)
print(array_mascarado)

[-- -- 2 3 4 -- 6 7 8 9 10 11 12 -- 14 15 -- 17 18 19]


In [8]:
# Boolean indexing: keep only the entries where the element-wise test "< 0" is True.
array_invalido[np.less(array_invalido, 0)]

array([-999, -999, -999, -999, -999])

In [14]:
# The scalar is applied to every element of the array.
2 * np.array([10, 20, 30])

array([20, 40, 60])