In [None]:
import numpy as np

def print_info(a):
    """Show an array followed by a small table of its metadata.

    The array itself is printed first, then one tab-separated line each for
    its ``dtype``, ``ndim``, ``shape`` and ``strides``.
    """
    # Gather the metadata rows before anything is printed.
    labels = ("dtype", "ndim", "shape", "strides")
    rows = [f"{label}\t{getattr(a, label)}" for label in labels]

    # The report starts with an empty line and ends with an indented one.
    report = "\n" + "\n".join(rows) + "\n    "

    print(a)
    print(report)
    
def is_view(a, x):
    """Report whether `a` is a direct view of `x`, i.e. whether ``a.base is x``.

    Prints 'Bases are the same' when ``a.base`` is the very object `x`.
    Otherwise prints 'Bases are different', followed by each array together
    with its ``.base`` so the two can be inspected. Nothing is returned.
    """
    if a.base is x:
        print('Bases are the same')
    else:
        print('Bases are different')
        # Plain str.format templates (no f-prefix), so that {0} and {1} are
        # filled with the arguments rather than evaluated as the numbers 0 and 1.
        print('The base of {0} is {1}'.format(str(a), a.base))
        print('The base of {0} is {1}'.format(str(x), x.base))

# NumPy views and copies

- Operations that only require changing the metadata always do so, and return a **view**
- Operations that cannot be executed by changing the metadata create a new memory block, and return a **copy**

In [None]:
# 3x4 array holding 0..11. The trailing .copy() gives x a memory block of its
# own instead of leaving it as a reshaped view of the arange result.
x = np.arange(12).reshape(3, 4).copy()
print_info(x)

# Views

Operations that only require changing the metadata always do so, and return a **view**

In [None]:
# slice: every second row (rows 0 and 2) and every second column starting
# at column 1 (columns 1 and 3)
y = x[0::2, 1::2]
print_info(y)

A view shares the same memory block as the original array. 

CAREFUL: Modifying the view changes the original array and all other views of that array as well!

In [None]:
# Same 12 elements presented as one row of shape (1, 12). For a contiguous
# array such as x this only needs new shape/strides metadata.
z = x.reshape(1, 12)
print_info(z)

##### in place operations

In [None]:
# In-place add on y. With y being the slice of x taken earlier, this writes
# into the memory block that y shares with x.
y += 100
print_info(y)

In [None]:
# x and its reshaped view z read the same memory that `y += 100` wrote to,
# so both show the modified entries.
print_info(x)
print_info(z)

Functions that take an array as an input should **avoid modifying it in place!**

Always make a copy or be super extra clear in the docstring.

In [None]:
def robust_log(a, cte=1e-10):
    """ Compute the natural logarithm of an array while sidestepping log(0).

    Entries of `a` that are exactly 0 get the small constant `cte` added
    before the logarithm is taken.
    `a` is expected to hold non-negative values.
    """
    zero_entries = (a == 0)
    a[zero_entries] += cte
    return np.log(a)
    
# this is not being very clear

In [None]:
a = np.array([[0.3, 0.01], [0, 1]])

In [None]:
# This is a view of `a`: an integer index plus a slice selects the second row
# without copying any data.
b = a[1, :]
print_info(b)

In [None]:
# what is the output?
robust_log(a)

In [None]:
# what is the output?
a   # what about b??

Better to make a copy!

In [None]:
def robust_log(a, cte=1e-10):
    """ Returns the log of an array, avoiding troubles when a value is 0.

    Works on a private copy of `a`, so the caller's array (and any view of
    it) is never modified. Entries of the copy that are exactly 0 get the
    tiny constant `cte` added before the logarithm is taken.

    `a` may be any array-like with non-negative values. Integer and boolean
    input is converted to float first, because `cte` cannot be added in
    place to a non-floating-point array.
    """
    a = np.asanyarray(a)
    if np.issubdtype(a.dtype, np.inexact):
        # Already float/complex: keep the dtype, just detach from the input.
        a = a.copy()
    else:
        # astype returns a new array, so this is both the copy and the cast.
        a = a.astype(float)
    a[a == 0] += cte
    return np.log(a)

In [None]:
# Same setup as for the in-place version: a[1, 0] is exactly 0, and b is a
# view of the second row of a.
a = np.array([[0.3, 0.01], [0, 1]])
b = a[1, :]

# NOTE(review): the call is commented out, presumably to be enabled during the
# live demo; without it the next cell cannot show any effect -- confirm.
#robust_log(a)

In [None]:
a  # what is the output?   
# b

# Copies

Operations that cannot be executed by changing the metadata create a new memory block, and return a **copy**

In [None]:
# Start again from a fresh 3x4 array (values 0..11) that owns its own data.
x = np.arange(12).reshape(3, 4).copy()
print_info(x)

Choosing rows, columns, or individual elements of an array by explicitly giving their indices (a.k.a. "fancy indexing") is an operation that in general cannot be executed by changing the metadata alone.

Therefore, **fancy indexing always returns a copy**.

### Fancy indexing

<div>
<img src="images/fancy_indexing_lookup.png" width="700"/>
</div>

In [None]:
# fancy indexing: the list [0, 1] names the wanted columns explicitly
y = x[:, [0, 1]]
print_info(y)

# .base is the array an array borrows its memory from (None if it owns its data)
print('The base of x is {0}'.format(x.base))
print('The base of y is {0}'.format(y.base))

In [None]:
# In-place add on the fancy-indexed result y.
y += 1000
print_info(y)
# the original array is unchanged => not a view!
print_info(x)


# .base is the array an array borrows its memory from (None if it owns its data)
print('The base of x is {0}'.format(x.base))
print('The base of y is {0}'.format(y.base))

In [None]:
#print(x)
# Fancy indexing with two index lists: they are paired up element-wise as
# (row, column), giving a 1-D result with one entry per pair.
z = x[[0, 0, 2], [1, 0, 3]]
# Can you guess what's z equal to?

print_info(z)

# print('the base of x is {0}'.format(x.base))
# print('the base of z is {0}'.format(z.base))

In [None]:
# In-place add on the fancy-indexed result z.
z += 1000
print_info(z)

# the original array is unchanged => not a view!
print_info(x)

# .base is the array an array borrows its memory from (None if it owns its data)
print('the base of x is {0}'.format(x.base))
print('the base of z is {0}'.format(z.base))

Any operation that computes new values also returns a copy.

In [None]:
# The product is written to a newly allocated array that is then bound to the
# name y; x itself is not touched.
y = x * 7.1 # !! not an in-place operation
print_info(y)

# base of the freshly computed result
print(y.base)

# Views and Copies: an important distinction!

**View**
- accessing the array without changing the data buffer
- **regular indexing** and **slicing** give views
- *in-place* operations can be done on views

**Copy**
- when a new array is created by duplicating the data buffer as well as the array metadata
- **fancy indexing** always gives copies
- a copy can be forced by method **.copy()**

### 1.2.1 Strides - why does some indexing give copies and other indexing views?

- How does numpy arrange data in memory? When you create an array, numpy allocates a block of memory whose size depends on the dtype you choose.

Strides tell you by how many bytes you should move in memory when moving one step in that dimension.

<div>
<img src="images/strides.png" width="700"/>
</div>

To go from the first item in the first row to the first item in the second row, you need to move 3 * 8 = 24 bytes. To move to the next column within a row, you just need to move 8 bytes.

In [17]:
# 3x3 array of 0..8. With 8-byte integers (int64 in the saved output), one row
# spans 3 * 8 = 24 bytes, which is where the strides (24, 8) come from.
x = np.arange(9).reshape(3, 3).copy()
print_info(x)

[[0 1 2]
 [3 4 5]
 [6 7 8]]

dtype	int64
ndim	2
shape	(3, 3)
strides	(24, 8)
    


**Views** are created when you use other strides to read your data. Slicing and regular indexing allow that, as you know how many bytes you need to step to get the data.

**Fancy indexing** does not allow that, because the data you are asking for **cannot** be obtained by just changing the strides. Thus, numpy needs to make a **copy** of it in memory.

# Your turn. Hands on. If view, how's the metadata changed?

### Open the notebook in the exercises folder
- exercises/view_or_copy/view_or_copy.ipynb


In [23]:
x = np.arange(12).reshape(3, 4).copy()
#print_info(x)
x

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]

dtype	int64
ndim	2
shape	(3, 4)
strides	(32, 8)
    


In [None]:
x[[0, 2], :]

In [None]:
x[1, :]

In [None]:
x[1]


In [None]:
x[[1, 2, 0], [1, 1, 2]]


In [None]:
x[[0, 2], :]


In [None]:
x.reshape((6, 2))
    

In [None]:
x.ravel()
#x.T.ravel()


In [None]:
x[(x % 2) == 1]


In [None]:
y = x + 2
# how to make it a view?

In [None]:
y = np.sort(x, axis=1)
