In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Default size for every matplotlib figure created in this notebook.
plt.rc('figure', figsize=(10, 6))
# Remember the current pandas row limit; the last cell of the notebook
# assigns it back to pd.options.display.max_rows.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
# Keep pandas reprs compact while the examples run.
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
# Show arrays with 4 decimal places; suppress=True avoids scientific
# notation for small values.
np.set_printoptions(precision=4, suppress=True)

In [4]:
rng = np.random.default_rng(seed=12345)

## A.1 ndarray Object Internals

More precisely, the ndarray internally consists of the following:

A pointer to data—that is, a block of data in RAM or in a memory-mapped file

The data type or dtype describing fixed-size value cells in the array

A tuple indicating the array’s shape

A tuple of strides—integers indicating the number of bytes to “step” in order to advance one element along a dimension

In [5]:
np.ones((10, 5)).shape

(10, 5)

In [6]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

### NumPy Data Type Hierarchy

You may occasionally have code that needs to check whether an array contains integers, floating-point numbers, strings, or Python objects. Because there are multiple types of floating-point numbers (float16 through float128), checking that the data type is among a list of types would be very verbose. Fortunately, the data types have superclasses, such as `np.integer` and `np.floating`, which can be used with the `np.issubdtype` function:

In [7]:
ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)

True

In [8]:
ints

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint16)

In [9]:
np.issubdtype(floats.dtype, np.floating)

True

You can see all of the parent classes of a specific data type by calling the type’s mro method:


The `mro()` method, when called on a data type like np.float64, provides the Method Resolution Order for that data type within NumPy's type hierarchy.

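The Method Resolution Order (MRO) is a standard Python class mechanism: it lists the class itself followed by its parent classes, in the order Python searches them when looking up an attribute or method. In this context, it is simply a convenient way to see every superclass of a NumPy scalar type; any of the NumPy superclasses it lists (for example `np.floating` or `np.number`) can be passed to `np.issubdtype`.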

In [10]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [11]:
np.issubdtype(ints.dtype, np.number)

True

## A.2 Advanced Array Manipulation

### Reshaping Arrays

In [12]:
arr = np.arange(8)
arr
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [13]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [14]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [15]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [16]:
other_arr = np.ones((3, 5))
other_arr.shape


(3, 5)

In [17]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

The opposite operation of reshape from one-dimensional to a higher dimension is typically known as flattening or raveling:

In [18]:
arr = np.arange(15).reshape((5, 3))
arr


array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

`ravel` does not produce a copy of the underlying values if the values in the result were contiguous in the original array.

The `flatten` method behaves like ravel except it always returns a copy of the data:

In [19]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [20]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
arr = np.arange(12).reshape((3, 4))
arr


array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [22]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

Functions like reshape and ravel accept an order argument indicating the order to use the data in the array. This is usually set to 'C' or 'F' in most cases (there are also less commonly used options 'A' and 'K'; see the NumPy documentation, and refer back to Figure A.3 for an illustration of these options):

In [23]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

Reshaping arrays with more than two dimensions can be a bit mind-bending (see Figure A.3). The key difference between C and FORTRAN order is the way in which the dimensions are walked:

C/row major order
Traverse higher dimensions first (e.g., axis 1 before advancing on axis 0).

FORTRAN/column major order
Traverse higher dimensions last (e.g., axis 0 before advancing on axis 1).



In [24]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])


In [25]:
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [26]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [27]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [28]:

np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [29]:
arr = rng.standard_normal((5, 2))
arr

array([[-1.4238,  1.2637],
       [-0.8707, -0.2592],
       [-0.0753, -0.7409],
       [-1.3678,  0.6489],
       [ 0.3611, -1.9529]])

split, on the other hand, slices an array into multiple arrays along an axis:

```numpy.split(ary, indices_or_sections, axis=0)```

- ary: The array you want to split.
- indices_or_sections: If this is an integer, n, the array will be divided into n equal arrays along the axis. If such a split is not possible, it will raise an error. If this is a 1-D array of sorted integers, it defines the points after which the split takes place. For example, [2, 3] would mean splitting after the second and third elements of the array.

In [30]:
arr


array([[-1.4238,  1.2637],
       [-0.8707, -0.2592],
       [-0.0753, -0.7409],
       [-1.3678,  0.6489],
       [ 0.3611, -1.9529]])

In [31]:

first, second, third = np.split(arr, [1, 3])
first


array([[-1.4238,  1.2637]])

In [32]:
second,third

(array([[-0.8707, -0.2592],
        [-0.0753, -0.7409]]),
 array([[-1.3678,  0.6489],
        [ 0.3611, -1.9529]]))

The value [1, 3] passed to np.split indicates the indices at which to split the array into pieces.

See Table A.1 for a list of all relevant concatenation and splitting functions, some of which are provided only as a convenience of the very general-purpose concatenate.

Table A.1: Array concatenation functions
Function |	Description
|:--------|:---------------------------------------------------|
concatenate	| Most general function, concatenate collection of arrays along one axis
vstack, row_stack|	Stack arrays by rows (along axis 0)
hstack|	Stack arrays by columns (along axis 1)
column_stack|	Like hstack, but convert 1D arrays to 2D column vectors first
dstack	|Stack arrays by “depth” (along axis 2)
split|	Split array at passed locations along a particular axis
hsplit/vsplit | Convenience functions for splitting on axis 1 and 0, respectively


### Stacking helpers: r_ and c_
There are two special objects in the NumPy namespace, r_ and c_, that make stacking arrays more concise:

In [33]:
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = rng.standard_normal((3, 2))
np.r_[arr1, arr2]


array([[ 0.    ,  1.    ],
       [ 2.    ,  3.    ],
       [ 4.    ,  5.    ],
       [ 2.3474,  0.9685],
       [-0.7594,  0.9022],
       [-0.467 , -0.0607]])

In [34]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.    ,  1.    ,  0.    ],
       [ 2.    ,  3.    ,  1.    ],
       [ 4.    ,  5.    ,  2.    ],
       [ 2.3474,  0.9685,  3.    ],
       [-0.7594,  0.9022,  4.    ],
       [-0.467 , -0.0607,  5.    ]])

In [35]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

### Repeating Elements: tile and repeat

Two useful tools for repeating or replicating arrays to produce larger arrays are the repeat and tile functions. repeat replicates each element in an array some number of times, producing a larger array:

In [36]:
arr = np.arange(3)
arr


array([0, 1, 2])

In [37]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

By default, if you pass an integer, each element will be repeated that number of times. If you pass an array of integers, each element can be repeated a different number of times:

In [38]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [39]:
arr = rng.standard_normal((2, 2))
arr


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

Multidimensional arrays can have their elements repeated along a particular axis:

In [40]:
arr.repeat(2, axis=0)

array([[ 0.7888, -1.2567],
       [ 0.7888, -1.2567],
       [ 0.5759,  1.399 ],
       [ 0.5759,  1.399 ]])

Note that if no axis is passed, the array will be flattened first, which is likely not what you want. Similarly, you can pass an array of integers when repeating a multidimensional array to repeat a given slice a different number of times:

In [41]:
arr.repeat([2, 3], axis=0)


array([[ 0.7888, -1.2567],
       [ 0.7888, -1.2567],
       [ 0.5759,  1.399 ],
       [ 0.5759,  1.399 ],
       [ 0.5759,  1.399 ]])

In [42]:
arr.repeat([2, 3], axis=1)

array([[ 0.7888,  0.7888, -1.2567, -1.2567, -1.2567],
       [ 0.5759,  0.5759,  1.399 ,  1.399 ,  1.399 ]])

tile, on the other hand, is a shortcut for stacking copies of an array along an axis. Visually you can think of it as being akin to “laying down tiles”:

In [43]:
arr


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

In [44]:
np.tile(arr, 2)

array([[ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ]])

The second argument is the number of tiles; with a scalar, the copies are laid side by side along the last axis (each row gets longer) rather than stacked on top of each other. The second argument to tile can also be a tuple indicating the layout of the “tiling”:

In [45]:
arr
np.tile(arr, (2, 1))


array([[ 0.7888, -1.2567],
       [ 0.5759,  1.399 ],
       [ 0.7888, -1.2567],
       [ 0.5759,  1.399 ]])

In [46]:
np.tile(arr, (3, 2))

array([[ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ],
       [ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ],
       [ 0.7888, -1.2567,  0.7888, -1.2567],
       [ 0.5759,  1.399 ,  0.5759,  1.399 ]])

### Fancy Indexing Equivalents: take and put
As you may recall from Ch 4: NumPy Basics: Arrays and Vectorized Computation, one way to get and set subsets of arrays is by fancy indexing using integer arrays:

In [47]:
arr = np.arange(10) * 100
arr

array([  0, 100, 200, 300, 400, 500, 600, 700, 800, 900])

In [48]:
inds = [7, 1, 2, 6]
arr[inds]

array([700, 100, 200, 600])

There are alternative ndarray methods that are useful in the special case of making a selection only on a single axis:

In [49]:
arr.take(inds)

array([700, 100, 200, 600])

In [50]:
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [51]:
arr.put(inds, [40, 41, 42, 43])
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

To use take along other axes, you can pass the axis keyword:

In [52]:
inds = [2, 0, 2, 1]
arr = rng.standard_normal((2, 4))
arr


array([[ 1.3223, -0.2997,  0.9029, -1.6216],
       [-0.1582,  0.4495, -1.3436, -0.0817]])

In [53]:
arr.take(inds, axis=1)

array([[ 0.9029,  1.3223,  0.9029, -0.2997],
       [-1.3436, -0.1582, -1.3436,  0.4495]])

put does not accept an axis argument but rather indexes into the flattened (one-dimensional, C order) version of the array. Thus, when you need to set elements using an index array on other axes, it is best to use []-based indexing.

## A.3 Broadcasting
Broadcasting governs how operations work between arrays of different shapes. It can be a powerful feature, but it can cause confusion, even for experienced users. The simplest example of broadcasting occurs when combining a scalar value with an array:

Two arrays are compatible for broadcasting if for each trailing dimension (i.e., starting from the end) the axis lengths match or if either of the lengths is 1. Broadcasting is then performed over the missing or length 1 dimensions.

In [54]:
arr = np.arange(5)
arr
arr * 4

array([ 0,  4,  8, 12, 16])

In [55]:
arr = rng.standard_normal((4, 3))
arr


array([[ 1.7247,  2.6182,  0.7774],
       [ 0.8286, -0.959 , -1.2094],
       [-1.4123,  0.5415,  0.7519],
       [-0.6588, -1.2287,  0.2576]])

In [56]:
arr.mean(), arr.mean(0)

(0.1693194977715596, array([0.1206, 0.243 , 0.1444]))

In [57]:
demeaned = arr - arr.mean(0)
demeaned
demeaned.mean(0)

array([ 0., -0.,  0.])

In [58]:
arr
row_means = arr.mean(1)
row_means

array([ 1.7068, -0.4466, -0.0396, -0.5433])

In [59]:
row_means.shape

(4,)

In [60]:
row_means.reshape((4, 1))

array([[ 1.7068],
       [-0.4466],
       [-0.0396],
       [-0.5433]])

In [61]:
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

array([-0.,  0.,  0.,  0.])

### Broadcasting over Other Axes
Broadcasting with higher dimensional arrays can seem even more mind-bending, but it is really a matter of following the rules. If you don’t, you’ll get an error like this:

In [62]:
# arr has shape (4, 3) while arr.mean(1) has shape (4,); the trailing
# dimensions (3 vs. 4) do not match, so NumPy refuses to broadcast.
try:
    arr - arr.mean(1)
except ValueError:
    # Catch only the broadcasting failure; any other exception (for example
    # a NameError from running cells out of order) should still surface.
    print("Error!!!\n")

Error!!!



It’s quite common to want to perform an arithmetic operation with a lower dimensional array across axes other than axis 0. According to the broadcasting rule, the “broadcast dimensions” must be 1 in the smaller array. In the example of row demeaning shown here, this means reshaping the row means to be shape (4, 1) instead of (4,):

In [63]:
arr - arr.mean(1).reshape((4, 1))

array([[ 0.018 ,  0.9114, -0.9294],
       [ 1.2752, -0.5124, -0.7628],
       [-1.3727,  0.5811,  0.7915],
       [-0.1155, -0.6854,  0.8009]])

A common problem, therefore, is needing to add a new axis with length 1 specifically for broadcasting purposes. Using `reshape` is one option, but inserting an axis requires constructing a tuple indicating the new shape. This often can be a tedious exercise. Thus, NumPy arrays offer a special syntax for inserting new axes by indexing. We use the special `np.newaxis` attribute along with “full” slices to insert the new axis:



In [64]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape

(4, 1, 4)

In [65]:
arr_1d = rng.standard_normal(3)
arr_1d[:, np.newaxis]

array([[ 0.3129],
       [-0.1308],
       [ 1.27  ]])

In [66]:

arr_1d[np.newaxis, :]

array([[ 0.3129, -0.1308,  1.27  ]])

Thus, if we had a three-dimensional array and wanted to demean axis 2, we would need to write:

In [67]:
arr = rng.standard_normal((3, 4, 5))
depth_means = arr.mean(2)
depth_means

array([[ 0.0431,  0.2747, -0.1885, -0.2014],
       [-0.5732, -0.5467,  0.1183, -0.6301],
       [ 0.0972,  0.5954,  0.0331, -0.6002]])

In [68]:
depth_means.shape

(3, 4)

In [69]:
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

array([[ 0., -0.,  0., -0.],
       [ 0., -0., -0., -0.],
       [ 0.,  0.,  0.,  0.]])

In [70]:
arr.ndim

3

In Python, `[slice(None)]` is a way to create a single-element list containing a slice object. A slice object is used to specify how to extract a portion of a sequence (such as a list, tuple, or string). The `slice(None)` is equivalent to the full slice, meaning it includes all elements along that axis.

Here's a breakdown of `[slice(None)]`:

- `slice`: This is a built-in Python type; calling it (for example `slice(2, 7)`) creates a slice object.
- `None`: When `None` is used as an argument to slice, it represents an omitted or unspecified value, effectively meaning "all" when used as a start, stop, or step in a slice.

In [71]:
original_string = "Hello, World!"

# Using a slice with start, stop, and step
substring = original_string[7:12:2]  # Elements from index 7 to 11, with a step of 2

print(substring)


Wrd


In [72]:
original_list = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Creating a slice object using the slice function
my_slice = slice(2, 7)  # Slice from index 2 to 6 (7 is exclusive)

# Applying the slice object to the list
subset = original_list[my_slice]

print(subset)


[3, 4, 5, 6, 7]


In [73]:
slice(None)

slice(None, None, None)

In [74]:
[slice(None)]

[slice(None, None, None)]

In [75]:
[slice(None)] * arr.ndim

[slice(None, None, None), slice(None, None, None), slice(None, None, None)]

```
def demean_axis(arr, axis=0):
    """Subtract the mean taken along ``axis`` from ``arr``.

    Parameters
    ----------
    arr : ndarray
        Input array of any dimensionality (at least 1-D).
    axis : int, default 0
        Axis along which the means are computed and removed.

    Returns
    -------
    ndarray
        Array with the same shape as ``arr`` whose mean along ``axis``
        is zero (up to floating-point error).
    """
    means = arr.mean(axis)

    # This generalizes things like [:, :, np.newaxis] to N dimensions:
    # a full slice for every axis of `arr`, with the reduced axis
    # re-inserted as a length-1 axis so that `means` broadcasts.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # NumPy requires a tuple for multidimensional indexing; indexing with
    # a list of slices is rejected by current NumPy releases.
    return arr - means[tuple(indexer)]
```

### Setting Array Values by Broadcasting
The same broadcasting rule governing arithmetic operations also applies to setting values via array indexing. In a simple case, we can do things like:

In [76]:
arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

However, if we had a one-dimensional array of values we wanted to set into the columns of the array, we can do that as long as the shape is compatible:


In [77]:
col = np.array([1.28, -0.42, 0.44, 1.6])
col[:, np.newaxis]

array([[ 1.28],
       [-0.42],
       [ 0.44],
       [ 1.6 ]])

In [78]:
arr[:] = col[:, np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [79]:
[[-1.37], [0.509]]

[[-1.37], [0.509]]

In [81]:
np.array([[-1.37], [0.509]])

array([[-1.37 ],
       [ 0.509]])

In [80]:
arr[:2]

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42]])

In [84]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

## A.4 Advanced ufunc Usage
While many NumPy users will only use the fast element-wise operations provided by the universal functions, a number of additional features occasionally can help you write more concise code without explicit loops.


### ufunc Instance Methods
Each of NumPy’s binary ufuncs has special methods for performing certain kinds of special vectorized operations. These are summarized in Table A.2, but I’ll give a few concrete examples to illustrate how they work.

Table A.2: ufunc methods
Method	| Description
|:--------------|:--------------------------------------------------
accumulate(x)|	Aggregate values, preserving all partial aggregates.
at(x, indices, b=None)|	Perform operation in place on x at the specified indices. The argument b is the second input to ufuncs that requires two array inputs.
reduce(x)	|Aggregate values by successive applications of the operation.
reduceat(x, bins)|	“Local” reduce or “group by”; reduce contiguous slices of data to produce an aggregated array.
outer(x, y)	|Apply operation to all pairs of elements in x and y; the resulting array has shape x.shape + y.shape.




`reduce` takes a single array and aggregates its values, optionally along an axis, by performing a sequence of binary operations. For example, an alternative way to sum elements in an array is to use `np.add.reduce`:

The np.add.reduce() function in NumPy is a special method that applies the add operation (element-wise addition) in a "reducing" manner across an array. Essentially, it reduces the array's dimensions by one, by successively applying the add operation along a specified axis (default is axis 0). This process is similar to folding or accumulating the elements of the array.

In [82]:
arr = np.arange(10)
np.add.reduce(arr)


45

In [83]:
arr.sum()

45

The starting value (for example, 0 for add) depends on the ufunc. If an axis is passed, the reduction is performed along that axis. This allows you to answer certain kinds of questions in a concise way. As a less mundane example, we can use `np.logical_and` to check whether the values in each row of an array are sorted:

In [84]:
my_rng = np.random.default_rng(12346)  # for reproducibility
arr = my_rng.standard_normal((5, 5))
arr

array([[-0.9039,  0.1571,  0.8976, -0.7622, -0.1763],
       [ 0.053 , -1.6284, -0.1775,  1.9636,  1.7813],
       [-0.8797, -1.6985, -1.8189,  0.119 , -0.4441],
       [ 0.7691, -0.0343,  0.3925,  0.7589, -0.0705],
       [ 1.0498,  1.0297, -0.4201,  0.7863,  0.9612]])

In [85]:
arr[::2]

array([[-0.9039,  0.1571,  0.8976, -0.7622, -0.1763],
       [-0.8797, -1.6985, -1.8189,  0.119 , -0.4441],
       [ 1.0498,  1.0297, -0.4201,  0.7863,  0.9612]])

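`.sort(1)` is then applied to this subset of rows. The `.sort()` method sorts an array in place. The argument 1 means the sort runs along axis 1, so the values within each selected row are put in ascending order from left to right. Because `arr[::2]` is a view, the in-place sort also changes the corresponding rows of `arr` itself.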

In [86]:
arr[::2].sort(1) # sort every other row in place along axis 1 (within each row)

In [87]:
arr

array([[-0.9039, -0.7622, -0.1763,  0.1571,  0.8976],
       [ 0.053 , -1.6284, -0.1775,  1.9636,  1.7813],
       [-1.8189, -1.6985, -0.8797, -0.4441,  0.119 ],
       [ 0.7691, -0.0343,  0.3925,  0.7589, -0.0705],
       [-0.4201,  0.7863,  0.9612,  1.0297,  1.0498]])

In [88]:
arr[:, :-1], arr[:, 1:]

(array([[-0.9039, -0.7622, -0.1763,  0.1571],
        [ 0.053 , -1.6284, -0.1775,  1.9636],
        [-1.8189, -1.6985, -0.8797, -0.4441],
        [ 0.7691, -0.0343,  0.3925,  0.7589],
        [-0.4201,  0.7863,  0.9612,  1.0297]]),
 array([[-0.7622, -0.1763,  0.1571,  0.8976],
        [-1.6284, -0.1775,  1.9636,  1.7813],
        [-1.6985, -0.8797, -0.4441,  0.119 ],
        [-0.0343,  0.3925,  0.7589, -0.0705],
        [ 0.7863,  0.9612,  1.0297,  1.0498]]))

In [89]:
arr[:, :-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [90]:

np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [91]:
arr = np.arange(15).reshape((3, 5))
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [92]:

np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [93]:
arr = np.arange(3).repeat([1, 2, 2])
arr

array([0, 1, 1, 2, 2])

outer performs a pair-wise cross product between two arrays:

The outer product of arr and np.arange(5) results in a 2D array where each element of arr is multiplied by each element of [0, 1, 2, 3, 4].

The final output is a 2D array where each row corresponds to an element of arr, and each column corresponds to an element of [0, 1, 2, 3, 4]. Each cell in the 2D array is the product of the corresponding elements from arr and [0, 1, 2, 3, 4].

In [94]:

np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [102]:
x,y =np.arange(12).reshape(3,4), np.arange(5)
x,y

(array([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]]),
 array([0, 1, 2, 3, 4]))

In [103]:
np.subtract.outer(x,y)

array([[[ 0, -1, -2, -3, -4],
        [ 1,  0, -1, -2, -3],
        [ 2,  1,  0, -1, -2],
        [ 3,  2,  1,  0, -1]],

       [[ 4,  3,  2,  1,  0],
        [ 5,  4,  3,  2,  1],
        [ 6,  5,  4,  3,  2],
        [ 7,  6,  5,  4,  3]],

       [[ 8,  7,  6,  5,  4],
        [ 9,  8,  7,  6,  5],
        [10,  9,  8,  7,  6],
        [11, 10,  9,  8,  7]]])

In [97]:
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)
x,y

(array([[-0.6236,  1.1218,  0.8412, -0.7759],
        [ 0.4107, -2.7224, -0.6733,  1.2462],
        [ 0.7902,  0.1753, -0.0293, -1.4195]]),
 array([-1.36  ,  0.2234,  1.7618, -2.1709,  0.6285]))

np.subtract.outer computes the outer difference between two arrays. It applies the subtraction operation for each pair of elements from x and y.
For arrays x of shape (3, 4) and y of shape (5,), the result of the outer operation will be an array of shape (3, 4, 5).
Each element of this result array is computed as x[i, j] - y[k], where i, j, and k iterate over the dimensions of x and y.

In [98]:
result = np.subtract.outer(x, y)
result

array([[[ 0.7364, -0.847 , -2.3854,  1.5473, -1.2521],
        [ 2.4818,  0.8984, -0.64  ,  3.2927,  0.4933],
        [ 2.2012,  0.6178, -0.9206,  3.0121,  0.2127],
        [ 0.5841, -0.9993, -2.5377,  1.395 , -1.4044]],

       [[ 1.7707,  0.1873, -1.3511,  2.5816, -0.2178],
        [-1.3624, -2.9458, -4.4842, -0.5515, -3.3509],
        [ 0.6867, -0.8967, -2.4351,  1.4976, -1.3018],
        [ 2.6062,  1.0228, -0.5156,  3.4171,  0.6177]],

       [[ 2.1502,  0.5668, -0.9716,  2.9611,  0.1617],
        [ 1.5353, -0.0481, -1.5864,  2.3462, -0.4531],
        [ 1.3307, -0.2527, -1.7911,  2.1416, -0.6578],
        [-0.0595, -1.6429, -3.1813,  0.7514, -2.048 ]]])

In [99]:
result.shape

(3, 4, 5)

# uptohere

In [43]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

In [44]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr
np.add.reduceat(arr, [0, 2, 4], axis=1)

In [45]:
def add_elements(x, y):
    """Return ``x + y``, using whatever ``+`` means for the two operands."""
    combined = x + y
    return combined
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

In [46]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

In [47]:
arr = rng.standard_normal(10000)
%timeit add_them(arr, arr)
%timeit np.add(arr, arr)

In [48]:
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

In [49]:
sarr[0]
sarr[0]['y']

In [50]:
sarr['x']

In [51]:
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

In [52]:
arr[0]['x']

In [53]:
arr['x']

In [54]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']
data['y']
data['x']['a']

In [55]:
arr = rng.standard_normal(6)
arr.sort()
arr

In [56]:
arr = rng.standard_normal((3, 5))
arr
arr[:, 0].sort()  # Sort first column values in place
arr

In [57]:
arr = rng.standard_normal(5)
arr
np.sort(arr)
arr

In [58]:
arr = rng.standard_normal((3, 5))
arr
arr.sort(axis=1)
arr

In [59]:
arr[:, ::-1]

In [60]:
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer
values[indexer]

In [61]:
arr = rng.standard_normal((3, 5))
arr[0] = values
arr
arr[:, arr[0].argsort()]

In [62]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter
list(zip(last_name[sorter], first_name[sorter]))

In [63]:
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer
values.take(indexer)

In [64]:
rng = np.random.default_rng(12345)
arr = rng.standard_normal(20)
arr
np.partition(arr, 3)

In [65]:
indices = np.argpartition(arr, 3)
indices
arr.take(indices)

In [66]:
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

In [67]:
arr.searchsorted([0, 8, 11, 16])

In [68]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])
arr.searchsorted([0, 1], side='right')

In [69]:
data = np.floor(rng.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

In [70]:
labels = bins.searchsorted(data)
labels

In [71]:
pd.Series(data).groupby(labels).mean()

In [72]:
import numpy as np

def mean_distance(x, y):
    """Return the mean of the signed differences ``x[i] - y[i]``.

    Only the first ``len(x)`` positions are visited, so ``y`` must be at
    least as long as ``x``. An empty ``x`` raises ``ZeroDivisionError``.
    """
    n_items = len(x)
    total = 0.0
    # Deliberately a plain Python loop over positions.
    for idx in range(n_items):
        total += x[idx] - y[idx]
    return total / n_items

In [73]:
# Create a disk-backed array stored in the file 'mymmap' in the current
# working directory. mode='w+' creates the file (overwriting any existing
# one) and opens it for reading and writing.
# NOTE: 10000 x 10000 float64 values at 8 bytes each is an 800 MB file.
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                 shape=(10000, 10000))
mmap

In [74]:
section = mmap[:5]

In [75]:
section[:] = rng.standard_normal((5, 10000))
mmap.flush()
mmap
del mmap

In [76]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

In [77]:
%xdel mmap
!rm mymmap

In [78]:
arr_c = np.ones((100, 10000), order='C')
arr_f = np.ones((100, 10000), order='F')
arr_c.flags
arr_f.flags
arr_f.flags.f_contiguous

In [79]:
%timeit arr_c.sum(1)
%timeit arr_f.sum(1)

In [80]:
arr_f.copy('C').flags

In [81]:
arr_c[:50].flags.contiguous
arr_c[:, :50].flags

In [82]:
%xdel arr_c
%xdel arr_f

In [84]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS