In [1]:
import numpy as np

In [4]:
my_1D_array = np.array([1, 2, 3, 4, 5])
my_1D_array

array([1, 2, 3, 4, 5])

In [6]:
print (my_1D_array.shape )    #(5,) -> a 1-D array: 5 elements along its single axis (there are no rows/columns)

(5,)


In [8]:
my_2D_array = np.array([[1, 2],
                        [3, 4],
                        [6, 5]])

print (my_2D_array.shape)     #(3, 2) -> 3 rows and 2 columns 

(3, 2)


### Using shape to change array dimensions

In [9]:
change_array = np.array([1,2,3,4,5,6])

# Assigning a tuple to .shape changes the dimensions of this same array in place.
# NOTE(review): NumPy's documentation discourages setting .shape directly and
# recommends reshape() instead -- confirm against the NumPy version in use.
change_array.shape = (2, 3)

print (change_array)      

[[1 2 3]
 [4 5 6]]


### Reshape 1D to 2D Array

In [10]:
# define array
arr = np.array([11, 22, 33, 44, 55, 66])
arr

array([11, 22, 33, 44, 55, 66])

In [11]:
arr.shape 

(6,)

In [13]:
arr.shape = (6, 1)
arr

array([[11],
       [22],
       [33],
       [44],
       [55],
       [66]])

In [14]:
# reshape
arr_new = arr.reshape((3, 2))
arr_new

array([[11, 22],
       [33, 44],
       [55, 66]])

In [15]:
np.reshape(arr, (2, 3))

array([[11, 22, 33],
       [44, 55, 66]])

#### Example: np.reshape does not modify the original array

In [10]:
a = np.array([[1,2,3], [4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [12]:
np.reshape(a, 6)

array([1, 2, 3, 4, 5, 6])

In [13]:
a

array([[1, 2, 3],
       [4, 5, 6]])

#### Example: letting NumPy infer a dimension with -1

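If you want NumPy to work out the length of one dimension automatically, pass -1 for that dimension; NumPy infers it from the array's total size and the other dimensions. Only one dimension may be -1, and the total size must be evenly divisible by the product of the other dimensions (otherwise a ValueError is raised).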

In [18]:
a1 = np.arange(1, 13)  # numbers 1 to 12

print(a1.shape)

print(a1)

(12,)
[ 1  2  3  4  5  6  7  8  9 10 11 12]


In [19]:
a1_2d = a1.reshape(3, 4)  # 3 rows x 4 columns
print(a1_2d.shape)

(3, 4)


In [20]:
a1.reshape(3,  4)
a1.reshape(-1, 4) # same as above: a1.reshape(3, 4)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [21]:
a1.reshape(3,  4)
a1.reshape(3, -1)  # same as above: a1.reshape(3, 4)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [23]:
a1.reshape(6,  2)
a1.reshape(5, -1)  # raises ValueError: 12 elements cannot be split evenly into 5 rows

ValueError: cannot reshape array of size 12 into shape (5,newaxis)

In [19]:
a1.reshape(8, -1)

ValueError: cannot reshape array of size 12 into shape (8,newaxis)

### np.newaxis

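The advantage of np.newaxis over reshape() is that you do not have to spell out the full target shape: you only mark the position where a new axis of length 1 should be inserted, and the existing dimensions are kept as they are.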

Each use of np.newaxis in an index expression increases the number of dimensions by one, so that one-dimensional arrays become two-dimensional, two-dimensional arrays become three-dimensional, and so on:

    1D array will become 2D array
    2D array will become 3D array
    3D array will become 4D array

In [20]:
arr = np.arange(4)
arr

array([0, 1, 2, 3])

In [21]:
arr.shape

(4,)

In [22]:
# make it a row vector by inserting a new axis as the first dimension
arr[np.newaxis, :]

array([[0, 1, 2, 3]])

In [23]:
# make it a column vector by inserting a new axis as the second dimension
arr[:, np.newaxis]

array([[0],
       [1],
       [2],
       [3]])

#### Example:
Let’s say you want to add the two following arrays:

In [24]:
x1 = np.array([1, 2, 3, 4, 5])
x2 = np.array([5, 4, 3])

In [25]:
# Adding these directly raises a ValueError: shapes (5,) and (3,) cannot be broadcast together.

x1 + x2

ValueError: operands could not be broadcast together with shapes (5,) (3,) 

In [26]:
x1_new = x1[:, np.newaxis]
x1_new

array([[1],
       [2],
       [3],
       [4],
       [5]])

In [27]:
x1_new + x2

array([[ 6,  5,  4],
       [ 7,  6,  5],
       [ 8,  7,  6],
       [ 9,  8,  7],
       [10,  9,  8]])

In [28]:
x2 + x1_new

array([[ 6,  5,  4],
       [ 7,  6,  5],
       [ 8,  7,  6],
       [ 9,  8,  7],
       [10,  9,  8]])

The other way around: add the new axis to x2 instead of x1 (the result is the transpose of the one above).

In [29]:
x2_new = x2[:, np.newaxis]
x2_new

array([[5],
       [4],
       [3]])

In [30]:
x1 + x2_new

array([[ 6,  7,  8,  9, 10],
       [ 5,  6,  7,  8,  9],
       [ 4,  5,  6,  7,  8]])

## Computation on Arrays: Broadcasting (vectorize operations)

Recall that for arrays of the same size, binary operations are performed on an element-by-element basis:

In [25]:
a = np.array([0, 1, 2])
b = np.array([5, 5, 5])
a + b

array([5, 6, 7])

Broadcasting allows these types of binary operations to be performed on arrays of different sizes–for example, we can just as easily add a scalar (think of it as a zero-dimensional array) to an array:

In [26]:
a + 5

array([5, 6, 7])

We can think of this as an operation that stretches or duplicates the value 5 into the array [5, 5, 5], and adds the results. The advantage of NumPy's broadcasting is that this duplication of values does not actually take place, but it is a useful mental model as we think about broadcasting.


We can similarly extend this to arrays of higher dimension. Observe the result when we subtract a one-dimensional array from a two-dimensional array:

In [30]:
M = np.ones((3, 3))
M

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [12]:
a

array([0, 1, 2])

In [33]:
M - a

array([[ 1.,  0., -1.],
       [ 1.,  0., -1.],
       [ 1.,  0., -1.]])

Here the one-dimensional array a (shape (3,)) is treated as shape (1, 3) and then stretched, or broadcast, along the first dimension (repeated for each row) in order to match the shape of M.

#### broadcasting of both arrays

In [14]:
a = np.arange(3)
a

array([0, 1, 2])

In [15]:
b = np.arange(3)[:, np.newaxis]
b

array([[0],
       [1],
       [2]])

In [16]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

In [54]:
print(a.shape, b.shape)

(3,) (3, 1)


## Rules of Broadcasting
Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

- Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.

- Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.

- Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

#### Example 1

In [27]:
M = np.ones((2, 3))
a = np.arange(3)

In [28]:
print(M)
print(a)

print(M.shape, a.shape)

[[1. 1. 1.]
 [1. 1. 1.]]
[0 1 2]
(2, 3) (3,)


The shapes of the arrays are

    M.shape = (2, 3)
    a.shape = (3,)

We see by rule 1 that the array a has fewer dimensions, so we pad it on the left with ones:

    M.shape -> (2, 3)
    a.shape -> (1, 3)

By rule 2, we now see that the first dimension disagrees, so we stretch this dimension to match:

    M.shape -> (2, 3)
    a.shape -> (2, 3)
    
The shapes match, and we see that the final shape will be (2, 3):

In [36]:
M + a

array([[1., 2., 3.],
       [1., 2., 3.]])

#### Example 2

In [29]:
a = np.arange(3).reshape((3, 1))
b = np.arange(3)

In [30]:
print(a)
print(b)

print(a.shape, b.shape)

[[0]
 [1]
 [2]]
[0 1 2]
(3, 1) (3,)


shape of the arrays:

    a.shape = (3, 1)
    b.shape = (3,)
    
Rule 1 says we must pad the shape of b with ones:

    a.shape -> (3, 1)
    b.shape -> (1, 3)
    
And rule 2 tells us that we upgrade each of these ones to match the corresponding size of the other array:

    a.shape -> (3, 3)
    b.shape -> (3, 3)
    
Because the result matches, these shapes are compatible

In [31]:
a + b

array([[0, 1, 2],
       [1, 2, 3],
       [2, 3, 4]])

#### Example 3 - two arrays are not compatible:

In [32]:
M = np.ones((3, 2))
a = np.arange(3)

In [33]:
print(M)
print(a)

print(M.shape, a.shape)

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[0 1 2]
(3, 2) (3,)


The shapes of the arrays are

    M.shape = (3, 2)
    a.shape = (3,)
    
rule 1 tells us that we must pad the shape of a with ones:

    M.shape -> (3, 2)
    a.shape -> (1, 3)
    
By rule 2, the first dimension of a is stretched to match that of M:

    M.shape -> (3, 2)
    a.shape -> (3, 3)

Now we hit rule 3: the final shapes do not match. The second dimensions are 2 and 3; they disagree and neither is equal to 1, so these two arrays are incompatible:

In [55]:
M + a

ValueError: operands could not be broadcast together with shapes (3,2) (3,) 

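A variation on the same arrays: giving a a trailing axis with np.newaxis turns its shape into (3, 1), which does broadcast against M's shape (3, 2):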

In [56]:
print(M)
print(a)

print(M.shape, a.shape)

[[1. 1.]
 [1. 1.]
 [1. 1.]]
[0 1 2]
(3, 2) (3,)


In [57]:
M + a[:, np.newaxis]

array([[1., 1.],
       [2., 2.],
       [3., 3.]])

## More examples on broadcasting ...

In [3]:
A = np.array([[56,  0,   4.4, 68],
              [1.2, 104, 52,  8.0],
              [1.8, 135, 99,  0.9]
             ])

In [4]:
np.sum(A, axis=0)

array([ 59. , 239. , 155.4,  76.9])

Suppose we want to express each number as a fraction of its column total (multiply by 100 to get a percentage):

In [5]:
A.shape

(3, 4)

In [6]:
np.sum(A, axis=0).shape

(4,)

In [7]:
A/np.sum(A, axis=0).reshape(1, 4)

array([[0.94915254, 0.        , 0.02831403, 0.88426528],
       [0.02033898, 0.43514644, 0.33462033, 0.10403121],
       [0.03050847, 0.56485356, 0.63706564, 0.01170351]])

## Stacking

In [61]:
a = np.random.rand(2, 3)
print(a)

b = np.random.rand(2, 3)
print(b)

[[0.80290499 0.37318746 0.71391612]
 [0.72223608 0.72847003 0.316678  ]]
[[0.18404482 0.32401548 0.98818296]
 [0.67664943 0.31974258 0.52468609]]


In [62]:
np.vstack([a, b])

array([[0.80290499, 0.37318746, 0.71391612],
       [0.72223608, 0.72847003, 0.316678  ],
       [0.18404482, 0.32401548, 0.98818296],
       [0.67664943, 0.31974258, 0.52468609]])

In [63]:
np.hstack([a, b])

array([[0.80290499, 0.37318746, 0.71391612, 0.18404482, 0.32401548,
        0.98818296],
       [0.72223608, 0.72847003, 0.316678  , 0.67664943, 0.31974258,
        0.52468609]])

## Centering an array

In [64]:
X = np.random.random((10, 3))
X

array([[0.32388622, 0.5023758 , 0.90622018],
       [0.33594877, 0.91538294, 0.87619718],
       [0.16769344, 0.09572879, 0.13393913],
       [0.52106243, 0.70503979, 0.03458784],
       [0.2561471 , 0.59668786, 0.7832083 ],
       [0.6617182 , 0.34825177, 0.35041029],
       [0.3654839 , 0.54489131, 0.51152633],
       [0.92135062, 0.06830035, 0.35826472],
       [0.23068581, 0.25896773, 0.96820036],
       [0.35538404, 0.65907556, 0.09606903]])

In [68]:
Xmean = X.mean(axis=0)
Xmean

array([0.41393606, 0.46947019, 0.50186234])

And now we can center the X array by subtracting the mean (this is a broadcasting operation):

In [69]:
X_centered = X - Xmean

To double-check that we've done this correctly, we can check that the centered array has near zero mean:

In [70]:
X_centered.mean(0)

array([-1.66533454e-17, -3.33066907e-17,  1.11022302e-17])

Within machine precision, the mean is now zero.