# Introduction to numpy

**Package for scientific computing with Python**

## ndarray

### Time and space-efficient multidimensional arrays:

In [1]:
import numpy as np

In [2]:
an_array = np.array([3, 33, 333])  # Create a rank 1 array
type(an_array)

numpy.ndarray

In [3]:
an_array.shape

(3,)

In [4]:
an_array[0], an_array[1], an_array[2]

(3, 33, 333)

In [5]:
an_array[0] = 888                 # Change an element of the array
an_array

array([888,  33, 333])

### How to create a Rank 2 numpy array:

In [6]:
another = np.array([[11,12,13],[21,22,23]])   # Create a rank 2 array

In [7]:
another.shape # rows x columns                   

(2, 3)

In [8]:
another[0, 0], another[0, 1], another[1, 0]

(11, 12, 21)

### There are many ways to create numpy arrays:

In [9]:
np.zeros((2,2))      #   an array of zeros

array([[ 0.,  0.],
       [ 0.,  0.]])

In [10]:
np.full((2,2), 9.0)  #   an array filled with 9.0

array([[ 9.,  9.],
       [ 9.,  9.]])

In [11]:
np.eye(2)            #   a 2x2 identity matrix (ones on the diagonal, zeros elsewhere)

array([[ 1.,  0.],
       [ 0.,  1.]])

In [12]:
np.ones((1,2))       #   an array of ones

array([[ 1.,  1.]])

In [13]:
np.random.random((2,2)) # an array of random values                     

array([[ 0.37830674,  0.61042129],
       [ 0.69192138,  0.05149793]])

## Array Indexing


### Slice indexing:

In [14]:
# Rank 2 array of shape (3, 4)
# (numpy is already imported as np in the first cell, so it is not re-imported here)
an_array = np.array([[11,12,13,14], [21,22,23,24], [31,32,33,34]])
an_array

array([[11, 12, 13, 14],
       [21, 22, 23, 24],
       [31, 32, 33, 34]])

In [15]:
# Array slicing: get the subarray made of the first 2 rows and columns 1 and 2
# (the column slice 1:3 starts at index 1 and stops before index 3)

a_slice = an_array[:2, 1:3]
a_slice

array([[12, 13],
       [22, 23]])

In [16]:
# When you modify a slice, you modify the underlying array

an_array[0, 1]

12

In [17]:
a_slice[0, 0] = 1000    # a_slice[0, 0] is the same piece of data as an_array[0, 1]
an_array[0, 1]

1000

### Use both integer indexing & slice indexing

In [18]:
# Rank 2 array of shape (3, 4)
an_array = np.array([[11,12,13,14], [21,22,23,24], [31,32,33,34]])

# Using both integer indexing & slicing generates an array of lower rank
row_rank1 = an_array[1, :]    # Rank 1 view

# Slicing alone: generates an array of the same rank as an_array
row_rank2 = an_array[1:2, :]  # Rank 2 view

# Same row both times; only the shape (and hence the brackets) differs
print(row_rank1, row_rank1.shape)
print(row_rank2, row_rank2.shape)   # Notice the [[ ]]

[21 22 23 24] (4,)
[[21 22 23 24]] (1, 4)


In [19]:
# For columns of an array:

col_rank1 = an_array[:, 1]     # integer column index: the result drops to rank 1
col_rank2 = an_array[:, 1:2]   # one-column slice: the result stays rank 2

print(col_rank1, col_rank1.shape)  # Rank 1
print()
print(col_rank2, col_rank2.shape)  # Rank 2

[12 22 32] (3,)

[[12]
 [22]
 [32]] (3, 1)


### Array Indexing for changing elements:

In [20]:
# Create a new array
an_array = np.array([[11,12,13], [21,22,23], [31,32,33], [41,42,43]])

print('Original Array:')
print(an_array)

# Column to pick in each row (one entry per row of an_array)
indices = np.array([0, 1, 2, 0])

# Row numbers 0..n-1, derived from the array itself instead of a hard-coded 4
whichRows = np.arange(an_array.shape[0])
print('\nRows indices picked : %s' % whichRows)

# Select one element from each row: element i is an_array[whichRows[i], indices[i]]
print(an_array[whichRows, indices])

# Change one element from each row, reusing the same pair of index arrays
an_array[whichRows, indices] += 100000

print('\nChanged Array:')
print(an_array)

Original Array:
[[11 12 13]
 [21 22 23]
 [31 32 33]
 [41 42 43]]

Rows indices picked : [0 1 2 3]
[11 22 33 41]

Changed Array:
[[100011     12     13]
 [    21 100022     23]
 [    31     32 100033]
 [100041     42     43]]


In [21]:
an_array = np.array([[11,12], [21, 22], [31, 32]])

In [22]:
array_filter = (an_array > 15)
array_filter

array([[False, False],
       [ True,  True],
       [ True,  True]], dtype=bool)

In [23]:
an_array[array_filter]

array([21, 22, 31, 32])

In [24]:
#Can be done this way also:

an_array[an_array > 15]

array([21, 22, 31, 32])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Datatypes:
</p>

In [25]:
ex1 = np.array([11, 12]) # NumPy infers an integer dtype from the values
print(ex1.dtype)

ex2 = np.array([11.0, 12.0]) # NumPy infers a floating-point dtype from the values
print(ex2.dtype)

ex3 = np.array([11, 21], dtype=np.int64) # You can also request the dtype explicitly
print(ex3.dtype)

int64
float64
int64


<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Array Operations:

</p>

In [26]:
# np.int was only an alias for the builtin int; it was deprecated in
# NumPy 1.20 and removed in 1.24, so name the concrete dtype instead.
x = np.array([[111,112],[121,122]], dtype=np.int64)
y = np.array([[211.1,212.1],[221.1,222.1]], dtype=np.float64)

# The + operator adds element by element; np.add is the equivalent function form
print(x + y)
print(np.add(x, y))

[[ 322.1  324.1]
 [ 342.1  344.1]]
[[ 322.1  324.1]
 [ 342.1  344.1]]


In [27]:
print(x - y)
print(np.subtract(x, y))

[[-100.1 -100.1]
 [-100.1 -100.1]]
[[-100.1 -100.1]
 [-100.1 -100.1]]


In [28]:
print(x * y)
print(np.multiply(x, y))

[[ 23432.1  23755.2]
 [ 26753.1  27096.2]]
[[ 23432.1  23755.2]
 [ 26753.1  27096.2]]


In [29]:
print(x / y)
print(np.divide(x, y))

[[ 0.52581715  0.52805281]
 [ 0.54726368  0.54930212]]
[[ 0.52581715  0.52805281]
 [ 0.54726368  0.54930212]]


In [30]:
print(np.sqrt(x))

[[ 10.53565375  10.58300524]
 [ 11.          11.04536102]]


In [31]:
print(np.exp(x))

[[  1.60948707e+48   4.37503945e+48]
 [  3.54513118e+52   9.63666567e+52]]


<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Dot Product / Matrix Multiplication:

</p>

In [32]:
x2d = np.array([[1,1],[1,1]])
y2d = np.array([[2,2],[2,2]])

# dot() on two rank 2 arrays is the matrix product, not an elementwise multiply;
# the method form and the np.dot function form give the same result
print(x2d.dot(y2d))
print()
print(np.dot(x2d, y2d))

[[4 4]
 [4 4]]

[[4 4]
 [4 4]]


In [33]:
a1d = np.array([9 , 9 ])
b1d = np.array([10, 10])

# dot() on two rank 1 arrays is the inner product: 9*10 + 9*10, a single number
print(a1d.dot(b1d))
print()
print(np.dot(a1d, b1d))

180

180


In [34]:
# Rank 2 array dotted with a rank 1 array: a matrix-vector product, rank 1 result
print(x2d.dot(a1d))
print()
print(np.dot(x2d, a1d))

[18 18]

[18 18]


### Sum / Transpose:

In [35]:
ex1 = np.array([[11, 12], [21, 22]])

total = np.sum(ex1)                # add all members
column_sums = np.sum(ex1, axis=0)  # columnwise sum (one value per column)
row_sums = np.sum(ex1, axis=1)     # rowwise sum (one value per row)

# Print each result followed by a blank line
for result in (total, column_sums, row_sums):
    print(result, end='\n\n')

66

[32 34]

[23 43]



<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Binary functions:</p>

In [36]:
x = np.random.randn(8)
x

array([ 0.8135956 , -0.61503931, -1.57272368,  1.13656873, -1.65887287,
       -0.38885077,  0.5579007 , -0.52356696])

In [37]:
y = np.random.randn(8)
y

array([ 2.0540249 ,  0.52009934, -0.23390168,  0.80349976,  1.4333977 ,
       -0.35678557, -0.68519478, -0.82984323])

In [38]:
# returns element wise maximum between two arrays

np.maximum(x, y)

array([ 2.0540249 ,  0.52009934, -0.23390168,  1.13656873,  1.4333977 ,
       -0.35678557,  0.5579007 , -0.52356696])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Reshaping array:
</p>

In [39]:
arr = np.arange(20)
print(arr)

[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [40]:
arr.reshape(4,5)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [41]:
ex1.T

array([[11, 21],
       [12, 22]])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Indexing using where():</p>

In [42]:
x_1 = np.array([1,2,3,4,5])

y_1 = np.array([11,22,33,44,55])

array_filter = np.array([True, False, True, False, True])

In [43]:
# Position by position: take the value from x_1 where array_filter is True,
# otherwise take it from y_1
out = np.where(array_filter, x_1, y_1)
out

array([ 1, 22,  3, 44,  5])

In [44]:
mat = np.random.rand(5,5)
mat

array([[ 0.91097769,  0.01382332,  0.34385898,  0.30619568,  0.73164818],
       [ 0.50068786,  0.0860277 ,  0.62094109,  0.3119278 ,  0.22577485],
       [ 0.33082435,  0.33200552,  0.24286065,  0.5747302 ,  0.360954  ],
       [ 0.63688761,  0.19346601,  0.82191114,  0.08260183,  0.04139749],
       [ 0.3067378 ,  0.17679344,  0.69402147,  0.22225128,  0.04608601]])

In [45]:
# Scalars work as the two choices too: 1000 where the entry exceeds 0.5, -1 elsewhere
np.where( mat > 0.5, 1000, -1)

array([[1000,   -1,   -1,   -1, 1000],
       [1000,   -1, 1000,   -1,   -1],
       [  -1,   -1,   -1, 1000,   -1],
       [1000,   -1, 1000,   -1,   -1],
       [  -1,   -1, 1000,   -1,   -1]])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

any or all conditionals:</p>

In [46]:
arr_bools = np.array([ True, False, True, True, False ])

In [47]:
arr_bools.any()

True

In [48]:
arr_bools.all()

False

<p style="font-family: Arial; font-size:2.75em;color:purple; font-weight:bold"><br>

Broadcasting:
<br><br>
</p>

In [49]:
# A 4 x 3 array whose rows grow by a factor of ten each
start = np.array([
    [11, 12, 13],
    [21 * 10, 22 * 10, 23 * 10],
    [31 * 100, 32 * 100, 33 * 100],
    [41 * 1000, 42 * 1000, 43 * 1000],
])

# A single row of 3 values; broadcasting stretches it across every row of 'start'
addThis = np.array([1, 0, 1])

y = start + addThis  # add to each row of 'start' using broadcasting

# Show the input, the row being added, and the result, separated by blank lines
print(start, addThis, y, sep='\n\n')

[[   11    12    13]
 [  210   220   230]
 [ 3100  3200  3300]
 [41000 42000 43000]]

[1 0 1]

[[   12    12    14]
 [  211   220   231]
 [ 3101  3200  3301]
 [41001 42000 43001]]


<p style="font-family: Arial; font-size:2.75em;color:purple; font-weight:bold"><br>

Statistical Methods:
<br><br>
</p>

In [50]:
arry = 10 * np.random.randn(2,4)
arry

array([[-13.39130792,  11.62399777,   2.76608195, -15.82490046],
       [ 13.98641774,  12.04861669,  -9.84732799, -12.26634673]])

In [51]:
arry.mean()

-1.3630961187878881

In [52]:
arry.mean(axis = 1)

array([-3.70653217,  0.98033993])

In [53]:
arry.mean(axis = 0)

array([  0.29755491,  11.83630723,  -3.54062302, -14.04562359])

In [54]:
arry.sum()

-10.904768950303104

<p style="font-family: Arial; font-size:2.75em;color:purple; font-weight:bold"><br>

Sorting:
<br><br>
</p>

In [55]:
unsorted = np.random.randn(10)
unsorted

array([-0.58081944, -1.23475549, -0.19859194, -0.32263167,  0.09288969,
       -2.49754732, -0.79613834, -2.12327923, -0.31256849,  0.47078937])

In [56]:
unsorted.sort() #inplace sorting
unsorted

array([-2.49754732, -2.12327923, -1.23475549, -0.79613834, -0.58081944,
       -0.32263167, -0.31256849, -0.19859194,  0.09288969,  0.47078937])

In [57]:
# Bonus: np.unique returns the distinct values of the array
# (use len() on the result to find how many there are):
np.unique(unsorted)

array([-2.49754732, -2.12327923, -1.23475549, -0.79613834, -0.58081944,
       -0.32263167, -0.31256849, -0.19859194,  0.09288969,  0.47078937])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Set Operations with np.array data type:
</p>

In [58]:
s1 = np.array(['desk','chair','bulb'])
s2 = np.array(['lamp','bulb','chair'])

In [59]:
print(s1, s2)

['desk' 'chair' 'bulb'] ['lamp' 'bulb' 'chair']


In [60]:
np.intersect1d(s1, s2)

array(['bulb', 'chair'], 
      dtype='<U5')

In [61]:
np.union1d(s1, s2)

array(['bulb', 'chair', 'desk', 'lamp'], 
      dtype='<U5')

In [62]:
np.setdiff1d(s1, s2) # elements in s1 that are not in s2

array(['desk'], 
      dtype='<U5')

In [63]:
# np.isin supersedes np.in1d (deprecated in NumPy 2.0); for rank 1 inputs
# it yields the same boolean mask
np.isin(s1, s2) # for each element of s1: is it also present in s2?

array([False,  True,  True], dtype=bool)

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Read or Write to Disk:</p>

<p style="font-family: Arial; font-size:1.3em;color:#2462C0; font-weight:bold"><br>

Binary Format:</p>

In [64]:
x = np.array([ 23.23, 24.24] )

In [65]:
np.save('an_array', x)  # the file is read back below under the name 'an_array.npy'

In [66]:
np.load('an_array.npy')

array([ 23.23,  24.24])

<p style="font-family: Arial; font-size:1.3em;color:#2462C0; font-weight:bold"><br>

Text Format:</p>

In [67]:
# x is rank 1, so each value lands on its own line and the delimiter never appears
np.savetxt('array.txt', X=x, delimiter=',')

In [68]:
!cat array.txt

2.323000000000000043e+01
2.423999999999999844e+01


In [69]:
np.loadtxt('array.txt', delimiter=',')

array([ 23.23,  24.24])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Random Number Generation:
</p>

In [70]:
# Draw a 1 x 5 array of normally distributed samples, then keep row 0 as a rank 1 array
Y = np.random.normal(size = (1,5))[0]
print(Y)

[ 0.48762847  1.03769063 -0.22429353  0.33782173 -0.63977572]


In [71]:
Z = np.random.randint(low=2,high=50,size=4)
print(Z)

[45 42 28 11]


In [72]:
np.random.permutation(Z) #return a new ordering of elements in Z

array([42, 11, 45, 28])

In [73]:
np.random.uniform(size=4) #uniform distribution

array([ 0.79765559,  0.44989363,  0.22669548,  0.8155371 ])

In [74]:
np.random.normal(size=4) #normal distribution

array([-1.01799639, -2.3592725 , -0.00469055, -0.02850282])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

Merging data sets:
</p>

In [75]:
K = np.random.randint(low=2,high=50,size=(2,2))
print(K)

print()
M = np.random.randint(low=2,high=50,size=(2,2))
print(M)

[[ 5 10]
 [17 39]]

[[13  2]
 [ 6 48]]


In [76]:
np.vstack((K,M))

array([[ 5, 10],
       [17, 39],
       [13,  2],
       [ 6, 48]])

In [77]:
np.hstack((K,M))

array([[ 5, 10, 13,  2],
       [17, 39,  6, 48]])

In [78]:
np.concatenate([K, M], axis = 0)

array([[ 5, 10],
       [17, 39],
       [13,  2],
       [ 6, 48]])

In [79]:
# Column-wise join (axis = 1) of K with the transpose of M, not with M itself
np.concatenate([K, M.T], axis = 1)

array([[ 5, 10, 13,  6],
       [17, 39,  2, 48]])

<p style="font-family: Arial; font-size:1.75em;color:#2462C0; font-weight:bold"><br>

numpy documentation: </p>

https://numpy.org/doc/stable/reference/