# NumPy

NumPy is a multi-dimensional array library, i.e. we can store all sorts of arrays in it: 1D, 2D, 3D, 4D, etc. We prefer NumPy arrays over Python lists because they are faster, use less memory, offer many built-in functions that operate on whole arrays, and are widely used in areas such as machine learning and backend development.

# Importing Numpy

In [1]:
import numpy as np

# Initializing an array

In [19]:
a = np.array([1,2,3], dtype = 'int16')
print(a)

[1 2 3]


In [9]:
b = np.array([[1,2,3,4,5,],[2,3,4,5,6],[1,3,2,3,2]])
print(b)

[[1 2 3 4 5]
 [2 3 4 5 6]
 [1 3 2 3 2]]


### Getting array dimensions

In [10]:
a.ndim

1

In [11]:
b.ndim

2

### Get shape of array (rows,col)

In [12]:
a.shape

(3,)

In [13]:
b.shape

(3, 5)

## Getting data type

In [20]:
a.dtype

dtype('int16')

In [25]:
# getting total number of elements (3 elements in this case)
a.size

3

In [26]:
#number of bytes used by each element (2 bytes for int16)
a.itemsize

2

## Accessing/changing elements in an array

In [28]:
a = np.array([[1,2,3,4,5,6,7],[2,3,4,5,6,7,8]])
print(a)

[[1 2 3 4 5 6 7]
 [2 3 4 5 6 7 8]]


In [29]:
a.shape

(2, 7)

In [34]:
#accessing the element '8' [row,col]
#way 1
a[1][6]

8

In [35]:
#way 2
a[1][-1]

8

### getting all the elements of a specific row

In [36]:
a[0,:]

array([1, 2, 3, 4, 5, 6, 7])

### getting all elements of a specific column

In [37]:
a[:,4]

array([5, 6])

### updating an element at a specific location

In [38]:
a[1,3] = 7
a[1,3]

7

In [39]:
print(a)

[[1 2 3 4 5 6 7]
 [2 3 4 7 6 7 8]]


In [40]:
#updating multiple rows in the same column
a[:,2] = [99, 98]
print(a)

[[ 1  2 99  4  5  6  7]
 [ 2  3 98  7  6  7  8]]


#### 3d example

In [43]:
c = np.array([[[1,2],[2,3]], [[3,4],[4,5]]])
c

array([[[1, 2],
        [2, 3]],

       [[3, 4],
        [4, 5]]])

In [44]:
c[0,1,:]

array([2, 3])

## Initializing different types of arrays

In [48]:
# all 0 arrays
np.zeros(2)
# inside the bracket we place the shape of array


array([0., 0.])

In [49]:
np.zeros((2,3))
#shape = 2 rows, 3 col

array([[0., 0., 0.],
       [0., 0., 0.]])

In [50]:
# for all 1 array
np.ones(2)

array([1., 1.])

In [53]:
# for any other number, takes 2 parameters- shape and value
np.full((2,3), 99, dtype='float32')

array([[99., 99., 99.],
       [99., 99., 99.]], dtype=float32)

In [55]:
np.full_like(a,99)
#the full_like fn reuses the shape (and dtype) of an existing array for the new array

array([[99, 99, 99, 99, 99, 99, 99],
       [99, 99, 99, 99, 99, 99, 99]])

In [56]:
#using random numbers here
np.random.rand(2,3)

array([[0.60992611, 0.24618483, 0.61139362],
       [0.06361254, 0.4182406 , 0.51292712]])

In [62]:
#using random int values
#random.randint(low, high=None, size=None, dtype=int)
#Return random integers from low (inclusive) to high (exclusive)

np.random.randint(3,7, size = (2,3))

array([[4, 3, 6],
       [5, 5, 4]])

In [63]:
#identity matrix
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [64]:
#used for repeating the elements of an array: np.repeat(array_like, repeats, axis)
np.repeat(3, 4)

array([3, 3, 3, 3])

In [65]:
x = np.array([[1,2],[3,4]])
np.repeat(x, 2)

array([1, 1, 2, 2, 3, 3, 4, 4])

In [67]:
np.repeat(x, 3, axis=1)

array([[1, 1, 1, 2, 2, 2],
       [3, 3, 3, 4, 4, 4]])

In [68]:
np.repeat(x, [1, 2], axis=0)

array([[1, 2],
       [3, 4],
       [3, 4]])

In [76]:
a = np.ones((5,5))
print(a)

[[1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]
 [1. 1. 1. 1. 1.]]


In [80]:
b = np.zeros((3,3))
print(b)
b[1,1]=9
print(b)
a[1:4,1:4] = b

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]
[[0. 0. 0.]
 [0. 9. 0.]
 [0. 0. 0.]]


In [81]:
print(a)

[[1. 1. 1. 1. 1.]
 [1. 0. 0. 0. 1.]
 [1. 0. 9. 0. 1.]
 [1. 0. 0. 0. 1.]
 [1. 1. 1. 1. 1.]]


### in case of copying arrays

In [83]:
a = np.array([1,2,3])
a

array([1, 2, 3])

In [84]:
b=a
b

array([1, 2, 3])

In [85]:
b[1]=99
b

array([ 1, 99,  3])

In [86]:
#here is the issue: b = a does not copy the array, both names refer to the same data,
#so changing b also changed a. To avoid it we use the .copy() function
a

array([ 1, 99,  3])

In [89]:
c = a.copy()
c[1] = 98
c

array([ 1, 98,  3])

In [88]:
a

array([ 1, 99,  3])

## Mathematics

#### note that every operation we perform on an array is performed on each element of the array

In [90]:
a = np.array([1,1,2])
a

array([1, 1, 2])

In [92]:
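#note: the recorded output below appears to come from running this cell twice
#(a single run on [1, 1, 2] gives [4, 4, 5]); the following cells use [7, 7, 8]
a += 3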
a

array([7, 7, 8])

In [93]:
a - 2

array([5, 5, 6])

In [94]:
a**2

array([49, 49, 64])

In [96]:
np.sin(a)

array([0.6569866 , 0.6569866 , 0.98935825])

In [97]:
np.cos(a)

array([ 0.75390225,  0.75390225, -0.14550003])

## Linear Algebra

#### Matrix Multiply( matmul() )

In [105]:
#matrix multiply
a = np.ones((2,3))
print(a)
b = np.full((3,2), 2)
print(b)

np.matmul(a,b)


[[1. 1. 1.]
 [1. 1. 1.]]
[[2 2]
 [2 2]
 [2 2]]


array([[6., 6.],
       [6., 6.]])

#### Finding Determinant of a matrix

In [108]:
a = np.identity(3)
print(a)

#determinant fn = np.linalg.det()

np.linalg.det(a)

[[1. 0. 0.]
 [0. 1. 0.]
 [0. 0. 1.]]


1.0

In [109]:
## Reference docs (https://numpy.org/doc/stable/reference/routines.linalg.html)

### Statistics

In [111]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [112]:
#when we find the min among all elements
np.min(a)

1

In [113]:
#when we find min row wise

np.min(a, axis =1)

array([1, 4])

In [114]:
#finding max
np.max(a)

6

In [115]:
np.sum(a)

21

In [117]:
#column elements sum

np.sum(a, axis =0)

array([5, 7, 9])

In [118]:
#row elements sum

np.sum(a, axis =1)

array([ 6, 15])

### Reorganizing arrays- (using reshape function)

In [124]:
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)

after = before.reshape((8,1))
print(after)

[[1 2 3 4]
 [5 6 7 8]]
[[1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [7]
 [8]]


#### horizontal and vertical stacking of vectors

In [129]:
#hstack for horizontal and vstack for vertical

a = np.array([1,2,3,4])
b = np.array([2,3,4,5])

np.hstack((a,b))

array([1, 2, 3, 4, 2, 3, 4, 5])

In [128]:
np.vstack((b,a))

array([[2, 3, 4, 5],
       [1, 2, 3, 4]])

In [130]:
np.vstack((b,a,a,b))

array([[2, 3, 4, 5],
       [1, 2, 3, 4],
       [1, 2, 3, 4],
       [2, 3, 4, 5]])