In [46]:
import numpy as  np

#### Array Creation & Initialization

In [45]:
# 1-D array
a = np.array([1,2,3,4,5,6,7,8,9,10])
# 2-D array
a = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])


In [None]:
b=np.asarray(a)
lst = [1, 2, 3]
a = np.asarray(lst)

In [8]:
empty = np.empty((1000,1000)) # array without initializing values.
empty

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], shape=(1000, 1000))

In [None]:
b = np.copy(a)
x=np.zeros((2,5))
y=np.ones((3,4))
z=np.eye(4)
m=np.full((3,5),5)

array([[5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5],
       [5, 5, 5, 5, 5]])

In [None]:
# Ranges
xy=np.arange(1,10)
yz=np.linspace(-5,5,20)
lr = np.logspace(-4, 0,5)
lr

array([1.e-04, 1.e-03, 1.e-02, 1.e-01, 1.e+00])

In [None]:
# Random
np.random.rand(1,20)
np.random.random((1,20))
np.random.randint(1,50,size=(3,3))
np.random.random_integers(1,10,size=(10))
np.random.choice(np.random.randint(1,50,size=(10)),5)
np.random.seed(42)
np.random.rand(1,10)


  np.random.random_integers(1,10,size=(3,3))


array([[0.37454012, 0.95071431, 0.73199394, 0.59865848, 0.15601864,
        0.15599452, 0.05808361, 0.86617615, 0.60111501, 0.70807258]])

#### Array Properties

In [77]:
arr = np.array([[1,2,3],
       [4,5,6],
       [7,8,9]])
arr2 = np.array([[[1,2,3],[4,5,6],
        [7,8,9],
        [11,12,13],
        [14,15,16]]])
print(arr.ndim,"\n",arr2.ndim)
print(arr.shape,arr2.shape)
print(arr.size,arr2.size)
print(arr.dtype,arr2.dtype)
print(arr.itemsize) #Bytes used by one element
print(arr.nbytes)   #Total memory consumed by array
print(arr.T,arr2.T,arr.transpose(),arr2.transpose())
print(np.info(arr2))    # Shows : Shape,Type,Memory,Flags

2 
 3
(3, 3) (1, 5, 3)
9 15
int64 int64
8
72
[[1 4 7]
 [2 5 8]
 [3 6 9]] [[[ 1]
  [ 4]
  [ 7]
  [11]
  [14]]

 [[ 2]
  [ 5]
  [ 8]
  [12]
  [15]]

 [[ 3]
  [ 6]
  [ 9]
  [13]
  [16]]] [[1 4 7]
 [2 5 8]
 [3 6 9]] [[[ 1]
  [ 4]
  [ 7]
  [11]
  [14]]

 [[ 2]
  [ 5]
  [ 8]
  [12]
  [15]]

 [[ 3]
  [ 6]
  [ 9]
  [13]
  [16]]]
class:  ndarray
shape:  (1, 5, 3)
strides:  (120, 24, 8)
itemsize:  8
aligned:  True
contiguous:  True
fortran:  False
data pointer: 0x17c71af7a90
byteorder:  little
byteswap:  False
type: int64
None


#### Indexing, Slicing & Masking

In [49]:
a[2,1]

a[1::2]

a[1:,2]
a[:, :]

a[2]

array([7, 8, 9])

Boolean Masking

In [None]:
a[a>2]
a[(a>2)&(a%2==0)]
np.where(a>2,"Condition_Satisfied","Condition_Not_Satisfied")
np.nonzero(a) # return index values where element is non-zero in form of tuple

(array([0, 0, 0, 1, 1, 1, 2, 2, 2]), array([0, 1, 2, 0, 1, 2, 0, 1, 2]))

Fancy Indexing

In [78]:
indexing=[1,3,4,7]
a.flatten()[indexing]

array([2, 4, 5, 8])

#### Shape Manipulation & Reshaping

In [None]:
a = np.random.randint(1,100,size=(18))
a.reshape(9,2)
a.reshape(3,-1) # -1 for auto calculate dimension
a.ravel() # flatten the matrix, fast but unsafe (changes affect original if possible)
a.flatten() # Slow but safe 
x = np.array([[[[5,6,7],[5,6,7]]]]) 
x.squeeze() # Remove extra dimensions
x = np.array([1, 2, 3])
np.expand_dims(x,axis=1)    # add new axis(row/column)
print(b)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


array([[1, 4, 7],
       [2, 5, 8],
       [3, 6, 9]])

#### Joining, Stacking & Splitting

In [16]:
import numpy as np
X1 = np.array([[1, 2], [3, 4]])
X2 = np.array([[5, 6]])
np.concatenate([X1,X2],axis=0) # For row wise merging column must be same dim   # default = axis=0

array([[1, 2],
       [3, 4],
       [5, 6]])

In [None]:
# column wise concatenate
A = np.array([[1], [2], [3]])
B = np.array([[10], [20], [30]])

np.concatenate([A, B], axis=1)

array([[ 1, 10],
       [ 2, 20],
       [ 3, 30]])

In [2]:
import numpy as np
a = np.array([1,2,3,4,5])
b = np.array([6,7,8,9,10])
np.stack([a,b],axis=0)
np.stack([a,b],axis=1)

array([[ 1,  6],
       [ 2,  7],
       [ 3,  8],
       [ 4,  9],
       [ 5, 10]])

In [12]:
np.vstack([a,b])
c=np.hstack([a,b])
c

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [19]:
np.split(c,2)   # Require exact division
np.array_split(c,3) # Safe bcz does not require exact division
print(X1)
np.hsplit(X1,2) # horizontal split
np.vsplit(X1,2) # vertical split

[[1 2]
 [3 4]]


[array([[1, 2]]), array([[3, 4]])]

#### Mathematical & Statistical Operation
Aggregation Functions

In [10]:
rng = np.random.seed(42)
a = np.array(np.random.randint(1,100,size=(20)))
b = np.array(np.random.randint(1,100,size=(3,3)))
print(a)
print('Sum :',np.sum(a))
print('Mean :',np.mean(a))
print('Median :',np.median(a))
print('Min :',np.min(a))
print('Max :',np.max(a))
print(b)
print('Sum row_wise :',np.sum(b,axis=1))
print('Sum column_wise :',np.sum(b,axis=0))


[52 93 15 72 61 21 83 87 75 75 88 24  3 22 53  2 88 30 38  2]
Sum : 984
Mean : 49.2
Median : 52.5
Min : 2
Max : 93
[[64 60 21]
 [33 76 58]
 [22 89 49]]
Sum row_wise : [145 167 160]
Sum column_wise : [119 225 128]


Spread / Variability

In [32]:
print(a.std())
print(a.var())

31.573089807619397
996.8599999999999


Position-based Aggregates

In [33]:
a.argmax()
a.argmin()

np.int64(15)

Range & Differences

In [36]:
# ptp() (peak-to-peak)
np.ptp(a)  # a.max()-a.min()
np.diff(a)

array([ 41, -78,  57, -11, -40,  62,   4, -12,   0,  13, -64, -21,  19,
        31, -51,  86, -58,   8, -36], dtype=int32)

In [37]:
print(np.cumsum(a))
print(np.cumprod(a))

[ 52 145 160 232 293 314 397 484 559 634 722 746 749 771 824 826 914 944
 982 984]
[                  52                 4836                72540
              5222880            318595680           6690509280
         555312270240       48312167510880     3623412563316000
   271755942248700000  5467778844176048384  2099483744258299904
  6298451232774899712 -9008025468628619264  2189996079131521024
  4379992158263042048 -1942315620752883712 -2929236401457856512
  -630518813141237760 -1261037626282475520]


Handling Missing Values (NaN)

In [38]:
X = np.array([1, 2, np.nan, 4])
#X.mean()   # returns nan 
# we use 
np.nanmean(X)
np.nanstd(X)
np.nanmin(X)
np.nanmax(X)

np.float64(4.0)

Basic Mathematical Operations

In [8]:
x = np.array([1,2,3])
np.sqrt(x)
np.power(x,2)
np.log(x)
np.exp(x)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [None]:
def square(x):
    return x * x

v = np.arange(5)
vf = np.vectorize(square)   #hidden for loop inside vf calling each element and square each element by for loop and result into array
vf(v)

array([ 0,  1,  4,  9, 16])

In [11]:
np.linalg.det(b)

np.float64(-85927.00000000004)

In [12]:
a = np.array([30, 10, 20])
np.sort(a)
M = np.array([[3, 1, 2],
              [6, 4, 5]])
np.sort(M, axis=1)   # row-wise
np.sort(M, axis=0)   # column-wise
a = np.array([50, 10, 30])
idx = np.argsort(a)
sorted_arr = np.array([10, 20, 30, 40])
np.searchsorted(sorted_arr, 25)


np.int64(2)

Set Operations

In [13]:
a = np.array([1, 2, 2, 3, 3, 3])
np.unique(a)
np.intersect1d([1, 2, 3], [2, 3, 4])
np.union1d([1, 2], [2, 3])
np.setdiff1d([1, 2, 3], [2])

array([1, 3])

Missing Values & Data Cleaning

In [14]:
x = np.array([1, 2, np.nan, 4])
np.isnan(x)
np.nan_to_num(x, nan=0) #Replaces NaNs and infinities in the array
x.astype(float)

array([ 1.,  2., nan,  4.])

In [17]:
np.random.shuffle(x)

Advanced NumPy Functions

In [16]:
x = np.array([1, 2, 3])
np.tile(x, 3)   #Repeats the entire array as blocks
np.repeat(x, 3) #Repeats each element individually.
a = np.array([10, 20, 30, 40])
np.take(a, [0, 2])  #Selects elements by index (safer than slicing)
np.put(a, [1, 3], [99, 88]) # Puts values at specific indices
from numpy.random import default_rng
rng = default_rng(seed=42)
y = rng.integers(1,1000,(4,4))
print(np.sort(y))
np.clip(y,100,500)
np.diag(a)    #creates a diagonal matrix

[[ 90 439 654 774]
 [ 86 433 697 858]
 [ 95 202 526 975]
 [717 736 761 786]]


array([[10,  0,  0,  0],
       [ 0, 99,  0,  0],
       [ 0,  0, 30,  0],
       [ 0,  0,  0, 88]])