# Numpy Tutorial Part 1

In [65]:
#https://www.machinelearningplus.com/101-numpy-exercises-python/

#1. Import numpy as np and see the version

In [1]:
#Version
import numpy as np
print(np.__version__)

1.13.3


In [76]:
#Create 1D array
print("Method 1:",np.array(range(10)))
print("Method 2:",np.arange(10))

Method 1: [0 1 2 3 4 5 6 7 8 9]
Method 2: [0 1 2 3 4 5 6 7 8 9]


In [79]:
#Create boolean array
print("Method 1",np.ones((3,3),dtype=bool))
print("Method 2",np.full((3,3),True,dtype=bool))

Method 1 [[ True  True  True]
 [ True  True  True]
 [ True  True  True]]
Method 2 [[ True  True  True]
 [ True  True  True]
 [ True  True  True]]


In [12]:
#4. How to extract items that satisfy a given condition from 1D array?
#Extract all odd numbers from arr
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(arr[1::2])
print(arr[arr%2==1])

[1 3 5 7 9]
[1 3 5 7 9]


In [14]:
#5. How to replace items that satisfy a condition with another value in numpy array?
#Replace all odd numbers in arr with -1
arr[arr%2==1]=-1
print(arr)

[ 0 -1  2 -1  4 -1  6 -1  8 -1]


In [80]:
#6. How to replace items that satisfy a condition without affecting the original array?
#Replace all odd numbers in arr with -1 without changing arr
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
out=np.where(arr%2==1,-1,arr)
print(arr,out)

[0 1 2 3 4 5 6 7 8 9] [ 0 -1  2 -1  4 -1  6 -1  8 -1]


In [17]:
#7. How to reshape an array?
#Convert a 1D array to a 2D array with 2 rows
np.reshape(arr,(2,-1))

array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])

#Diff between reshape and resize
#Reshape-just changes the shapre but doesnot change the actual array
#Resize=Make changes in actual array

In [38]:
#8. How to stack two arrays vertically?
#Vertically
a=np.arange(10).reshape((2,-1))
b=np.ones(10).reshape((2,-1))
print(a,b)
print("############Answer###########")
print(np.concatenate((a,b),axis=0))

[[0 1 2 3 4]
 [5 6 7 8 9]] [[ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]
############Answer###########
[[ 0.  1.  2.  3.  4.]
 [ 5.  6.  7.  8.  9.]
 [ 1.  1.  1.  1.  1.]
 [ 1.  1.  1.  1.  1.]]


In [53]:
#9. How to stack two arrays horizontally?
a=np.repeat(1,10).reshape(2,-1)
b=np.full((2,5),2)
print("Method 1",np.concatenate((a,b),axis=1))
print("Method 2:",np.hstack([a,b]))
print("Method 3:",np.c_[a,b])

Method 1 [[1 1 1 1 1 2 2 2 2 2]
 [1 1 1 1 1 2 2 2 2 2]]
Method 2: [[1 1 1 1 1 2 2 2 2 2]
 [1 1 1 1 1 2 2 2 2 2]]
Method 3: [[1 1 1 1 1 2 2 2 2 2]
 [1 1 1 1 1 2 2 2 2 2]]


In [54]:
#10. How to generate custom sequences in numpy without hardcoding?
#Create the following pattern without hardcoding. Use only numpy functions and the below input array a.
a=np.array([1,2,3])
print(np.r_[np.repeat(a,3),np.tile(a,3)])

[1 1 1 2 2 2 3 3 3 1 2 3 1 2 3 1 2 3]


In [73]:
#11. How to get the common items between two python numpy arrays?
#Get the common items between a and b
a=np.array([1,2,3,2,3,4,3,4,5,6 ])
b = np.array([7,2,10,2,7,4,9,4,9,8])
print("Method 1:",np.unique(np.hstack([a[a==b],b[a==b]])))
print("Method 2:",np.intersect1d(a,b,assume_unique=False))

Method 1: [2 4]
Method 2: [2 4]


In [84]:
#12. How to remove from one array those items that exist in another?
#From array a remove all items present in array b
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])
np.setdiff1d(a,b)

array([1, 2, 3, 4])

In [87]:
#
np.r_[np.intersect1d(a[a!=b],np.intersect1d(a,b))]

array([5])

In [89]:
#13. How to get the positions where elements of two arrays match?
#Get the positions where elements of a and b match
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
np.where(a==b)

(array([1, 3, 5, 7], dtype=int64),)

In [95]:
#14. How to extract all numbers between a given range from a numpy array?
#Get all items between 5 and 10 from a.
a=np.arange(15)
index=np.where((a>=5) & (a<=10))
a[index]

array([ 5,  6,  7,  8,  9, 10])

In [7]:
##np.vectorize
#15. How to make a python function that handles scalars to work on numpy arrays?
#Convert the function maxx that works on two scalars, to work on two arrays.
a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])
def maxx(a,b):
    if a>=b:
        return a
    else:
        return b
pair_max=np.vectorize(maxx,otypes=[float])
pair_max(a,b)

array([ 6.,  7.,  9.,  8.,  9.,  7.,  5.])

In [11]:
#16. How to swap two columns in a 2d numpy array?
#Swap columns 1 and 2 in the array arr
arr=np.arange(9).reshape((3,3))
print(arr)
print(arr[:, [1,0,2]])

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[1 0 2]
 [4 3 5]
 [7 6 8]]


In [18]:
#17. How to swap two rows in a 2d numpy array?
#Q. Swap rows 1 and 2 in the array arr:
arr=np.array(range(9)).reshape((3,3))
print(arr)
print(arr[[0,2,1],:])

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[0 1 2]
 [6 7 8]
 [3 4 5]]


In [24]:
#18. How to reverse the rows of a 2D array?
#Q. Reverse the rows of a 2D array arr.

arr=np.arange(9).reshape((3,3))
print(arr)
print(arr[::-1])

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[6 7 8]
 [3 4 5]
 [0 1 2]]


In [25]:
#19. How to reverse the columns of a 2D array?
#Q. Reverse the column of a 2D array arr.
arr=np.arange(9).reshape((3,3))
print(arr)
print(arr[:,::-1])

[[0 1 2]
 [3 4 5]
 [6 7 8]]
[[2 1 0]
 [5 4 3]
 [8 7 6]]


In [34]:
#20. How to create a 2D array containing random floats between 5 and 10?
#Q. Create a 2D array of shape 5x3 to contain random decimal numbers between 5 and 10.
#Draw float value between range [0,1)
print(np.random.random((5,3)))
#Draw values from Gaussian distribution
print(np.random.randn(5))
#Draw values from a uniform distribution
print(np.random.uniform(low=5,high=10,size=(5,3)))
#Random values in a given shape
print(np.random.rand(5))
#Draw integer values from a uniform distribution 
print(np.random.randint(low=5,high=10,size=(5,3)))
#Make it float
print(np.random.randint(low=5,high=10,size=(5,3))+np.random.random((5,3)))

[[ 0.80081846  0.23891664  0.86483481]
 [ 0.17027084  0.59696984  0.87047118]
 [ 0.64301931  0.7832947   0.33779797]
 [ 0.83429264  0.09495925  0.77543768]
 [ 0.5833381   0.42005038  0.16899374]]
[ 0.4858275   0.47948298 -1.10287005  1.90537258 -0.91912339]
[[ 5.87371886  9.60730503  6.64392671]
 [ 9.35894181  7.07179282  7.02191368]
 [ 7.60946783  5.34758007  6.29014371]
 [ 5.0812402   5.5162207   6.81811848]
 [ 8.61411081  6.97164591  6.3295144 ]]
[ 0.84855     0.1079259   0.89600299  0.31986902  0.57967067]
[[8 8 8]
 [5 7 7]
 [8 7 9]
 [5 5 6]
 [9 5 5]]
[[ 9.03276068  5.09020471  6.6015279 ]
 [ 9.06206596  6.32382702  5.73358526]
 [ 8.2849369   5.52134242  6.61861978]
 [ 6.14471238  9.52219976  6.58097756]
 [ 5.64683192  7.09746193  8.24958854]]


In [38]:
#np.set_printoptions
#21. How to print only 3 decimal places in python numpy array?
#Q. Print or show only 3 decimal places of the numpy array rand_arr.
arr=np.random.random((3,3))
print(arr)
np.set_printoptions(precision=3)
arr[:,1]

[[ 0.974  0.129  0.661]
 [ 0.651  0.442  0.223]
 [ 0.453  0.459  0.596]]


array([ 0.129,  0.442,  0.459])

In [45]:
#Reset print options to default
np.set_printoptions(suppress=False)

In [51]:
#22. How to pretty print a numpy array by suppressing the scientific notation (like 1e10)?
#Pretty print rand_arr by suppressing the scientific notation (like 1e10)
arr=np.random.random((3,3))/1e5
print(arr)
np.set_printoptions(suppress=True)
print(arr)

[[ 0.00001   0.000009  0.000003]
 [ 0.000006  0.000001  0.000003]
 [ 0.000005  0.000006  0.000007]]
[[ 0.00001   0.000009  0.000003]
 [ 0.000006  0.000001  0.000003]
 [ 0.000005  0.000006  0.000007]]


In [58]:
#23. How to limit the number of items printed in output of numpy array?
#np.set_printoptions(suppress=False)
np.set_printoptions(threshold=6)
arr=np.arange(10)
arr

array([0, 1, 2, ..., 7, 8, 9])

In [59]:
#24. How to print the full numpy array without truncating
#Print the full numpy array a without truncating.
np.set_printoptions(threshold=np.nan)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [64]:
#25. How to import a dataset with numbers and texts keeping the text intact in python numpy?
#Q. Import the iris dataset keeping the text intact.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
print(iris.shape)
print(iris[:3])

(150, 5)
[[b'5.1' b'3.5' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.9' b'3.0' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.7' b'3.2' b'1.3' b'0.2' b'Iris-setosa']]


In [None]:
#26. How to extract a particular column from 1D array of tuples?
#Q. Extract the text column species from the 1D iris imported in previous question.

In [70]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype=None)
names = ('sepallength', 'sepalwidth', 'petallength', 'petalwidth', 'species')
print(iris.shape)
species=np.array([row[4] for row in iris])
species[:5]

(150,)


array([b'Iris-setosa', b'Iris-setosa', b'Iris-setosa', b'Iris-setosa',
       b'Iris-setosa'],
      dtype='|S18')

In [72]:
#27. How to convert a 1d array of tuples to a 2d numpy array?
#Q. Convert the 1D iris to 2D array iris_2d by omitting the species text field.
#Method 1
iris_1d=np.genfromtxt(url,delimiter=',',dtype='object')
iris_2d=np.array([row.tolist()[:4] for row in iris_1d])
iris_2d.shape

(150, 4)

In [73]:
#28. How to compute the mean, median, standard deviation of a numpy array?
#Find the mean, median, standard deviation of iris's sepallength (1st column)
sepal_length=np.genfromtxt(url,delimiter=',',dtype='float',usecols=[0])
mu,med,sd=np.mean(sepal_length),np.median(sepal_length),np.std(sepal_length)
print(mu,med,sd)

5.84333333333 5.8 0.825301291785


In [82]:
#29. How to normalize an array so the values range exactly between 0 and 1?
#Q. Create a normalized form of iris's sepallength whose values range exactly between 0 and 1 so that the minimum has value 0 and maximum has value 1.
Smax,Smin=np.max(sepal_length),np.min(sepal_length)
norm_sepal_length=(sepal_length-Smin)/(Smax-Smin)
norm_sepal_length
#2nd method
norm_sepal_length=(sepal_length-sepal_length.min())/(sepal_length.max()-sepal_length.min())
#print(norm_sepal_length)

In [91]:
#30. How to compute the softmax score?
#Q. Compute the softmax score of sepallength.
iris=np.genfromtxt(url,delimiter=',',dtype='object')
sepal_length=np.array([float(row[0]) for row in iris])
def softmax(x):
    e_x=np.exp(x)
    return(e_x/e_x.sum(axis=0))
#softmax(sepal_length)

In [92]:
#31. How to find the percentile scores of a numpy array?
#Q. Find the 5th and 95th percentile of iris's sepallength
np.percentile(sepal_length,q=[5,95])

array([ 4.6  ,  7.255])

In [102]:
#32. How to insert values at random positions in an array?
#Q. Insert np.nan values at 20 random positions in iris_2d dataset
iris2d=np.genfromtxt(url,delimiter=',',dtype='object')
i,j=np.where(iris2d)
iris2d[np.random.choice(i,20),np.random.choice(j,20)]=np.nan
print(iris2d[:10])
#Method 2
iris2d=np.genfromtxt(url,delimiter=',',dtype='object')
iris2d[np.random.randint(150,size=20),np.random.randint(4,size=20)]=np.nan
print(iris2d[:10])

[[b'5.1' b'3.5' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.9' b'3.0' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.7' b'3.2' nan b'0.2' b'Iris-setosa']
 [b'4.6' b'3.1' b'1.5' b'0.2' b'Iris-setosa']
 [b'5.0' b'3.6' b'1.4' b'0.2' b'Iris-setosa']
 [b'5.4' b'3.9' b'1.7' b'0.4' b'Iris-setosa']
 [b'4.6' b'3.4' b'1.4' b'0.3' b'Iris-setosa']
 [b'5.0' b'3.4' b'1.5' b'0.2' b'Iris-setosa']
 [b'4.4' b'2.9' b'1.4' nan b'Iris-setosa']
 [b'4.9' b'3.1' b'1.5' b'0.1' b'Iris-setosa']]
[[b'5.1' b'3.5' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.9' b'3.0' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.7' b'3.2' nan b'0.2' b'Iris-setosa']
 [b'4.6' b'3.1' b'1.5' b'0.2' b'Iris-setosa']
 [b'5.0' b'3.6' b'1.4' b'0.2' b'Iris-setosa']
 [b'5.4' b'3.9' nan b'0.4' b'Iris-setosa']
 [b'4.6' b'3.4' b'1.4' b'0.3' b'Iris-setosa']
 [b'5.0' b'3.4' b'1.5' b'0.2' b'Iris-setosa']
 [b'4.4' b'2.9' b'1.4' b'0.2' b'Iris-setosa']
 [b'4.9' b'3.1' b'1.5' b'0.1' b'Iris-setosa']]


In [116]:
#33. How to find the position of missing values in numpy array?
#Q. Find the number and position of missing values in iris_2d's sepallength (1st column)
iris2d=np.genfromtxt(url,dtype='float',delimiter=',',usecols=[0,1,2,3])
i,j=np.where(iris2d)
iris2d[np.random.choice(i,size=20),np.random.choice(j,size=20)]=np.nan
np.isnan(iris2d[:,0]).sum()
np.where(np.isnan(iris2d[:,0]))

(array([ 9, 15, 18, 43, 77, 88], dtype=int64),)

In [119]:
#34. How to filter a numpy array based on two or more conditions?
#Q. Filter the rows of iris_2d that has petallength (3rd column) > 1.5 and sepallength (1st column) < 5.0
condition=(iris2d[:,2]>1.5)&(iris2d[:,1]<5)
iris2d[condition][:10]

  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


array([[ 4.8,  3.4,  1.6,  0.2],
       [ nan,  3.8,  1.7,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  nan],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 4.8,  3.1,  1.6,  0.2],
       [ nan,  3.5,  1.6,  0.6]])

In [123]:
#35. How to drop rows that contain a missing value from a numpy array?
#Q. Select the rows of iris_2d that does not have any nan value.
iris2d=np.genfromtxt(url,dtype='float',delimiter=',',usecols=[0,1,2,3])
any_nan_in_row=np.array([~np.any(np.isnan(row)) for row in iris_2d])

TypeError: ufunc 'isnan' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''