**Import Data from csv file**

In [2]:
import numpy as np

# Read the customer table into a 2-D float array. skip_header=1 drops the
# header line; genfromtxt loads missing entries as nan.
data = np.genfromtxt(
    'Datasets/Mall_Customers_Int.csv',
    delimiter=',',
    skip_header=1,
)

In [None]:
# Columns: CustomerID, Genre, Age, Annual_Income, Spending_Score
# Show only the first rows; dumping the whole array floods the output.
data[:10]

In [None]:
# (number of rows, number of columns)
np.shape(data)

(200, 5)

**Filter rows where the second column = 1**

Create a boolean mask and use it as the row (or column) index.

In [None]:
# Boolean mask: True where the second column equals 1.
mask = np.equal(data[:, 1], 1)
mask[:10]    # first 10

In [None]:
# A boolean mask on the first axis keeps the matching rows with all columns.
data[mask]

**Rows that contain a missing value in the 4th column**

In [None]:
# Keep the rows whose 4th column (index 3) is nan.
data[np.isnan(data[:, 3])]

array([[13.,  0., 58., nan, 15.],
       [27.,  0., 45., nan, 32.],
       [57.,  0., 51., nan, 50.],
       [68.,  0., 68., nan, 48.]])

**Rows that contain a missing value in any column**

The `any` method returns `True` if at least one of the values is `True`. With `axis=1` it reduces across the columns, giving one result per row; with `axis=0` it reduces down the rows, giving one result per column.

In [None]:
# Keep the rows that have a nan in at least one column.
data[np.any(np.isnan(data), axis=1)]

array([[ 8.,  0., 23., 18., nan],
       [13.,  0., 58., nan, 15.],
       [17.,  0., 35., 21., nan],
       [19.,  1., nan, 23., 29.],
       [27.,  0., 45., nan, 32.],
       [37.,  0., nan, 34., 17.],
       [57.,  0., 51., nan, 50.],
       [68.,  0., 68., nan, 48.]])

In [None]:
# One boolean per row: True if the row has at least one nan.
np.any(np.isnan(data), axis=1)

In [None]:
# Inverse of the any-missing mask: True for rows with no nan at all.
np.logical_not(np.isnan(data).any(axis=1))

**Drop all rows that contain one or more missing values**

In [None]:
# "No value is nan" is the same as "every value is not-nan".
data[(~np.isnan(data)).all(axis=1)]  # rows with no missing values

**Get the maximum value in each row**<br>
  `axis=1` does it row-wise

In [None]:
# Row-wise maximum (one value per row).
np.max(data, axis=1)

If a row contains a missing value, `max` returns `nan` for that row. Use `np.nanmax` instead to ignore missing values.

In [None]:
# Row-wise maximum, ignoring nan entries.
np.nanmax(data, axis=1)

Similarly, NumPy provides the `nanmin`, `nanmean`, `nanmedian`, and `nanpercentile` functions.

**Get the maximum value in each column**

In [None]:
# Column-wise maximum; a column containing nan yields nan.
np.max(data, axis=0)

array([200.,   1.,  nan,  nan,  nan])

Ignore missing data with `nanmax`.

In [None]:
# Column-wise maximum, ignoring nan entries.
np.nanmax(data, axis=0)

array([200.,   1.,  70., 137.,  99.])

**Writing if-else logic using** `np.where()`

Example: If the second column = 1, keep score (5th col) as it is, else divide it by 2.

In [None]:
np.where(
    data[:, 1] == 1,   # condition: second column equals 1
    data[:, 4],        # where true: keep the score (5th column) as it is
    data[:, 4] / 2,    # elsewhere: halve the score
)

**Get the position of the maximum value in each row**

In [None]:
# Column index of the largest non-missing value in each row.
# np.argmax treats nan as the maximum, so for a row with a missing value it
# would return the position of the nan. np.nanargmax skips nan entries instead.
# Note: np.nanargmax raises ValueError if a row consists entirely of nan.
max_pos = np.nanargmax(data, axis=1)
max_pos

In [None]:
# first five rows, all columns
data[:5, :]

array([[ 1.,  1., 19., 15., 39.],
       [ 2.,  1., 21., 15., 81.],
       [ 3.,  0., 20., 16.,  6.],
       [ 4.,  0., 23., 16., 77.],
       [ 5.,  0., 31., 17., 40.]])

In [None]:
# max value in each row: pair every row index with its column from max_pos and
# fetch all entries in one vectorized lookup instead of a Python-level loop
# (the result is an ndarray rather than a list)
data[np.arange(len(max_pos)), max_pos]

**Get the position of values that satisfy a given condition**

In [None]:
# row positions where the second column equals 1
# (argwhere returns one [index] row per match)
pos = np.argwhere(np.equal(data[:, 1], 1))
pos[:5]

array([[ 0],
       [ 1],
       [ 8],
       [10],
       [14]])

In [None]:
# first five rows, all columns
data[:5]

array([[ 1.,  1., 19., 15., 39.],
       [ 2.,  1., 21., 15., 81.],
       [ 3.,  0., 20., 16.,  6.],
       [ 4.,  0., 23., 16., 77.],
       [ 5.,  0., 31., 17., 40.]])

In [None]:
# Challenge 1: From Mall_Customers_Int.csv, find the row positions where the
# 2nd column is 1 and the 3rd column has a value < 21. Extract the values from
# these columns.
pos = np.argwhere(np.logical_and(data[:, 1] == 1, data[:, 2] < 21))
pos

array([[  0],
       [ 17],
       [ 33],
       [ 61],
       [ 65],
       [ 68],
       [ 91],
       [ 99],
       [113],
       [134],
       [138],
       [162]])

In [None]:
# How many such rows exist? (pos has one row per match)
pos.shape[0]

12

In [None]:
# Create a new array from Mall_Customers_Int.csv that is 1 where the 2nd
# column = 1 and the 3rd column < 21, and 0 everywhere else.
arr = np.where(
    np.logical_and(data[:, 1] == 1, data[:, 2] < 21),
    1,
    0,
)
arr