# Numpy exercises

## 1. Import numpy as np and see the version

In [None]:
import numpy as np

# Report which NumPy release is installed in this environment.
installed_version = np.__version__
print(installed_version)

## 2. How to create a 1D array?

Desired output:

`array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])`

In [None]:
# Ten consecutive integers starting at zero, as a one-dimensional array.
arr = np.arange(0, 10, 1)
arr

## 3. How to extract items that satisfy a given condition from 1D array?

Extract all odd numbers from `arr`.

`arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])`

Desired output:

`array([1, 3, 5, 7, 9])`

In [None]:
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# Boolean mask: True wherever the element leaves remainder 1 when divided by 2.
is_odd = (arr % 2) == 1
arr[is_odd]

## 4. How to replace items that satisfy a condition with another value in numpy array?

Replace all odd numbers in `arr` with `-1`

`arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])`

Desired Output:

`array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])`

In [None]:
# Start from the array given in the exercise so this cell does not depend on
# whatever value `arr` was left with by a previously executed cell.
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

# Boolean-mask assignment overwrites the odd entries of `arr` in place.
arr[arr % 2 == 1] = -1
arr

## 5. How to replace items that satisfy a condition without affecting the original array?

Replace all odd numbers in `arr` with `-1` without changing `arr`

`arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])`

Desired Output:

````python
#arr
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

#out
array([ 0, -1,  2, -1,  4, -1,  6, -1,  8, -1])
````

In [None]:
arr = np.arange(10)

# np.where returns a new array: -1 where the mask holds, the value from `arr`
# everywhere else, so `arr` itself is not modified.
odd_mask = (arr % 2) == 1
out = np.where(odd_mask, -1, arr)

print(arr)
out

## 6. How to reshape an array?

Convert a 1D array to a 2D array with 2 rows.

````python
np.arange(10)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
````

Desired Output:

````python
array([[0, 1, 2, 3, 4],
       [5, 6, 7, 8, 9]])
````

In [None]:
arr = np.arange(10)

# Two rows; the -1 lets NumPy work out the number of columns from the size.
np.reshape(arr, (2, -1))

## 7. How to stack two arrays vertically?

Stack arrays `a` and `b` vertically.

````python
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)
````

Desired Output:

````python
[[0, 1, 2, 3, 4],
 [5, 6, 7, 8, 9],
 [1, 1, 1, 1, 1],
 [1, 1, 1, 1, 1]]
````

In [None]:
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)

# Answers: three equivalent ways of placing `b` underneath `a`.
stacked_rows = [
    np.concatenate((a, b), axis=0),  # Method 1
    np.vstack((a, b)),               # Method 2
    np.r_[a, b],                     # Method 3
]

# Print the three results with one empty line between consecutive results.
print(*stacked_rows, sep="\n\n")

## 8. How to stack two arrays horizontally?

Stack arrays `a` and `b` horizontally.

````python
a = np.arange(10).reshape(2,-1)
b = np.repeat(1, 10).reshape(2,-1)
````

Desired Output:

````python
[[0, 1, 2, 3, 4, 1, 1, 1, 1, 1],
 [5, 6, 7, 8, 9, 1, 1, 1, 1, 1]]
````

In [None]:
a = np.arange(10).reshape(2, -1)
b = np.repeat(1, 10).reshape(2, -1)

# Answers: three equivalent ways of placing `b` to the right of `a`.
stacked_cols = [
    np.concatenate((a, b), axis=1),  # Method 1
    np.hstack((a, b)),               # Method 2
    np.c_[a, b],                     # Method 3
]

# Print the three results with one empty line between consecutive results.
print(*stacked_cols, sep="\n\n")

## 9. How to get the common items between two numpy arrays?

Get the common items between `a` and `b`.

````python
a = np.array([1,2,3,2,3,4,3,4,5,6])
b = np.array([7,2,10,2,7,4,9,4,9,8])
````

Desired Output:

````python
array([2, 4])
````

In [None]:
a = np.array([1, 2, 3, 2, 3, 4, 3, 4, 5, 6])
b = np.array([7, 2, 10, 2, 7, 4, 9, 4, 9, 8])

# Values present in both arrays; the inputs contain repeats, so they are not
# declared unique.
np.intersect1d(a, b, assume_unique=False)

## 10. How to remove from one array those items that exist in another?

From array `a` remove all items present in array `b`

````python
a = np.array([1,2,3,4,5])
b = np.array([5,6,7,8,9])
````

Desired Output:

````python
array([1,2,3,4])
````

In [None]:
a = np.array([1, 2, 3, 4, 5])
b = np.array([5, 6, 7, 8, 9])

# Keep only the values of 'a' that do not appear anywhere in 'b'
np.setdiff1d(a, b, assume_unique=False)

## 11. How to make a python function that handles scalars to work on numpy arrays?

Convert the function `maxx` that works on two scalars, to work on two arrays.

````python
def maxx(x, y):
    """Get the maximum of two items"""
    if x >= y:
        return x
    else:
        return y

maxx(1, 5)
# 5
````

Desired Output:

````python
a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])
pair_max(a, b)
# array([ 6,  7,  9,  8,  9,  7,  5])
````


In [None]:
def maxx(x, y):
    """Return the larger of two scalars (`x` itself when they are equal)."""
    return x if x >= y else y


# Wrap the scalar function so it is applied element by element to arrays;
# `otypes` fixes the dtype of the returned array to int.
pair_max = np.vectorize(maxx, otypes=[int])

a = np.array([5, 7, 9, 8, 6, 4, 5])
b = np.array([6, 3, 4, 8, 9, 7, 1])

pair_max(a, b)

## 12. How to import a dataset with numbers and texts keeping the text intact in numpy?

Import the iris dataset (https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data) keeping the text intact, and print the first 3 rows.

Desired output:

````python
array([[b'5.1', b'3.5', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.9', b'3.0', b'1.4', b'0.2', b'Iris-setosa'],
       [b'4.7', b'3.2', b'1.3', b'0.2', b'Iris-setosa']], dtype=object)
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# Load every comma-separated field as a generic Python object so the species
# text column is kept alongside the numeric columns.
iris = np.genfromtxt(
    url,
    dtype='object',
    delimiter=',',
    encoding=None,
)

# Print the first 3 rows
iris[0:3]

## 13. How to extract a particular column from 1D array of tuples?

Extract the text column `species` from the 1D iris imported in previous question, and print the first 5 values.

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, delimiter=',', dtype=None, encoding=None)
````

Desired output:

````python
array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa'], dtype='<U15')
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_1d = np.genfromtxt(url, dtype=None, delimiter=',', encoding=None)

# Pull the entry at index 4 (the species text) out of every record.
species_labels = [record[4] for record in iris_1d]
species = np.array(species_labels)

# Print the first 5 values
species[0:5]

## 14. How to compute the mean, median, standard deviation of a numpy array?

Find the mean, median, standard deviation of iris's `sepallength` column (1st column).

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
````

Desired output:

````python
5.843333333333334 5.8 0.8253012917851409
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'

# usecols=[0] restricts the load to the first column (sepallength).
sepallength = np.genfromtxt(url, dtype='float', delimiter=',', usecols=[0])

# Solution
mu = np.mean(sepallength)
med = np.median(sepallength)
sd = np.std(sepallength)
print(mu, med, sd)

## 15. How to normalize an array so the values range exactly between 0 and 1?

Create a normalized form of iris's `sepallength` whose values range exactly between 0 and 1 so that the minimum has value 0 and maximum has value 1.

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0])
````

Desired output (sample of first 10 values)

````python
[0.22222222 0.16666667 0.11111111 0.08333333 0.19444444 0.30555556
 0.08333333 0.19444444 0.02777778 0.16666667]
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
sepallength = np.genfromtxt(url, dtype='float', delimiter=',', usecols=[0])

# Min-max scaling: shift by the minimum, then divide by the spread, so the
# smallest value maps to 0 and the largest to 1.
Smin = sepallength.min()
Smax = sepallength.max()
S = (sepallength - Smin) / (Smax - Smin)

print(S[:10])

## 16. How to find the position of missing values in numpy array?

Find the number and position of missing values in iris_2d's `sepallength` (1st column)

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
````

Desired Output:

````python
# The number and positions will be different since we're assigning random nan's
Number of missing values: 
 6
Position of missing values: 
 (array([  8,  21,  71, 106, 134, 135]),)
````


In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, dtype='float', delimiter=',', usecols=[0, 1, 2, 3])

# Overwrite 20 randomly drawn (row, column) positions with NaN; draws may
# repeat, so fewer than 20 distinct cells can end up missing.
nan_rows = np.random.randint(150, size=20)
nan_cols = np.random.randint(4, size=20)
iris_2d[nan_rows, nan_cols] = np.nan

# Mask of missing entries in the first column (sepallength).
sepallength_missing = np.isnan(iris_2d[:, 0])
print("Number of missing values: \n", sepallength_missing.sum())
print("Position of missing values: \n", np.where(sepallength_missing))

## 17. How to find if a given array has any null values?

Find out if iris_2d has any missing values.

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
````

Desired output:
````python
False
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, dtype='float', delimiter=',', usecols=[0, 1, 2, 3])

# True as soon as at least one entry of the array is NaN.
np.any(np.isnan(iris_2d))

## 18. How to replace all missing values with 0 in a numpy array?

Replace all occurrences of `nan` with 0 in a numpy array.

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, delimiter=',', dtype='float', usecols=[0,1,2,3])
iris_2d[np.random.randint(150, size=20), np.random.randint(4, size=20)] = np.nan
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris_2d = np.genfromtxt(url, dtype='float', delimiter=',', usecols=[0, 1, 2, 3])

# Overwrite 20 randomly drawn (row, column) positions with NaN.
nan_rows = np.random.randint(150, size=20)
nan_cols = np.random.randint(4, size=20)
iris_2d[nan_rows, nan_cols] = np.nan

# Zero out every NaN entry in place, then confirm that none remain.
nan_mask = np.isnan(iris_2d)
iris_2d[nan_mask] = 0
print(np.isnan(iris_2d).any())

# Show the entries that are now equal to zero.
iris_2d[iris_2d == 0]

## 19. How to find the most frequent value in a numpy array?

Find the most frequent value of `petallength` (3rd column) in iris dataset.

````python
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, delimiter=',', dtype='object')
````

Desired output:

````python
b'1.5'
````

In [None]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data'
iris = np.genfromtxt(url, dtype='object', delimiter=',')

# Distinct values of the column at index 2 (petallength) with their counts.
petallength = iris[:, 2]
vals, counts = np.unique(petallength, return_counts=True)

# The value sitting at the position of the largest count is the most frequent.
most_frequent_idx = counts.argmax()
print(vals[most_frequent_idx])