## NumPy

### NumPy (Numerical Python) is the core library for scientific computing in Python. It provides a high-performance multidimensional array object and tools for working with these arrays.

In [2]:
import numpy as np

### NumPy Arrays
**1D array(axis 0), 2D array(axis 0, axis 1), and 3D array(axis 0, axis 1, axis 2)**

In [3]:
a = np.array([1,2,3])

In [3]:
a

array([1, 2, 3])

In [7]:
# 2D array is a matrix with rows and columns.
b = np.array([(1.5,2,3), (4,5,6)], dtype=float)

b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [6]:
# A 3D array is a collection of 2D arrays (matrices)
c = np.array([[(1, 2, 3), (4,5,6)], [(3,2,1), (4,5,6)]], dtype=int)

c

array([[[1, 2, 3],
        [4, 5, 6]],

       [[3, 2, 1],
        [4, 5, 6]]])

In [8]:
new_array = np.array([ [ [1,2,3] ], [ [3,4,2] ] ], dtype=int)

In [9]:
# slice or depth(layers), rows in each slice, columns in each slice
new_array.shape

(2, 1, 3)

In [12]:
# dimensions of array

b.ndim

2

### Initial Placeholders

In [14]:
# Create an array of zeros
np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [14]:
# How to return empty array
arr = np.zeros((1,0))    #returns empty array

arr

array([], shape=(1, 0), dtype=float64)

In [16]:
# Create an array of ones
np.ones((3, 4), dtype=np.int16)

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int16)

In [17]:
# Create an array of evenly spaced values (step value)
d = np.arange(10, 26, 5)

In [18]:
d

array([10, 15, 20, 25])

In [19]:
# Create an array of evenly spaced values (number of samples)
np.linspace(0, 2, 9)

array([0.  , 0.25, 0.5 , 0.75, 1.  , 1.25, 1.5 , 1.75, 2.  ])

In [22]:
# Create a constant array
e = np.full((2,2), 69)

In [23]:
e

array([[69, 69],
       [69, 69]])

In [24]:
# Create a 3x3 identity matrix
f = np.eye(3)

In [25]:
f

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [27]:
# Create an array with random values
np.random.random((3, 3))

array([[0.00542819, 0.09055783, 0.5042433 ],
       [0.89726327, 0.28447411, 0.36742965],
       [0.41320489, 0.37241411, 0.37472612]])

In [32]:
# fills it with uninitialized, arbitrary values (any random values from memory).
np.empty((3, 2))

array([[1.5, 2. ],
       [3. , 4. ],
       [5. , 6. ]])

## IO

In [33]:
### Saving & Loading On Disk

In [34]:
a

array([1, 2, 3])

In [35]:
np.save('my_array', a)

In [36]:
np.load('my_array.npy')

array([1, 2, 3])

In [41]:
### Save multiple arrays

In [42]:
np.savez('array.npz', a=a, b=b)

In [43]:
loaded_arrays  = np.load('array.npz')

In [44]:
loaded_array1 = loaded_arrays['a']

In [45]:
loaded_array1

array([1, 2, 3])

In [46]:
### Saving & Loading Text Files

In [47]:
# Write a small 3x3 matrix to disk first so this cell does not depend on a
# pre-existing "myfile.txt" and still runs after Restart & Run All.
np.savetxt("myfile.txt", np.arange(1, 10).reshape(3, 3))

# Read the text file back; np.loadtxt returns the values as floats.
np.loadtxt("myfile.txt")

array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]])

### Data Types

In [26]:
np.int64

numpy.int64

In [27]:
np.float32

numpy.float32

In [50]:
 np.complex128

numpy.complex128

In [51]:
# np.bool8 was a deprecated alias that was removed in NumPy 2.0; np.bool_ is the canonical name.
np.bool_

numpy.bool_

In [52]:
# np.object0 was a deprecated alias that was removed in NumPy 2.0; np.object_ is the canonical name.
np.object_

numpy.object_

In [53]:
# np.string_ was an alias of np.bytes_ that was removed in NumPy 2.0; use np.bytes_ directly.
np.bytes_

numpy.bytes_

In [54]:
# np.unicode_ was an alias of np.str_ that was removed in NumPy 2.0; use np.str_ directly.
np.str_

numpy.str_

## Inspecting Your Array

In [28]:
b.shape

(2, 3)

In [29]:
len(a)

3

In [30]:
b.ndim

2

In [31]:
a

array([1, 2, 3])

In [32]:
b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [33]:
c

array([[[1, 2, 3],
        [4, 5, 6]],

       [[3, 2, 1],
        [4, 5, 6]]])

In [34]:
len(c)

2

In [35]:
e

array([[69, 69],
       [69, 69]])

In [36]:
e.size

4

In [59]:
b.dtype.name

'float64'

In [60]:
# Convert an array to a different type
b.astype('int64')

array([[1, 2, 3],
       [4, 5, 6]], dtype=int64)

In [61]:
b.dtype

dtype('float64')

### Array Mathematics
**Arithmetic Operations**

In [63]:
a

array([1, 2, 3])

In [64]:
b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [68]:
# Subtraction
g = a - b

In [69]:
g

array([[-0.5,  0. ,  0. ],
       [-3. , -3. , -3. ]])

In [70]:
np.subtract(a,b)

array([[-0.5,  0. ,  0. ],
       [-3. , -3. , -3. ]])

In [71]:
# Addition
b + a

array([[2.5, 4. , 6. ],
       [5. , 7. , 9. ]])

In [72]:
np.add(b, a)

array([[2.5, 4. , 6. ],
       [5. , 7. , 9. ]])

In [73]:
# Division
a / b

array([[0.66666667, 1.        , 1.        ],
       [0.25      , 0.4       , 0.5       ]])

In [74]:
np.divide(a, b)

array([[0.66666667, 1.        , 1.        ],
       [0.25      , 0.4       , 0.5       ]])

In [75]:
# Multiplication
a * b

array([[ 1.5,  4. ,  9. ],
       [ 4. , 10. , 18. ]])

In [76]:
np.multiply(a,b)

array([[ 1.5,  4. ,  9. ],
       [ 4. , 10. , 18. ]])

In [77]:
# Exponentiation
np.exp(b)

array([[  4.48168907,   7.3890561 ,  20.08553692],
       [ 54.59815003, 148.4131591 , 403.42879349]])

In [78]:
# Square root
np.sqrt(b)

array([[1.22474487, 1.41421356, 1.73205081],
       [2.        , 2.23606798, 2.44948974]])

In [79]:
# Print sines of an array
np.sin(a)

array([0.84147098, 0.90929743, 0.14112001])

In [80]:
# Element-wise cosine
np.cos(b)

array([[ 0.0707372 , -0.41614684, -0.9899925 ],
       [-0.65364362,  0.28366219,  0.96017029]])

In [81]:
# Element-wise natural logarithm
np.log(a)

array([0.        , 0.69314718, 1.09861229])

In [83]:
e

array([[69, 69],
       [69, 69]])

In [84]:
f = np.eye(2)
f

array([[1., 0.],
       [0., 1.]])

In [85]:
e.dot(f)

array([[69., 69.],
       [69., 69.]])

### Comparison

In [86]:
# Element-wise comparison
a == b

array([[False,  True,  True],
       [False, False, False]])

In [87]:
a < 2

array([ True, False, False])

In [88]:
# Array-wise comparison
np.array_equal(a, b)

False

### Aggregate Functions

In [89]:
# Array-wise sum
a

array([1, 2, 3])

In [90]:
a.sum()

6

In [11]:
# Count how many times each non-negative integer value (0, 1, 2, ...) occurs in the NumPy array.
np.bincount(a)

array([0, 1, 1, 1], dtype=int64)

In [91]:
# Array-wise minimum value
a.min()

1

In [92]:
# Maximum value along axis 0 (i.e. the maximum of each column)
b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [93]:
b.max(axis=0)

array([4., 5., 6.])

In [96]:
# Cumulative sum of the elements
b.cumsum(axis=1)

array([[ 1.5,  3.5,  6.5],
       [ 4. ,  9. , 15. ]])

In [97]:
# Mean
a

array([1, 2, 3])

In [8]:
# Computes the arithmetic mean (average) of the array elements along a specified axis.
np.mean(a)

2.0

In [9]:
# Computes the weighted average of the array elements. With weights=None (the default)
# every element counts equally, so the result is the same as np.mean().
np.average(a, weights=None)

2.0

In [99]:
# Median
np.median(b)

3.5

In [100]:
# Correlation coefficient
np.corrcoef(a, b)

array([[1.        , 0.98198051, 1.        ],
       [0.98198051, 1.        , 0.98198051],
       [1.        , 0.98198051, 1.        ]])

In [101]:
# Standard deviation
np.std(b)

1.5920810978785667

### Copying Arrays

In [109]:
# Create a view of the array with the same data
h = a.view()

In [110]:
h

array([1, 2, 3])

In [111]:
# Create a copy of the array
h = np.copy(a)

In [112]:
h

array([1, 2, 3])

In [113]:
# Create a deep copy of the array
h = a.copy()

h

array([1, 2, 3])

### Sorting Arrays

In [114]:
# Sort an array
a.sort()

In [115]:
a

array([1, 2, 3])

In [122]:
b.sort()

In [123]:
b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

### Subsetting, Slicing, Indexing
**Subsetting**

In [124]:
# Select the element at the 2nd index
a

array([1, 2, 3])

In [125]:
a[2]

3

In [120]:
b

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

In [126]:
# Select the element at row 0 column 2
b[0, 2]

3.0

**Slicing**

In [127]:
# Select items at index 0 and 1
a[0:2]

array([1, 2])

In [128]:
# Select items at rows 0 and 1 in column 1
b[0:2,1]

array([2., 5.])

In [129]:
# Select all items at row 0
b[:1]

array([[1.5, 2. , 3. ]])

In [5]:
# Reversed array a
a[ : :-1]

array([3, 2, 1])

In [19]:
# Another method to reverse
np.flipud(a)

array([3, 2, 1])

**Boolean Indexing**

In [131]:
# Select elements from a less than 2
a[a < 2]

array([1])

### Array Manipulation
**Transposing Array**

In [132]:
# Permute array dimensions
i = np.transpose(b)

In [133]:
i

array([[1.5, 4. ],
       [2. , 5. ],
       [3. , 6. ]])

In [134]:
i.T

array([[1.5, 2. , 3. ],
       [4. , 5. , 6. ]])

**Changing Array Shape**

In [135]:
# Flatten the array
b.ravel()

array([1.5, 2. , 3. , 4. , 5. , 6. ])

In [136]:
# Reshape, but don’t change data
g

array([[-0.5,  0. ,  0. ],
       [-3. , -3. , -3. ]])

In [140]:
# -1 is the documented placeholder for "infer this dimension from the array size";
# g holds 6 elements, so 3 rows gives 2 columns. reshape returns a new array object
# and leaves g's own shape unchanged.
g.reshape(3, -1)

array([[-0.5,  0. ],
       [ 0. , -3. ],
       [-3. , -3. ]])

**Adding/Removing Elements**

In [150]:
# g before resizing; ndarray.resize (next cell) changes the shape in place to (3, 2) rather than returning a new array
g

array([[-0.5,  0. ],
       [ 0. , -3. ],
       [-3. , -3. ]])

In [151]:
g.resize((3, 2))

g

array([[-0.5,  0. ],
       [ 0. , -3. ],
       [-3. , -3. ]])

In [152]:
# Append items to an array
np.append(h, g)

array([ 1. ,  2. ,  3. , -0.5,  0. ,  0. , -3. , -3. , -3. ])

In [153]:
# Insert items in an array
a = np.insert(a, 1, 5)

In [154]:
a

array([1, 5, 2, 3])

In [155]:
# Delete items from an array
np.delete(a,[1])

array([1, 2, 3])

**Combining Arrays**

In [156]:
#  Concatenate arrays
np.concatenate((a,d),axis=0)

array([ 1,  5,  2,  3, 10, 15, 20, 25])

In [158]:
# Stack arrays vertically (row-wise)
np.vstack((a,d))

array([[ 1,  5,  2,  3],
       [10, 15, 20, 25]])

**Splitting Arrays**

In [162]:
# Split the array horizontally into 2 equal parts
np.hsplit(a, 2)

[array([1, 5]), array([2, 3])]

In [163]:
a

array([1, 5, 2, 3])

In [164]:
# Split the array vertically (along axis 0) into 2 equal parts
np.vsplit(c, 2)

[array([[[1, 2, 3],
         [4, 5, 6]]]),
 array([[[3, 2, 1],
         [4, 5, 6]]])]

### Convert Pandas Dataframe into Numpy Array

In [15]:
import pandas as pd
import numpy as np
# Pandas DataFrame
column_data = {'A': [3, 2, 1], 'B': [6,5,4], 'C': [9, 8, 7]}
row_labels = ['i', 'j', 'k']
df = pd.DataFrame(data=column_data, index=row_labels)
print("Pandas DataFrame: ")
print(df)

# Convert the whole DataFrame to a 2-D NumPy array (one array row per DataFrame row)
np_arr = df.to_numpy()
print("Pandas DataFrame to NumPy array: ")
print(np_arr)


# Pick only columns B and C, then convert that subset to a NumPy array
selected_columns = ['B', 'C']
arr = df[selected_columns].to_numpy()
print("Convert B and C columns of Pandas DataFrame to NumPy Array: ")
print (arr)

Pandas DataFrame: 
   A  B  C
i  3  6  9
j  2  5  8
k  1  4  7
Pandas DataFrame to NumPy array: 
[[3 6 9]
 [2 5 8]
 [1 4 7]]
Convert B and C columns of Pandas DataFrame to NumPy Array: 
[[6 9]
 [5 8]
 [4 7]]


### Vectorization in Numpy
**Function Vectorization technically means that the function is applied to all elements in the array.**

In [16]:
import numpy as np
# Define your function
def add(arr1, arr2):
    """Return the sum of ``arr1`` and ``arr2`` as computed by the ``+`` operator."""
    total = arr1 + arr2
    return total

# Two sample 1-D integer arrays of equal length.
arr1 = np.array([1,2,3])
arr2 = np.array([4,5,6])

# Wrap add() with np.vectorize so it can be called directly on arrays.
vectorized_add = np.vectorize(add)

# Call the vectorized wrapper on both arrays.
result = vectorized_add(arr1, arr2)

print(result)

[5 7 9]


### How do you find the local peaks (or maxima) in a 1-D NumPy Array?

In [17]:
# Using .where() method:

import numpy as np
# define NumPy array
arr = np.array([1, 4, 8, 1, 3, 5, 1, 6, 1, -5, -1, 19, 2])

# An interior element is a local peak when it is strictly greater than both of
# its neighbours. The two boolean masks are combined with the logical AND
# operator (&) rather than arithmetic multiplication.
# np.where returns positions relative to the interior slice arr[1:-1], so add 1
# to turn them into indices of the original array.
maxima_peaks_positions = np.where(
    (arr[1:-1] > arr[:-2]) & (arr[1:-1] > arr[2:])
)[0] + 1
print(maxima_peaks_positions)

[ 2  5  7 11]


### Broadcasting in NumPy

**Broadcasting solves the problem of arrays with mismatched shapes by (virtually) stretching the smaller array along the larger array so that both arrays have compatible shapes for NumPy operations. Applying broadcasting before vectorization makes it possible to vectorize operations on arrays of different dimensions.**

## Pickling and Unpickling

In [165]:
import pickle
import seaborn as sns

In [166]:
df = sns.load_dataset('tips')

In [167]:
df.head()

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.5,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4


In [168]:
filename = 'demo.pickle'

In [169]:
# Serialize df to disk. The context manager guarantees the file handle is
# flushed and closed once the dump finishes (a bare open() call leaks it).
with open(filename, 'wb') as file_handle:
    pickle.dump(df, file_handle)

In [170]:
# Unserialize it. The context manager closes the file handle after loading.
# NOTE: only unpickle files you trust; unpickling can execute arbitrary code.
with open(filename, 'rb') as file_handle:
    df = pickle.load(file_handle)

In [171]:
df

Unnamed: 0,total_bill,tip,sex,smoker,day,time,size
0,16.99,1.01,Female,No,Sun,Dinner,2
1,10.34,1.66,Male,No,Sun,Dinner,3
2,21.01,3.50,Male,No,Sun,Dinner,3
3,23.68,3.31,Male,No,Sun,Dinner,2
4,24.59,3.61,Female,No,Sun,Dinner,4
...,...,...,...,...,...,...,...
239,29.03,5.92,Male,No,Sat,Dinner,3
240,27.18,2.00,Female,Yes,Sat,Dinner,2
241,22.67,2.00,Male,Yes,Sat,Dinner,2
242,17.82,1.75,Male,No,Sat,Dinner,2


In [3]:
# Sample object to serialize; defined here so the cell runs on a fresh kernel
# instead of relying on a variable left over from an earlier session.
my_data = {'Benz', 'Audi', 'Toyota', 'BMW'}

# Write the object using the most compact protocol this Python version supports.
with open("demo.pickle","wb") as file_handle:
    pickle.dump(my_data, file_handle, pickle.HIGHEST_PROTOCOL)

In [5]:
# Re-open the pickle file in binary read mode, restore the saved object and print it.
# NOTE: only unpickle files you trust; unpickling can execute arbitrary code.
with open("demo.pickle","rb") as file_handle:
   res_data = pickle.load(file_handle)
   print(res_data)

{'Benz', 'Audi', 'Toyota', 'BMW'}
