# Table of Contents

- NumPy in Python
- N dimensional array
- Sorting with NumPy array
- Searching with NumPy array
- Time complexity
- Indexing and Slicing
- Selection Techniques
- Pandas in Python
- Series
- Dataframe
- Indexing
- Loading and saving dataframes
- Summary

## NumPy

### Numpy Array

In [None]:
import numpy as np

# Build one sample array per rank (0-D through 3-D), then report each in turn.
sample_arrays = (
    np.array(40),                      # 0-D array
    np.array([20, 40, 60, 80]),        # 1-D array
    np.array([[20, 40], [60, 80]]),    # 2-D array
    np.array([[[20, 40], [60, 80]]]),  # 3-D array
)

# new_array is the loop variable, so it still holds the 3-D array afterwards.
for new_array in sample_arrays:
    print(f" \nArray : {new_array} ")
    print(f" Type of array : {type(new_array)} ")
    print(f" Array dimension : {new_array.ndim}")

 
Array : 40 
 Type of array : <class 'numpy.ndarray'> 
 Array dimension : 0
 
Array : [20 40 60 80] 
 Type of array : <class 'numpy.ndarray'> 
 Array dimension : 1
 
Array : [[20 40]
 [60 80]] 
 Type of array : <class 'numpy.ndarray'> 
 Array dimension : 2
 
Array : [[[20 40]
  [60 80]]] 
 Type of array : <class 'numpy.ndarray'> 
 Array dimension : 3


### Functions for creating numpy arrays

zeros() function

In [None]:
zeros_array = np.zeros(4, dtype=int)
zeros_array

array([0, 0, 0, 0])

In [None]:
zeros_array = np.zeros((4,2), dtype=int)
zeros_array

array([[0, 0],
       [0, 0],
       [0, 0],
       [0, 0]])

ones() function

In [None]:
ones_array = np.ones((2,4), dtype=int)
ones_array

array([[1, 1, 1, 1],
       [1, 1, 1, 1]])

empty() function

In [None]:
empty_array = np.empty([2, 2])
empty_array

array([[1.27319748e-313, 6.79038653e-313],
       [9.88131292e-323, 1.13948317e-311]])

full() function

In [None]:
full_array = np.full((2, 2), np.inf)
full_array

array([[inf, inf],
       [inf, inf]])

In [None]:
full_array = np.full((2, 2), 10)
full_array

array([[10, 10],
       [10, 10]])

zeros_like() function

In [None]:
# Template array: the integers 0..7 laid out as 4 rows x 2 columns.
num_array = np.arange(8).reshape((4, 2))
# zeros_like() builds an all-zero array from the template num_array.
zeros_like_array = np.zeros_like(num_array)
zeros_like_array

array([[0, 0],
       [0, 0],
       [0, 0],
       [0, 0]])

## Sorting with Numpy array

### Quick sort

#### Using built-in sort() function

In [None]:
import numpy as np
number_array = np.array([20, 4, 10, 8])
print(f"Before sorting : {number_array} ")
# 'quicksort' is the documented spelling of this option ('quick sort' is not).
# The result is stored separately; number_array is printed above, before sorting.
sorted_array = np.sort(number_array, kind='quicksort')
print(f"After sorting : {sorted_array} ")

Before sorting : [20  4 10  8] 
After sorting : [ 4  8 10 20] 


#### Using user defined sort function

In [None]:
def quick_sort(numpy_array):
    """Sort a sequence in ascending order using a three-way quick sort.

    The first element is taken as the pivot. The remaining work is split
    into the elements smaller than, equal to, and greater than the pivot;
    the smaller and greater partitions are sorted recursively.

    Args:
        numpy_array: A one-dimensional NumPy array or a list whose elements
            can be compared with ``<``, ``==`` and ``>``.

    Returns:
        A new Python list holding the elements in ascending order. The
        input itself is only read, never modified.
    """
    # Zero or one element is already sorted. Return a list copy so the
    # result type matches the recursive case even when an ndarray is given.
    if len(numpy_array) <= 1:
        return list(numpy_array)

    pivot_value = numpy_array[0]
    left_element = []
    equal_element = []
    right_element = []

    # NOTE: a value that is neither <, == nor > the pivot (e.g. NaN) is not
    # placed in any partition and therefore does not appear in the result.
    for num in numpy_array:
        if num < pivot_value:
            left_element.append(num)
        elif num == pivot_value:
            equal_element.append(num)
        elif num > pivot_value:
            right_element.append(num)

    # Recursively sort both outer partitions and stitch the three together.
    return quick_sort(left_element) + equal_element + quick_sort(right_element)
quick_sort(np.array([4,3,5,6]))

[3, 4, 5, 6]

#### Time and space complexity of Quick sort: it offers a good balance between the two

| Time Complexity | Best | Average | Worst |
|---|---|---|---|
| Quick Sort | O(nlog(n)) | O(nlog(n)) | O(n^2) |

| Space complexity |
|---|
| O(n) |

## Searching with Numpy array

#### Using built-in searchsorted() function

In [None]:
# searchsorted() expects num_array to be sorted in ascending order
num_array = np.array([3,4,5,6,11,12,23])
search_element = np.searchsorted(num_array,11) # index at which 11 would be inserted to keep the array sorted; 11 is present here, so this is also its position
search_element

4

In [None]:
search_element = np.searchsorted(num_array,[11, 23, 4, 6]) # returns an array with one insertion index per searched value
search_element

array([4, 6, 1, 3], dtype=int64)

#### Using user defined searching function

In [None]:
def binary_search_algorithm(num_array, low_value, high_value, pivot):
    """Find ``pivot`` in a sorted sequence using recursive binary search.

    Args:
        num_array: Sequence sorted in ascending order.
        low_value: Lowest index of the range to search (inclusive).
        high_value: Highest index of the range to search (inclusive).
        pivot: The value to look for.

    Returns:
        The index of an element equal to ``pivot``, or -1 when the value
        is not present in the searched range.
    """
    # Base case: the range is empty, so the element is not present.
    if high_value < low_value:
        return -1

    middle_value = (high_value + low_value) // 2

    # When the element is equal to the middle value, the search is done.
    if num_array[middle_value] == pivot:
        return middle_value

    # When the element is smaller than the middle value, follow the left subarray.
    if num_array[middle_value] > pivot:
        return binary_search_algorithm(num_array, low_value, middle_value - 1, pivot)

    # Otherwise follow the right subarray.
    return binary_search_algorithm(num_array, middle_value + 1, high_value, pivot)
 
 
# Look up one value in a sorted list and report the outcome.
num_array = [3, 4, 5, 6, 11, 12, 23]
pivot = 11
element = binary_search_algorithm(num_array, 0, len(num_array) - 1, pivot)

# -1 is the "not found" value returned by binary_search_algorithm().
if element == -1:
    print(f"Element is not present in array")
else:
    print(f"Given element is present at the {str(element)}th index ")

Given element is present at the 4th index 


## Numpy Indexing and slicing

In [None]:
new_array = np.array([1,10,20,2,30,3,40,4,50,5])
new_array[1:6:2]

array([10,  2,  3])

### Field access

In [None]:
new_array[-3:3:-1]

array([ 4, 40,  3, 30])

In [None]:
new_array = np.array([[[1],[2],[3]], [[4],[5],[6]]])
new_array.shape

(2, 3, 1)

In [None]:
new_array[1:2]

array([[[4],
        [5],
        [6]]])

### Slicing

In [None]:
new_array = np.arange(8)
new_array[slice(1,6,2)]

array([1, 3, 5])

### Advanced indexing

#### Integer Indexing

In [None]:
new_array = np.array([[10, 2], [6, 4], [5, 1]]) 
# the two index lists are paired element-wise: this picks (row 0, col 0), (row 1, col 1) and (row 2, col 0)
integer_indexing = new_array[[0,1,2], [0,1,0]] # first list holds the row numbers, second list holds the column to pick in each of those rows
integer_indexing

array([10,  4,  5])

#### Boolean Indexing

In [None]:
new_array[new_array > 5]

array([10,  6])

In [None]:
new_array = np.array([np.nan, np.nan, 2 ,4, 6 ,10, np.nan])
new_array[~np.isnan(new_array)]

array([ 2.,  4.,  6., 10.])

## Numpy selection techniques

In [None]:
# create an array
arr = np.arange(20)

In [None]:
# numpy.select()
# each element takes its value from the choice paired with the first condition it satisfies;
# elements that satisfy none of the conditions (5, 6 and 7 for arr = np.arange(20)) get the default value 0
np.select(condlist= [arr<5, arr>7], choicelist= [arr, arr**2])

array([  0,   1,   2,   3,   4,   0,   0,   0,  64,  81, 100, 121, 144,
       169, 196, 225, 256, 289, 324, 361])

In [None]:
# numpy.where()
np.where(arr < 6, arr, 2*arr)

array([ 0,  1,  2,  3,  4,  5, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32,
       34, 36, 38])

In [None]:
# numpy.choose()
# the i-th result is taken from position i of the row selected by the i-th index:
# [choices[2][0], choices[3][1], choices[1][2], choices[0][3]]
choices = [[0, 1, 10, 9], [10, 20, 30, 40],
  [50, 51, 52, 53], [60, 61, 62, 63]]
np.choose([2,3,1,0], choices)

array([50, 61, 30,  9])

In [None]:
# numpy.random.choice()
# draws a uniform random sample of the given size (4) from np.arange(20);
# no seed is set here, so the result varies between runs
np.random.choice(20,4)

array([ 3, 19,  7, 18])

In [None]:
# numpy.invert() - invert the specified number
# 12 binary representation - 00001100
# inverted 12 - 11110011
np.invert(np.array(12, dtype=np.uint8))

243

## Pandas

### Series

In [None]:
# create an empty series
import pandas as pd
series = pd.Series(dtype=object)
series

In [None]:
# creating series from array
arr = np.array([20,30])
series = pd.Series(arr, copy=False)
series

0    20
1    30
dtype: int32

In [None]:
# creating series from dictionary
dictionary = {0: 21, 1: 400, 2: 39, 3: 10}
# when an index is passed, values are looked up in the dictionary by label;
# the integer keys 0-3 match none of the labels 'w'-'z', so every entry becomes NaN
series = pd.Series(data=dictionary, index=['w', 'x', 'y', 'z'])
series

w   NaN
x   NaN
y   NaN
z   NaN
dtype: float64

In [None]:
# creating series from dictionary
dictionary = {'w': 21, 'x': 400, 'y': 39, 'z': 10}
# keys and Series index should match, if not nan values are added
series = pd.Series(data=dictionary, index=['w', 'x', 'y', 'z'])
series

w     21
x    400
y     39
z     10
dtype: int64

### Series methods

In [None]:
# describe() method
series = pd.Series([2,4,6,8,10,12])
series.describe()

count     6.000000
mean      7.000000
std       3.741657
min       2.000000
25%       4.500000
50%       7.000000
75%       9.500000
max      12.000000
dtype: float64

In [None]:
# count() method
series = pd.Series([2,4,6,8,10,12,np.nan])
series.count() # nan is not counted

6

In [None]:
# appending one series to another
# Series.append() was removed in pandas 2.0; pd.concat() is the replacement
series1 = pd.Series([10, 20])
series2 = pd.Series([40, 60], index=[2, 3])
# concat() returns a new series and leaves series1 and series2 unchanged
appended_series = pd.concat([series1, series2])
appended_series

0    10
1    20
2    40
3    60
dtype: int64

In [None]:
# apply method
# apply() calls the given function on every value of the series;
# a lambda is a small anonymous function written as a single expression
series2.apply(lambda x: x**2)

2    1600
3    3600
dtype: int64

In [None]:
# copy() method
series1_copy = series1.copy()
series1_copy

0    10
1    20
dtype: int64

### Dataframe

Dataframe creation

In [None]:
# creating dataframe(df) from dictionary
dictionary = {'col1': [10.0, 20.0], 'col2': [30.0, np.nan]}
df = pd.DataFrame(data=dictionary)
df

Unnamed: 0,col1,col2
0,10.0,30.0
1,20.0,


In [None]:
# creating df from nd array
df1 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
                   columns=['col1', 'col2', 'col3'])
df1

Unnamed: 0,col1,col2,col3
0,1,2,3
1,4,5,6
2,7,8,9


In [None]:
# creating df from list of dictionaries
list_data = [{"col1": 10, "col2": 20}, {"col1": 50, "col2": 100, "col3": 200}]
df2 = pd.DataFrame(list_data)
df2

Unnamed: 0,col1,col2,col3
0,10,20,
1,50,100,200.0


In [None]:
# creating df from dictionary of lists
dict_data = {"col1": [10.0, 20.0, 30.0, 40.0], "col2": [40.0, 30.0, 20.0, 10.0]}
df3 = pd.DataFrame(dict_data)
df3

Unnamed: 0,col1,col2
0,10.0,40.0
1,20.0,30.0
2,30.0,20.0
3,40.0,10.0


### Dataframe methods

In [None]:
# describe() method
df.describe()

Unnamed: 0,col1,col2
count,2.0,1.0
mean,15.0,30.0
std,7.071068,
min,10.0,30.0
25%,12.5,30.0
50%,15.0,30.0
75%,17.5,30.0
max,20.0,30.0


In [None]:
# count() method
df2.count()

col1    2
col2    2
col3    1
dtype: int64

In [None]:
# appending the rows of df1 to df
# DataFrame.append() was removed in pandas 2.0; pd.concat() is the replacement
pd.concat([df, df1])

Unnamed: 0,col1,col2,col3
0,10.0,30.0,
1,20.0,,
0,1.0,2.0,3.0
1,4.0,5.0,6.0
2,7.0,8.0,9.0


In [None]:
# apply method
df1.apply(lambda x: x**2)

Unnamed: 0,col1,col2,col3
0,1,4,9
1,16,25,36
2,49,64,81


In [None]:
# copy() method
df1_copy = df1.copy()
df1_copy

Unnamed: 0,col1,col2,col3
0,1,2,3
1,4,5,6
2,7,8,9


#### Dataframe groupby method

In [None]:
import pandas as pd
df = pd.read_csv("Week3_Groupby.csv")
df

Unnamed: 0,Employee Id,Name,City,Age,Country
0,1,Anil,Pune,30,India
1,2,Sam,Mumbai,22,India
2,3,Dave,Pune,25,India
3,4,Sakshi,Pune,10,India
4,5,Rana,Nagpur,40,India
5,6,Wasim,Mumbai,33,India
6,7,Raj,Nagpur,43,India
7,8,Ram,Mumbai,44,India
8,9,Sunita,Pune,22,India
9,10,Sarika,Nagpur,49,India


In [None]:
group_df = df.groupby(by="City")
group_df

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001C08A818550>

In [None]:
# To see first values of each group
group_df.first()

Unnamed: 0_level_0,Employee Id,Name,Age,Country
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Mumbai,2,Sam,22,India
Nagpur,5,Rana,40,India
Pune,1,Anil,30,India


In [None]:
group_df.get_group("Pune")

Unnamed: 0,Employee Id,Name,City,Age,Country
0,1,Anil,Pune,30,India
2,3,Dave,Pune,25,India
3,4,Sakshi,Pune,10,India
8,9,Sunita,Pune,22,India


In [None]:
group_df.get_group("Nagpur")

Unnamed: 0,Employee Id,Name,City,Age,Country
4,5,Rana,Nagpur,40,India
6,7,Raj,Nagpur,43,India
9,10,Sarika,Nagpur,49,India


### Pandas Indexing

In [None]:
df4 = pd.DataFrame([[100, 2500], [490, 789], [500, 1500]],
     index=['pune', 'mumbai', 'nagpur'],
     columns=['max_speed(kmph)', 'distance(km)'])
df4

Unnamed: 0,max_speed(kmph),distance(km)
pune,100,2500
mumbai,490,789
nagpur,500,1500


#### loc based indexing

In [None]:
df4.loc['pune']


max_speed(kmph)     100
distance(km)       2500
Name: pune, dtype: int64

In [None]:
df4.loc[['pune', 'nagpur']]


Unnamed: 0,max_speed(kmph),distance(km)
pune,100,2500
nagpur,500,1500


#### iloc based indexing

In [None]:
df4.iloc[[0,2]]

Unnamed: 0,max_speed(kmph),distance(km)
pune,100,2500
nagpur,500,1500


In [None]:
df4.iloc[0]

max_speed(kmph)     100
distance(km)       2500
Name: pune, dtype: int64

In [None]:
df4.iloc[[0]]

Unnamed: 0,max_speed(kmph),distance(km)
pune,100,2500


## Saving and loading dataframe

### CSV data loading and saving

In [None]:
df = pd.read_csv("Week3_Profile_Data.csv")
df

Unnamed: 0,Name,Age,Sex,City,Country
0,Alex,30,Male,NY,USA
1,Anita,23,Female,Pune,India
2,Bliss,60,Female,Mumbai,India
3,Max,40,Male,Chicago,USA


In [None]:
# index=False keeps the row index out of the file; without it, reading this
# file back with read_csv() would add an extra "Unnamed: 0" column
df.to_csv("Week3_Profile_Data.csv", index=False)

### Pickle method

Do not unpickle data from an untrusted source: loading a pickle file can execute arbitrary code.

In [None]:
import pickle
df.to_pickle("Week3_Profile_Data.pkl")

In [None]:
df1 = pd.read_pickle("Week3_Profile_Data.pkl")
df1

Unnamed: 0,Name,Age,Sex,City,Country
0,Alex,30,Male,NY,USA
1,Anita,23,Female,Pune,India
2,Bliss,60,Female,Mumbai,India
3,Max,40,Male,Chicago,USA


## Happy Learning :)