## Let us start with NumPy

In [1]:
import numpy as np

In [2]:
# Build a one-dimensional array holding the integers 1 through 6
one_d = np.arange(1, 7)

In [3]:
# Build a two-dimensional array (2 rows x 6 columns); each inner list is one row
two_d = np.array([
    [1, 2, 3, 4, 5, 6],
    [7, 8, 9, 10, 11, 12],
])

In [4]:
one_d

array([1, 2, 3, 4, 5, 6])

In [5]:
two_d

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]])

In [6]:
# Allocate a 3x4 array whose elements are all 0 (stored as floats)
zero = np.zeros(shape=(3, 4))

In [7]:
zero

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [8]:
# Element-wise square of one_d (same result as one_d ** 2); the result is displayed, not stored
np.square(one_d)

array([ 1,  4,  9, 16, 25, 36], dtype=int32)

In [9]:
# Note that the result from above isn't stored in place
one_d

array([1, 2, 3, 4, 5, 6])

In [10]:
# Remember to assign the result to a name if you want to use it later
squared_one_d = one_d**2
squared_one_d

array([ 1,  4,  9, 16, 25, 36], dtype=int32)

In [11]:
# Some operations are in-place, i.e. they modify the existing array instead of creating a new one.
# The augmented assignment below doubles every element of one_d itself.
one_d *= 2

In [12]:
one_d

array([ 2,  4,  6,  8, 10, 12])

In [13]:
# A simple example of indexing: row 1, column 2 (both zero-based).
# A single [row, column] index avoids the temporary row array that two_d[1][2] would create.
two_d[1, 2]

9

In [14]:
# Some simple examples of slicing: columns 3 up to (not including) 6 of the first row,
# written as one [row, column-slice] index instead of chaining two [] operations
two_d[0, 3:6]

array([4, 5, 6])

In [15]:
# Same column slice (3 up to, not including, 6), this time taken from the second row
two_d[1, 3:6]

array([10, 11, 12])

In [16]:
# You get all elements in the second row since it has only 6 elements
# (row and column slice are given together in a single index)
two_d[1, :6]

array([ 7,  8,  9, 10, 11, 12])

In [17]:
# Let's play around with the shape: .shape is the tuple (number of rows, number of columns)
two_d.shape

(2, 6)

In [18]:
# Transpose: rows and columns are swapped, so the 2x6 array is shown as 6x2
two_d.T

array([[ 1,  7],
       [ 2,  8],
       [ 3,  9],
       [ 4, 10],
       [ 5, 11],
       [ 6, 12]])

In [19]:
# Let's try to reshape it: the same 12 elements laid out as 3 rows x 4 columns.
# The reshaped array is only displayed here, not assigned to a name.
two_d.reshape(3, 4)

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

In [20]:
# Again, remember that not all operations are in-place!
# two_d still has its original 2x6 layout after the reshape call.
two_d

array([[ 1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12]])

In [21]:
# Copying data can be confusing for beginners, so the following cells compare three ways of "copying" an array

In [22]:
# Method 1: plain assignment. copy1 and one_d are two names for the SAME array (no copy is created).
# If you modify copy1, it will modify one_d too! Be careful!!
copy1 = one_d

In [23]:
# `is` checks object identity: copy1 and one_d refer to one and the same array object
copy1 is one_d

True

In [24]:
# Method 2: Shallow copy (a "view"): creates a new array object that points to the same data as one_d
copy2 = one_d.view()

In [25]:
# The view is a different Python object from one_d
copy2 is one_d

False

In [26]:
# The .base attribute of the view refers back to one_d, whose data the view shares
copy2.base is one_d

True

In [27]:
# Changing copy2 will also alter one_d, because both look at the same underlying data
copy2[3] = 100

In [28]:
copy2

array([  2,   4,   6, 100,  10,  12])

In [29]:
one_d

array([  2,   4,   6, 100,  10,  12])

In [30]:
# Method 3: Deep copy: creates a new array object that has its own, independent copy of the data
copy3 = one_d.copy()

In [31]:
# Altering this deep copy does NOT alter one_d, since copy3 owns separate data
copy3[5] = 1000

In [32]:
copy3

array([   2,    4,    6,  100,   10, 1000])

In [33]:
# copy1 is just another name for one_d, so this shows one_d was left untouched by the change to copy3
copy1

array([  2,   4,   6, 100,  10,  12])

## Moving on to Pandas

In [34]:
import pandas as pd

In [35]:
# A plain Python dictionary: each key is a course code, each value a list of eight numbers
data = dict(
    INF552=[77, 81, 80, 78, 79, 79, 80, 81],
    INF556=[76, 78, 82, 76, 77, 83, 81, 77],
)

In [36]:
data

{'INF552': [77, 81, 80, 78, 79, 79, 80, 81],
 'INF556': [76, 78, 82, 76, 77, 83, 81, 77]}

In [37]:
# Turn the dictionary into a Pandas dataframe: keys become column names, lists become column values
dataframe = pd.DataFrame.from_dict(data)

In [38]:
dataframe

Unnamed: 0,INF552,INF556
0,77,76
1,81,78
2,80,82
3,78,76
4,79,77
5,79,83
6,80,81
7,81,77


In [39]:
# Let's now add an index for our dataframe: the years 2019 down to 2012, as strings, label the rows
enrollment = pd.DataFrame(
    data,
    index=[str(year) for year in range(2019, 2011, -1)],
)

In [40]:
enrollment

Unnamed: 0,INF552,INF556
2019,77,76
2018,81,78
2017,80,82
2016,78,76
2015,79,77
2014,79,83
2013,80,81
2012,81,77


In [41]:
# With an index in place, a row can be looked up by its label through .loc
print(f"Enrollment in 2019 was: \n{enrollment.loc['2019']}")

Enrollment in 2019 was: 
INF552    77
INF556    76
Name: 2019, dtype: int64


In [42]:
# Getting the main high-level information of a dataframe:
# index range, column names, non-null counts, dtypes and memory usage
enrollment.info()

<class 'pandas.core.frame.DataFrame'>
Index: 8 entries, 2019 to 2012
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   INF552  8 non-null      int64
 1   INF556  8 non-null      int64
dtypes: int64(2)
memory usage: 512.0+ bytes


In [43]:
enrollment.shape

(8, 2)

In [44]:
# Sometimes getting a quick look at the top few rows of a dataframe is helpful
# (called without arguments, head() shows the first 5 rows)
enrollment.head()

Unnamed: 0,INF552,INF556
2019,77,76
2018,81,78
2017,80,82
2016,78,76
2015,79,77


In [45]:
# Or sometimes you just want to see the last few rows!
# (called without arguments, tail() shows the last 5 rows)
enrollment.tail()

Unnamed: 0,INF552,INF556
2016,78,76
2015,79,77
2014,79,83
2013,80,81
2012,81,77


In [46]:
# Pandas can also easily read in data from different formats (e.g. CSV, JSON, SQL databases), which you can then manipulate as usual
# The next command would import the data stored in 'your_file_name.csv' into the dataframe df
# df = pd.read_csv('your_file_name.csv')

In [47]:
# Some examples of how Pandas can help do data manipulation quickly.
# Stack enrollment on top of itself so every row appears twice.
# pd.concat is used because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
temp = pd.concat([enrollment, enrollment])

In [48]:
temp.shape

(16, 2)

In [49]:
# Not in-place though!! drop_duplicates() hands back a de-duplicated frame (displayed below);
# temp itself keeps all of its rows.
temp.drop_duplicates()

Unnamed: 0,INF552,INF556
2019,77,76
2018,81,78
2017,80,82
2016,78,76
2015,79,77
2014,79,83
2013,80,81
2012,81,77


In [50]:
temp.shape

(16, 2)

In [51]:
# This is how to do it in-place: inplace=True removes the duplicate rows from temp itself.
# (Assigning the result back, temp = temp.drop_duplicates(), is the alternative without inplace.)
temp.drop_duplicates(inplace = True)

In [52]:
temp.shape

(8, 2)

## Moving on to Scikit-learn

In [53]:
from sklearn import datasets
from sklearn import metrics
from sklearn.neighbors import KNeighborsClassifier

In [54]:
# Loading a built-in dataset
# See more at: https://scikit-learn.org/stable/auto_examples/datasets/plot_iris_dataset.html
# And https://machinelearningmastery.com/a-gentle-introduction-to-scikit-learn-a-python-machine-learning-library/
# The dataset holds the sepal length/width and petal length/width (4 features) of 150 irises
# from 3 different types (Setosa, Versicolour, and Virginica), stored in a 150x4 numpy.ndarray
dataset = datasets.load_iris()

In [55]:
dataset.target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

In [56]:
# Running a K-Nearest Neighbors model on the data:
# a classifier with default settings is fitted on ALL samples (no train/test split is made here)
knn = KNeighborsClassifier()
knn.fit(dataset.data, dataset.target)

KNeighborsClassifier()

In [57]:
# True labels vs. the model's predictions for the very same samples it was fitted on.
# NOTE: scoring on the training data gives an optimistic picture; hold out a test set
# for an honest estimate of how the model performs on unseen data.
ground_truth = dataset.target
predictions = knn.predict(dataset.data)

In [58]:
# Per-class precision, recall, F1-score and support, plus overall accuracy and averages
print(metrics.classification_report(ground_truth, predictions))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.96      0.94      0.95        50
           2       0.94      0.96      0.95        50

    accuracy                           0.97       150
   macro avg       0.97      0.97      0.97       150
weighted avg       0.97      0.97      0.97       150

