# Introduction to NumPy and pandas

In [1]:
import numpy as np

In [2]:
# Create a one-dimensional array holding the integers 0 through 19.
array = np.arange(0, 20)
# Show the container type first, then the contents, one per line.
print(type(array), array, sep="\n")

<class 'numpy.ndarray'>
[ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19]


In [3]:
# The shape of an ndarray is reported as a plain Python tuple; a 1-D array
# has a single entry in it.
print(array.shape, type(array.shape), sep="\n")

(20,)
<class 'tuple'>


In [4]:
# Indexing is zero-based, so this evaluates to the fourth element of `array`.
array[3]

3

In [5]:
# Element assignment changes the existing array in place; the printout below
# shows the new value at position 3.
array[3] = 100
print(array)

[  0   1   2 100   4   5   6   7   8   9  10  11  12  13  14  15  16  17
  18  19]


In [6]:
# Start over with the integers 0..8 and arrange them as a 3x3 grid.
array = np.arange(9)
x = np.reshape(array, (3, 3))
# Print the flat array followed by its two-dimensional counterpart.
print(array, x, sep="\n")

[0 1 2 3 4 5 6 7 8]
[[0 1 2]
 [3 4 5]
 [6 7 8]]


In [7]:
# With two arguments, arange counts from start (inclusive) up to stop (exclusive).
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [8]:
# The third argument is the step size; 35 itself is excluded, so the last value is 34.
np.arange(10, 35, 3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34])

In [9]:
# Allocate a 2x4 array filled with zeros; the displayed entries are floats.
np.zeros((2, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [10]:
# Same idea with ones: a 3x4 float array in which every entry is 1.
np.ones((3, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [11]:
# Fill a 2x2 array with a chosen constant (here 3); the displayed entries are
# integers, like the fill value.
np.full((2, 2), 3)

array([[3, 3],
       [3, 3]])

In [12]:
# Build a 3x3 array with ones on the main diagonal and zeros everywhere else.
np.eye(3, 3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [13]:
# Start from an ordinary Python list of the integers 1..8 ...
my_list = list(range(1, 9))
# ... and convert it into an ndarray.
my_array = np.asarray(my_list)
# Contents first, then the resulting type.
print(my_array, type(my_array), sep="\n")

[1 2 3 4 5 6 7 8]
<class 'numpy.ndarray'>


In [14]:
# Rearrange the eight values into two rows of four.
my_array = np.reshape(my_array, (2, 4))
print(my_array)

[[1 2 3 4]
 [5 6 7 8]]


In [15]:
# Swap rows and columns: the 2x4 array becomes 4x2.
my_array = my_array.transpose()
print(my_array)

[[1 5]
 [2 6]
 [3 7]
 [4 8]]


In [16]:
# Summary statistics of `my_array`.
# The results are stored under names that do not collide with Python's
# built-in max() and min(); rebinding those built-ins would make every later
# call such as max(a, b) fail for the rest of the kernel session.
max_value = my_array.max()
min_value = my_array.min()
mean_value = my_array.mean()
# Unlike the three statistics above, which reduce over the whole array,
# axis=1 yields one standard deviation per row.
std_per_row = my_array.std(axis=1)
print("Max: ", max_value)
print("Min: ", min_value)
print("Mean: ", mean_value)
print("Standard Deviation: ", std_per_row)

Max:  8
Min:  1
Mean:  4.5
Standard Deviation:  [2. 2. 2. 2.]


In [17]:
# Draw five random integers from {0, 1} in a single vectorised call instead of
# appending to a Python list inside a loop. The upper bound of randint is
# exclusive, so only 0 and 1 can occur.
# No seed is set, so the drawn values differ from run to run.
num = np.random.randint(0, 2, size=5)
print(num)
# np.unique reports the sorted distinct values that were actually drawn.
print(np.unique(num))

[0 1 0 0 0]
[0 1]


In [18]:
# Two equally long integer sequences: 1..3 and the odd numbers 1, 3, 5.
x = np.arange(1, 4)
y = np.arange(1, 7, step=2)
print(x, y, sep="\n")
# Element-wise sum; as the cell's last expression it is shown as the output.
np.add(x, y)

[1 2 3]
[1 3 5]


array([2, 5, 8])

In [19]:
# 3x3 grid holding the floats 1.0 through 9.0.
num = np.reshape(np.arange(1, 10, dtype=float), (3, 3))
print(num)
# Largest value overall, then per column (axis 0), then per row (axis 1).
print(num.max())
print(num.max(axis=0))
print(num.max(axis=1))

[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]
9.0
[7. 8. 9.]
[3. 6. 9.]


In [20]:
# Put a missing value into row 1, column 2. The lowercase np.nan is used
# because the np.NaN alias was removed in NumPy 2.0.
num[1, 2] = np.nan
print(num)
# NaN propagates through np.max: the column that contains it reports nan.
# Only the last expression of a cell is displayed, so the overall maximum on
# the next line is computed but not shown.
np.max(num)
np.max(num, axis=0)

[[ 1.  2.  3.]
 [ 4.  5. nan]
 [ 7.  8.  9.]]


array([ 7.,  8., nan])

In [21]:
import pandas as pd

In [22]:
# Read the spreadsheet into a DataFrame; the relative path means the file is
# looked up in the notebook's working directory.
data = pd.read_excel("test_data.xlsx")

In [23]:
# Preview the first rows to check that the file was parsed as expected.
data.head()

Unnamed: 0,f1,f2,f3,f4
0,1,2,3.0,4.0
1,5,6,,7.0
2,0,6,9.0,


In [24]:
# Count the missing entries in each column (isna is the same check as isnull).
data.isna().sum()

f1    0
f2    0
f3    1
f4    1
dtype: int64

In [25]:
# Drop every row that contains at least one missing value; the defaults are
# spelled out to contrast with the column-wise variant.
data.dropna(axis=0, how="any")

Unnamed: 0,f1,f2,f3,f4
0,1,2,3.0,4.0


In [26]:
# Drop every column that contains at least one missing value.
data.dropna(axis="columns")

Unnamed: 0,f1,f2
0,1,2
1,5,6
2,0,6


In [27]:
import numpy as np
from sklearn.impute import SimpleImputer
# Replace every NaN with the mean of the column it sits in.
imr = SimpleImputer(strategy='mean', missing_values=np.nan)
# fit_transform learns the column means and fills the gaps in one call.
imputed_data = imr.fit_transform(data)
# Original frame first, imputed array second, so the two can be compared.
print(data)
print(imputed_data)

   f1  f2   f3   f4
0   1   2  3.0  4.0
1   5   6  NaN  7.0
2   0   6  9.0  NaN
[[1.  2.  3.  4. ]
 [5.  6.  6.  7. ]
 [0.  6.  9.  5.5]]


In [28]:
# Without further arguments read_csv takes the first line of the file as the
# header, so the first sample (5.1, 3.5, 1.4, 0.2, Iris-setosa) ends up as
# the column names instead of as a data row.
data = pd.read_csv('iris.csv')
data.head()

Unnamed: 0,5.1,3.5,1.4,0.2,Iris-setosa
0,4.9,3.0,1.4,0.2,Iris-setosa
1,4.7,3.2,1.3,0.2,Iris-setosa
2,4.6,3.1,1.5,0.2,Iris-setosa
3,5.0,3.6,1.4,0.2,Iris-setosa
4,5.4,3.9,1.7,0.4,Iris-setosa


In [29]:
# header=None tells pandas the file has no header line: every line is kept as
# data and the columns are numbered 0..4.
data = pd.read_csv('iris.csv', header = None)
data.head()

Unnamed: 0,0,1,2,3,4
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [30]:
# Give the numbered columns descriptive names: four measurements followed by
# the species label.
data.columns = [
    'sepal length',
    'sepal width',
    'petal length',
    'petal width',
    'class',
]
data.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [31]:
# List the distinct labels in the class column; the output shows three species.
np.unique(data['class'])

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [32]:
# Encode the species names as integers by hand with an explicit lookup table.
mapping = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
# NOTE(review): this overwrites the column in place. Re-running the cell once
# the column is numeric would, per Series.map semantics, look up 0/1/2 in a
# dict keyed by strings and presumably yield NaN everywhere — confirm, and
# consider writing the codes to a new column instead.
data['class'] = data['class'].map(mapping)
data.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [33]:
from sklearn.preprocessing import LabelEncoder
# LabelEncoder assigns an integer code to each distinct label.
# NOTE(review): by this point the previous cell has already replaced the class
# strings with 0/1/2, so the encoder only sees integers and the displayed
# frame is unchanged. Presumably the intent was to encode the original string
# labels — confirm and, if so, apply the encoder to the raw column instead.
le = LabelEncoder()
data['class'] = le.fit_transform(data['class'])
data.head()

Unnamed: 0,sepal length,sepal width,petal length,petal width,class
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0
