# Module 7 Intro to **NDarray**

In [None]:
import numpy as np
x = np.array([[1, 2, 3], [4, 5, 6]], np.int32)
x

array([[1, 2, 3],
       [4, 5, 6]], dtype=int32)

In [None]:
type(x)

numpy.ndarray

In [None]:
x.shape

(2, 3)

In [None]:
x.dtype

dtype('int32')

In [None]:
x[1,2]

6

In [None]:
y = x[:,1]
y

array([2, 5], dtype=int32)

In [None]:
y[0] = 9
y

array([9, 5], dtype=int32)

In [None]:
x

array([[1, 9, 3],
       [4, 5, 6]], dtype=int32)

# **# Module 7 Numpy Basic Operations**

In [None]:
A = np.array([[3,2],[0,1]])
B = np.array([[3,1],[2,1]])
A+B

array([[6, 3],
       [2, 2]])

In [None]:
A*B

array([[9, 2],
       [0, 1]])

In [None]:
A@B

array([[13,  5],
       [ 2,  1]])

In [None]:
A.dot(B)

array([[13,  5],
       [ 2,  1]])

In [None]:
A.transpose()

array([[3, 0],
       [2, 1]])

In [None]:
np.linalg.inv(A)

array([[ 0.33333333, -0.66666667],
       [ 0.        ,  1.        ]])

# **Module 7 Broadcasting**

In [None]:
from numpy import array
a = array([1.0, 2.0, 3.0])
b = array([2.0, 2.0, 2.0])
a * b

array([2., 4., 6.])

In [None]:
a = array([1.0,2.0,3.0])
b = 2.0
a * b

array([2., 4., 6.])

In [None]:
a = array([[ 0.0,  0.0,  0.0],
...            [10.0, 10.0, 10.0],
...            [20.0, 20.0, 20.0],
...            [30.0, 30.0, 30.0]])
b = array([1.0, 2.0, 3.0])
a + b

array([[ 1.,  2.,  3.],
       [11., 12., 13.],
       [21., 22., 23.],
       [31., 32., 33.]])

# **Module 8 Index 取值**

In [None]:
import numpy as np
y = np.arange(35).reshape(5,7)
y

array([[ 0,  1,  2,  3,  4,  5,  6],
       [ 7,  8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27],
       [28, 29, 30, 31, 32, 33, 34]])

In [None]:
y[1:5:2,:]

array([[ 7,  8,  9, 10, 11, 12, 13],
       [21, 22, 23, 24, 25, 26, 27]])

In [None]:
y[1:5:2,::3]

array([[ 7, 10, 13],
       [21, 24, 27]])

In [None]:
x = np.arange(10,1,-1)
x

array([10,  9,  8,  7,  6,  5,  4,  3,  2])

In [None]:
x[np.array([3,3,-3,8])]

array([7, 7, 4, 2])

In [None]:
x[np.array([[1,1],[2,3]])]

array([[9, 9],
       [8, 7]])

# **Module 8 Mask & Fancy Indexing**

In [None]:
import numpy as np

rand = np.random.RandomState(42)
x = rand.randint(100, size=10)
print(x)

[51 92 14 71 60 20 82 86 74 74]


In [None]:
[x[3], x[7], x[2]]

[71, 86, 14]

In [None]:
ind = [3, 7, 4]
x[ind]

array([71, 86, 60])

In [None]:
ind = np.array([[3, 7],
        [4, 5]])
x[ind]

array([[71, 86],
       [60, 20]])

# **Module 8** **NDarray** **Statistics**

In [None]:
import numpy as np
normal_array = np.random.normal(5, 0.5, 10)
print(normal_array)

[4.67511302 5.8306368  5.23662839 4.5302857  4.77728861 5.0535762
 5.20500984 4.57895788 5.03124508 5.32208671]


In [None]:
## Min 
print(np.min(normal_array))

### Max 
print(np.max(normal_array))

### Mean 
print(np.mean(normal_array))

### Median
print(np.median(normal_array))

### Sd
print(np.std(normal_array))

4.530285699255719
5.830636795381895
5.024082821957012
5.042410637801121
0.37984550888881646


In [None]:
x = np.array([[4, 9, 2, 10],
        [6, 9, 7, 12]])
np.ptp(x, axis=1)

array([8, 6])

In [None]:
np.ptp(x, axis=0)

array([2, 0, 5, 2])

In [None]:
np.ptp(x)

10

In [None]:
a = np.arange(4).reshape((2,2))
np.amin(a)

0

In [None]:
np.amin(a, axis=0) 

array([0, 1])

In [None]:
np.amin(a, axis=1) 

array([0, 2])

# **Module 9 Numpy NDarray Concatenate**

In [None]:
array = np.arange(9)
array

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [None]:
array2D_1 = array.reshape((3,3))
array2D_1

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [None]:
array2D_2 = np.arange(10,19).reshape(3,3)
array2D_2

array([[10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [None]:
np.concatenate((array2D_1, array2D_2))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [None]:
np.concatenate((array2D_1,array2D_2),axis=1)

array([[ 0,  1,  2, 10, 11, 12],
       [ 3,  4,  5, 13, 14, 15],
       [ 6,  7,  8, 16, 17, 18]])

In [None]:
np.concatenate((array2D_1, array2D_2, array2D_1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18],
       [ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8]])

In [None]:
np.vstack((array2D_1, array2D_2))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [10, 11, 12],
       [13, 14, 15],
       [16, 17, 18]])

In [None]:
np.hstack((array2D_1, array2D_2))

array([[ 0,  1,  2, 10, 11, 12],
       [ 3,  4,  5, 13, 14, 15],
       [ 6,  7,  8, 16, 17, 18]])

# **Module 9 Numpy Stack**

In [None]:
import numpy as np
arrays = [np.random.randn(3, 4) for _ in range(10)]
arrays

[array([[-0.11958632, -1.35134693,  0.67737591,  1.32835634],
        [-1.25605323, -1.12779919,  1.0014644 ,  0.23392331],
        [ 1.09501375, -2.1593645 ,  0.17782761, -1.92245071]]),
 array([[ 1.42160139,  0.15759594, -1.54611692,  0.43773315],
        [-1.1715107 , -0.92016124,  0.25726774,  1.60036533],
        [-0.58692342, -1.43987218,  1.21932302,  1.18135104]]),
 array([[-0.90235229,  0.99151438,  0.75502257, -0.9669196 ],
        [ 0.07034296, -0.06005171,  0.76332242, -0.06715314],
        [-0.53447863,  1.13304194,  1.1124687 , -0.74478193]]),
 array([[-1.27426822,  0.32484906,  0.86668313, -2.55617078],
        [ 0.84592933,  0.14917312,  1.26763911, -0.03341952],
        [-0.14180917, -1.54555467, -1.031803  , -0.25178066]]),
 array([[-1.04967125, -0.42954821,  1.38669802,  0.22593735],
        [-1.36705396,  2.20343055, -0.41619418, -0.79881153],
        [-1.45332832, -0.05008695, -0.18368011,  2.0016793 ]]),
 array([[-0.8031412 , -1.36474001, -1.67812127,  1.44352029]

In [None]:
np.stack(arrays, axis=0).shape

(10, 3, 4)

In [None]:
np.stack(arrays, axis=1).shape

(3, 10, 4)

In [None]:
np.stack(arrays, axis=2).shape

(3, 4, 10)

In [None]:
import numpy as np
a = np.array([1, 2, 3])
b = np.array([2, 3, 4])
np.stack((a, b))


array([[1, 2, 3],
       [2, 3, 4]])

In [None]:
np.stack((a, b), axis=-1)		  

array([[1, 2],
       [2, 3],
       [3, 4]])

In [None]:
np.stack((a,b),axis=1)

array([[1, 2],
       [2, 3],
       [3, 4]])

In [None]:
np.concatenate((a,b),axis=1)

AxisError: ignored

In [None]:
np.concatenate((a,b),axis=0)

array([1, 2, 3, 2, 3, 4])

In [None]:
import numpy as np
#arrays = [np.array([[1,2,3],[4,5,6]]) for _ in range(3)]
arrays = [np.array([[111,112],[121,122]]),np.array([[211,212],[221,222]]),np.array([[311,312],[321,322]])]
print (arrays)

[array([[111, 112],
       [121, 122]]), array([[211, 212],
       [221, 222]]), array([[311, 312],
       [321, 322]])]


In [None]:
np.stack(arrays, axis=0)

array([[[111, 112],
        [121, 122]],

       [[211, 212],
        [221, 222]],

       [[311, 312],
        [321, 322]]])

In [None]:
np.stack(arrays, axis=1)

array([[[111, 112],
        [211, 212],
        [311, 312]],

       [[121, 122],
        [221, 222],
        [321, 322]]])

In [None]:
np.stack(arrays, axis=2)

array([[[111, 211, 311],
        [112, 212, 312]],

       [[121, 221, 321],
        [122, 222, 322]]])

# **Module 10 Axis in Pandas Dataframe**

In [None]:
import pandas as pd
srs_a = pd.Series([10,30,60,80,90])
srs_b = pd.Series([22, 44, 55, 77, 101])
df = pd.DataFrame({'a': srs_a, 'b': srs_b})
df

Unnamed: 0,a,b
0,10,22
1,30,44
2,60,55
3,80,77
4,90,101


In [None]:
dfx = df.sum(axis=0)
dfx

a    270
b    299
dtype: int64

In [None]:
dfx.loc['b']

299

In [None]:
dfx = df.sum(axis=1)
dfx

0     32
1     74
2    115
3    157
4    191
dtype: int64

In [None]:
dfx.loc[2]

115

# **Module 10 Pandas Missing Data**

In [None]:
import numpy as np
dict = {'First Score':[100, 90, np.nan, 95], 
        'Second Score': [30, 45, 56, np.nan], 
        'Third Score':[np.nan, 40, 80, 98]} 
dict

{'First Score': [100, 90, nan, 95],
 'Second Score': [30, 45, 56, nan],
 'Third Score': [nan, 40, 80, 98]}

In [None]:
import pandas as pd
df = pd.DataFrame(dict) 
# using isnull() function   
df.isnull() 

Unnamed: 0,First Score,Second Score,Third Score
0,False,False,True
1,False,False,False
2,True,False,False
3,False,True,False


# **Module 10 Numpy Missing Data**

In [None]:
import numpy as np
print (np.log(-1), np.log(0), np.log(1))
np.isnan([np.log(-1), np.log(0), np.log(1)])

nan -inf 0.0


  
  
  This is separate from the ipykernel package so we can avoid doing imports until
  This is separate from the ipykernel package so we can avoid doing imports until


array([ True, False, False])

In [None]:
np.array([1.0, 2.0, np.NaN, 7.0])

array([ 1.,  2., nan,  7.])

In [None]:
# sample input ndarray:
x = np.array([1.0, 2.5, np.nan, 1.3, np.inf, 7.2])
print("input array with bad values:")
print(x)

xm = np.ma.masked_invalid(x)
print("masked version:")
print(xm)

input array with bad values:
[1.  2.5 nan 1.3 inf 7.2]
masked version:
[1.0 2.5 -- 1.3 -- 7.2]


In [None]:
x = np.ma.array([1, 2, 3], mask=[False, False, True])
y = np.ma.array([1, 0, 1])
print("x * y =", x * y)
print("x/y =", x/y)
z = np.array([1, np.nan, 2])
print("x * z = ", x * z)
print (" x/z =", x/z)

x * y = [1 0 --]
x/y = [1.0 -- --]
x * z =  [1.0 nan --]
 x/z = [1.0 -- --]


# **Module 10 Pandas Duplicated Data**

In [None]:
# importing pandas package 
import pandas as pd 

# making data frame from csv file 
data = pd.read_csv("./sample_data/employees.csv") 

# sorting by first name 
data.sort_values("First Name", inplace = True) 

# making a bool series 
bool_series = data["First Name"].duplicated() 
bool_series



101    False
327     True
440     True
937     True
137    False
       ...  
902     True
925     True
946     True
947     True
951     True
Name: First Name, Length: 1000, dtype: bool

In [None]:
# displaying data 
print(data.head()) 

# display data 
data[bool_series] 


    First Name Gender Start Date  ... Bonus %  Senior Management             Team
101      Aaron   Male  2/17/2012  ...  11.849               True        Marketing
327      Aaron   Male  1/29/1994  ...   5.097               True        Marketing
440      Aaron   Male  7/22/1990  ...  11.343               True  Client Services
937      Aaron    NaN  1/22/1986  ...  18.424              False  Client Services
137       Adam   Male  5/21/2011  ...  15.120              False     Distribution

[5 rows x 8 columns]


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
327,Aaron,Male,1/29/1994,6:48 PM,58755,5.097,True,Marketing
440,Aaron,Male,7/22/1990,2:53 PM,52119,11.343,True,Client Services
937,Aaron,,1/22/1986,7:39 PM,63126,18.424,False,Client Services
141,Adam,Male,12/24/1990,8:57 PM,110194,14.727,True,Product
302,Adam,Male,7/5/2007,11:59 AM,71276,5.027,True,Human Resources
...,...,...,...,...,...,...,...,...
902,,Male,5/23/2001,7:52 PM,103877,6.322,,Distribution
925,,Female,8/23/2000,4:19 PM,95866,19.388,,Sales
946,,Female,9/15/1985,1:50 AM,133472,16.941,,Distribution
947,,Male,7/30/2012,3:07 PM,107351,5.329,,Marketing


In [None]:


# passing NOT of bool series to see unique values only 
data = data[~bool_series] 

# displaying data 
print(data.info() )
data 


<class 'pandas.core.frame.DataFrame'>
Int64Index: 201 entries, 101 to 7
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   First Name         200 non-null    object 
 1   Gender             178 non-null    object 
 2   Start Date         201 non-null    object 
 3   Last Login Time    201 non-null    object 
 4   Salary             201 non-null    int64  
 5   Bonus %            201 non-null    float64
 6   Senior Management  200 non-null    object 
 7   Team               197 non-null    object 
dtypes: float64(1), int64(1), object(6)
memory usage: 14.1+ KB
None


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2/17/2012,10:20 AM,61602,11.849,True,Marketing
137,Adam,Male,5/21/2011,1:45 AM,95327,15.120,False,Distribution
300,Alan,Male,6/26/1988,3:54 AM,111786,3.592,True,Engineering
372,Albert,Male,2/1/1997,4:20 PM,67827,19.717,True,Engineering
988,Alice,Female,10/5/2004,9:34 AM,47638,11.209,False,Human Resources
...,...,...,...,...,...,...,...,...
433,Wanda,Female,7/20/2008,1:44 PM,65362,7.132,True,Legal
177,Wayne,Male,4/7/2012,8:00 AM,102652,14.085,True,Distribution
820,William,Male,11/18/1993,12:27 PM,54058,5.182,True,Human Resources
450,Willie,Male,8/22/2009,1:03 PM,55038,19.691,False,Legal


In [None]:
data.drop_duplicates(subset=['First Name'])
data

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
101,Aaron,Male,2/17/2012,10:20 AM,61602,11.849,True,Marketing
137,Adam,Male,5/21/2011,1:45 AM,95327,15.120,False,Distribution
300,Alan,Male,6/26/1988,3:54 AM,111786,3.592,True,Engineering
372,Albert,Male,2/1/1997,4:20 PM,67827,19.717,True,Engineering
988,Alice,Female,10/5/2004,9:34 AM,47638,11.209,False,Human Resources
...,...,...,...,...,...,...,...,...
433,Wanda,Female,7/20/2008,1:44 PM,65362,7.132,True,Legal
177,Wayne,Male,4/7/2012,8:00 AM,102652,14.085,True,Distribution
820,William,Male,11/18/1993,12:27 PM,54058,5.182,True,Human Resources
450,Willie,Male,8/22/2009,1:03 PM,55038,19.691,False,Legal


# **Array Dimensions**

3D Array

In [13]:
import numpy as np
My3DArray = np.arange(12).reshape(2,2,3)
print (My3DArray)

[[[ 0  1  2]
  [ 3  4  5]]

 [[ 6  7  8]
  [ 9 10 11]]]


3D Array contains 2D info

In [10]:
import numpy as np
My3DArray1 = np.arange(4).reshape(2,2,1)
print (My3DArray1)

[[[0]
  [1]]

 [[2]
  [3]]]


In [15]:
My3DArray1 = np.array([[[0],[1]],[[2],[3]]])
print (My3DArray1)

[[[0]
  [1]]

 [[2]
  [3]]]


2D Array

In [11]:
import numpy as np
My2DArray = np.arange(4).reshape(2,2)
print (My2DArray)

[[0 1]
 [2 3]]


In [17]:
My2DArray = np.array([[0,1],[2,3]])
print (My2DArray)

[[0 1]
 [2 3]]


Expand Dimension

In [18]:
y = np.expand_dims(My2DArray, axis=0)
print (y)

[[[0 1]
  [2 3]]]


In [19]:
y = np.expand_dims(My2DArray, axis=1)
print (y)

[[[0 1]]

 [[2 3]]]


In [21]:
y = np.expand_dims(My2DArray, axis=2)
print (y)

[[[0]
  [1]]

 [[2]
  [3]]]
