# Numpy

In [1]:
import numpy as np

In [2]:
# Three evenly spaced values from 2 to 6, with both endpoints included.
np.linspace(2, 6, 3)

array([2., 4., 6.])

In [8]:
# An array-like stop is broadcast: each column runs from 0 to 2 in four steps.
np.linspace(0,(2,2), 4)

array([[0.        , 0.        ],
       [0.66666667, 0.66666667],
       [1.33333333, 1.33333333],
       [2.        , 2.        ]])

In [4]:
# Seed NumPy's global random generator so the draw below is reproducible.
np.random.seed(2)
# 4x2 sample from the standard normal distribution, scaled by 100.
100 * np.random.randn(4,2)

array([[ -41.67578474,   -5.62668272],
       [-213.61960957,  164.02708084],
       [-179.34355852,  -84.17473657],
       [  50.28814172, -124.52880866]])

# Linear Algebra Refresher

### Vector Vector Multiplication

In [12]:
def vector_vector_multiplication(u, v):
    """Return the dot product of ``u`` and ``v`` using an explicit loop.

    Both arguments must have the same length along their first axis; the
    ``assert`` fails otherwise. The running total starts at ``0.0``, so
    integer inputs still produce a floating-point result.
    """
    length = u.shape[0]
    assert length == v.shape[0]

    total = 0.0
    # Walk the two vectors in lockstep and accumulate the pairwise products.
    for u_k, v_k in zip(u, v):
        total = total + u_k * v_k

    return total

In [10]:
# Two length-4 integer vectors used by the multiplication examples below.
u = np.array([2,4,5,6])
v = np.array([1,0,0,2])

In [13]:
# 2*1 + 4*0 + 5*0 + 6*2 = 14; the result is a float because the accumulator starts at 0.0.
vector_vector_multiplication(u, v)

np.float64(14.0)

In [15]:
# NumPy's built-in dot product gives the same value; integer inputs keep an integer result.
u.dot(v)

np.int64(14)

### Matrix Vector Multiplication

In [16]:
# 3x4 integer matrix; each of its rows has the same length as the vector v.
U = np.array([
    [2,4,5,6],
    [1,2,1,2],
    [3,1,2,1]
])

In [21]:
def matrix_vector_multiplication(U, v):
    """Multiply matrix ``U`` by vector ``v`` one row at a time.

    The number of columns of ``U`` must equal the length of ``v`` (enforced
    by the ``assert``). The result is a float array with one entry per row
    of ``U``; entry ``i`` is the dot product of row ``i`` with ``v``.
    """
    assert U.shape[1] == v.shape[0]

    product = np.zeros(U.shape[0])
    # Each output entry is the dot product of one row of U with v.
    for row_idx, row in enumerate(U):
        product[row_idx] = vector_vector_multiplication(row, v)

    return product

In [22]:
# One dot product per row of U; the output is float because the result array is created with np.zeros.
matrix_vector_multiplication(U, v)

array([14.,  5.,  5.])

In [23]:
# NumPy's built-in matrix-vector product gives the same values; integer inputs keep an integer result.
U.dot(v)

array([14,  5,  5])

### Matrix Matrix Multiplication

In [33]:
# U is 3x4 and V is 4x3, so the product U·V is defined and has shape 3x3.
U = np.array([
    [2,4,5,6],
    [1,2,1,2],
    [3,1,2,1]
])

# The 0.5 entry makes V a float array.
V = np.array([
    [1,1,2],
    [0,0.5,1],
    [0,2,1],
    [2, 1, 0]
])

In [47]:
def matrix_matrix_multiplication(U, V):
    """Multiply matrix ``U`` by matrix ``V`` one column of ``V`` at a time.

    The number of columns of ``U`` must equal the number of rows of ``V``
    (enforced by the ``assert``). The result is a float array of shape
    ``(U.shape[0], V.shape[1])``; its column ``j`` is ``U`` times column
    ``j`` of ``V``.
    """
    assert U.shape[1] == V.shape[0]

    product = np.zeros((U.shape[0], V.shape[1]))

    # Fill the result column by column: U times the j-th column of V.
    for col_idx in range(V.shape[1]):
        product[:, col_idx] = matrix_vector_multiplication(U, V[:, col_idx])

    return product
    

In [48]:
# Column j of the result is U multiplied by column j of V.
matrix_matrix_multiplication(U,V)

array([[14. , 20. , 13. ],
       [ 5. ,  6. ,  5. ],
       [ 5. ,  8.5,  9. ]])

# Identity Matrix (I)

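The identity matrix $I$ plays the role of the number 1 for matrices: it has 1s on the main diagonal and 0s everywhere else. Multiplying any compatible matrix $U$ by $I$ gives $U$ back ($UI = IU = U$). It is also what defines a matrix inverse: $A^{-1}$ is the matrix for which $A^{-1}A = I$, which the cells below verify numerically.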

In [49]:
# Keep the first three rows of V to get a square 3x3 matrix that can be inverted.
Vs = V[[0,1,2]]
Vs

array([[1. , 1. , 2. ],
       [0. , 0.5, 1. ],
       [0. , 2. , 1. ]])

In [50]:
# Inverse of Vs: the matrix whose product with Vs is the identity.
Vs_inv = np.linalg.inv(Vs)
Vs_inv

array([[ 1.        , -2.        ,  0.        ],
       [ 0.        , -0.66666667,  0.66666667],
       [ 0.        ,  1.33333333, -0.33333333]])

In [53]:
# Multiplying the inverse by the original matrix recovers the 3x3 identity.
Vs_inv.dot(Vs)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

### A practical use case for matrix inversion

Solving a linear regression is one of the most practical uses of matrix inversion you'll encounter in analytics.

When you fit a linear model ($y = X\beta + \varepsilon$), the least-squares solution for the coefficients is:
$$\beta = (X^\top X)^{-1} X^\top y$$
Computing the best-fit parameters with this formula requires inverting the matrix $X^\top X$.
**Concrete example:** you're analyzing production efficiency at Lego. You have data on:

- `y` = defect rate
- `X` = [temperature, humidity, machine_speed], one row per observation

In [62]:
# Observations: one row per measurement, columns = [temperature, humidity, machine_speed].
X = np.array([[20, 60, 100],
              [22, 65, 105],
              [25, 70, 110]])
y = np.array([2.5, 2.8, 3.2])  # defect rate for each row of X

# Textbook closed form beta = (X'X)^-1 X'y, built with an explicit inverse
# to show what the inverse is used for.
XtX = X.T @ X
XtX_inv = np.linalg.inv(XtX)
beta = XtX_inv @ X.T @ y

# Cross-check: solving the normal equations (X'X) beta = X'y directly must
# give the same coefficients. np.linalg.solve never forms the inverse, which
# is cheaper and numerically more stable, so prefer it outside of a demo.
np.testing.assert_allclose(np.linalg.solve(XtX, X.T @ y), beta, atol=1e-6)

# With 3 observations and 3 coefficients (no intercept) the fit is exact:
# X @ beta reproduces y, so these numbers illustrate the algebra rather than
# a statistically meaningful model.
print("Coefficients:", beta)

Coefficients: [ 0.1     0.0375 -0.0175]


In [56]:
X.T

array([[ 20,  22,  25],
       [ 60,  65,  70],
       [100, 105, 110]])

In [57]:
X

array([[ 20,  60, 100],
       [ 22,  65, 105],
       [ 25,  70, 110]])

In [60]:
# Top-left entry of X'X computed by hand: the sum of squares of X's first column (20, 22, 25).
20 * 20 + 22*22 + 25*25

1509

In [55]:
XtX

array([[ 1509,  4380,  7060],
       [ 4380, 12725, 20525],
       [ 7060, 20525, 33125]])

In [61]:
XtX_inv

array([[ 6.      , -4.525   ,  1.525   ],
       [-4.525   ,  3.550625, -1.235625],
       [ 1.525   , -1.235625,  0.440625]])

In [64]:
# Recompute and display X'X (the same product already formed in the regression cell).
XtX = X.T @ X
XtX

array([[ 1509,  4380,  7060],
       [ 4380, 12725, 20525],
       [ 7060, 20525, 33125]])

# Pandas

In [1]:
import pandas as pd

In [11]:
# Five sample car records; the values in each inner list follow the order of `columns` below.
# The Hyundai row has no horsepower value (None).
data = [
    ['Nissan', 'Stanza', 1991, 138, 4, 'MANUAL', 'sedan', 2000],
    ['Hyundai', 'Sonata', 2017, None, 4, 'AUTOMATIC', 'Sedan', 27150],
    ['Lotus', 'Elise', 2010, 218, 4, 'MANUAL', 'convertible', 54990],
    ['GMC', 'Acadia',  2017, 194, 4, 'AUTOMATIC', '4dr SUV', 34450],
    ['Nissan', 'Frontier', 2017, 261, 6, 'MANUAL', 'Pickup', 32340],
]

# Column labels, in the same order as the values in each record.
columns = [
    'Make', 'Model', 'Year', 'Engine HP', 'Engine Cylinders',
    'Transmission Type', 'Vehicle_Style', 'MSRP'
]

In [12]:
# Build the DataFrame; the missing horsepower becomes NaN, which makes 'Engine HP' a float column.
df = pd.DataFrame(data, columns=columns)

In [13]:
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [16]:
# Boolean-mask row selection: keep only the rows whose Make is 'Lotus'.
df.loc[df["Make"]== "Lotus"]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990


In [17]:
# Replace the default 0..4 integer index with custom labels (this modifies df in place).
df.index = ['a', 'b', 'c', 'd', 'e']
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
a,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
b,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
c,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
d,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
e,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [22]:
# Go back to a fresh 0..n-1 index; drop=True discards the letter labels instead of keeping them as a column.
df = df.reset_index(drop=True)
df

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
2,Lotus,Elise,2010,218.0,4,MANUAL,convertible,54990
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


In [23]:
# Element-wise arithmetic on a column; the missing value stays NaN.
df['Engine HP'] / 2

0     69.0
1      NaN
2    109.0
3     97.0
4    130.5
Name: Engine HP, dtype: float64

In [25]:
# Keep only the rows with a model year after 2014.
df[df['Year'] > 2014]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
1,Hyundai,Sonata,2017,,4,AUTOMATIC,Sedan,27150
3,GMC,Acadia,2017,194.0,4,AUTOMATIC,4dr SUV,34450
4,Nissan,Frontier,2017,261.0,6,MANUAL,Pickup,32340


### Filtering

In [29]:
# Combine conditions with & (element-wise AND); each comparison needs its own parentheses.
df[(df['Make']== 'Nissan') & (df['Model']== 'Stanza')]

Unnamed: 0,Make,Model,Year,Engine HP,Engine Cylinders,Transmission Type,Vehicle_Style,MSRP
0,Nissan,Stanza,1991,138.0,4,MANUAL,sedan,2000


In [32]:
# Vectorized string method via the .str accessor; returns a new Series and leaves df unchanged.
df['Transmission Type'].str.lower()

0       manual
1    automatic
2       manual
3    automatic
4       manual
Name: Transmission Type, dtype: object

In [37]:
# Show 'Engine HP' with missing values replaced by 0; the result is not assigned, so df is unchanged.
df['Engine HP'].fillna(0)

0    138.0
1      0.0
2    218.0
3    194.0
4    261.0
Name: Engine HP, dtype: float64

In [38]:
# Smallest MSRP in the table.
df.MSRP.min()

np.int64(2000)

In [45]:
# Distinct non-missing values per column: 'Engine HP' shows 4 because NaN is not counted,
# and 'Vehicle_Style' shows 5 because 'sedan' and 'Sedan' differ in case.
df.nunique()

Make                 4
Model                5
Year                 3
Engine HP            4
Engine Cylinders     2
Transmission Type    2
Vehicle_Style        5
MSRP                 5
dtype: int64

### Grouping

In [49]:
# Count the non-missing 'Make' entries in each group, i.e. the number of rows per make here.
df.groupby('Make').Make.count()

Make
GMC        1
Hyundai    1
Lotus      1
Nissan     2
Name: Make, dtype: int64

In [50]:
# Convert the frame to a list with one {column: value} dict per row.
df.to_dict(orient = 'records')

[{'Make': 'Nissan',
  'Model': 'Stanza',
  'Year': 1991,
  'Engine HP': 138.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'sedan',
  'MSRP': 2000},
 {'Make': 'Hyundai',
  'Model': 'Sonata',
  'Year': 2017,
  'Engine HP': nan,
  'Engine Cylinders': 4,
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': 'Sedan',
  'MSRP': 27150},
 {'Make': 'Lotus',
  'Model': 'Elise',
  'Year': 2010,
  'Engine HP': 218.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'convertible',
  'MSRP': 54990},
 {'Make': 'GMC',
  'Model': 'Acadia',
  'Year': 2017,
  'Engine HP': 194.0,
  'Engine Cylinders': 4,
  'Transmission Type': 'AUTOMATIC',
  'Vehicle_Style': '4dr SUV',
  'MSRP': 34450},
 {'Make': 'Nissan',
  'Model': 'Frontier',
  'Year': 2017,
  'Engine HP': 261.0,
  'Engine Cylinders': 6,
  'Transmission Type': 'MANUAL',
  'Vehicle_Style': 'Pickup',
  'MSRP': 32340}]