### Basics of Linear Algebra for Machine Learning -- Discover the Mathematical Language of Data in Python

## Introduction
- linear algebra is the math of data; matrices and vectors are the language of data
- linear algebra is a valuable tool in statistics
- matrix factorization (used in PCA), linear least squares (used in regression), eigendecomposition, singular value decomposition (used for dimension reduction, feature selection, visualization, noise reduction...)
- row -> each observation; column -> features


## Numpy

### Basic ndarray operations

In [2]:
# np.array(): build an ndarray directly from a plain Python list
from numpy import array
import numpy as np

data = array([1, 2, 3, 4, 5])
type(data)  # numpy.ndarray

numpy.ndarray

In [21]:
# array indexing

data2 = [[1,2,3,4,5],[6,7,8,9,0]]  # nested list: 2 rows x 5 columns
data2 = array(data2)

# both forms select the element at row 0, column 1
print(data2[0,1])  # one indexing operation with a (row, column) pair
print(data2[0][1])  # chained: take row 0 first, then index into that row

# both forms select the whole first row
print(data2[0])
print(data2[0,])

2
2
[1 2 3 4 5]
[1 2 3 4 5]


In [37]:
# slicing
data2 = [[1,2,3,4,5],
         [6,7,8,9,0]]
data2 = array(data2)

data2[0:1][2:]  # chained slices both act on the row axis -> empty result, shape (0, 5)
data2[0:1, 2:]  # correct: rows 0:1 and columns 2: in a single indexing operation

# split the columns into X (input features) and y (output variable)
X = data2[:, :-1]
y = data2[:, -1]  # array([5, 0]); selecting a single column yields a 1D array, y.shape == (2,)

# split the rows into train and test sets
# the split index must be smaller than the number of rows, otherwise test is empty
split = 1
train, test = data2[:split, :], data2[split:, :]




In [65]:
# reshape 1D to 2D
# after slicing out a single column, y may become a 1D array
# e.g. some scikit-learn APIs expect a 2D array, so a 1D array has to be reshaped to (n, 1)

# np.reshape(array, (n,m))
# ndarray.reshape(n,m)

arrayA = np.arange(8)
# arrayA = array([0, 1, 2, 3, 4, 5, 6, 7])

# equivalent expressions, each returns a (2, 4) array
np.reshape(arrayA, (2, 4))  # raises ValueError if n*m does not match the number of elements
arrayA.reshape(2,4)
arrayA.reshape((2,4)) # the dimensions may be passed as separate arguments or as one tuple
arrayA.reshape(-1,4) # -1 lets numpy infer that dimension from the array size

arrayA = arrayA.reshape(-1,1)  # convert to an n x 1 2D array
arrayA.shape # changed from (8,) to (8, 1)

data = array([[11,22],
       [33,44],
       [55,66]])   # shape = (3,2)
data = data.reshape(data.shape[0],data.shape[1],1)  # (3,2) 2D to (3,2,1) 3D array
data

array([[[11],
        [22]],

       [[33],
        [44]],

       [[55],
        [66]]])

In [5]:
# Array addition with broadcasting

A = array([[1,2,3],
         [1,2,3]])
B = array([1])  
C = array([3,2,1]) 
A+B # B (shape (1,)) is broadcast to 2x3 and added to A -> [[2,3,4],[2,3,4]]
A+C # C (shape (3,)) is broadcast across both rows of A -> [[4,4,4],[4,4,4]]

array([[4, 4, 4],
       [4, 4, 4]])

### Vectors and vector arithmetic
- np.dot(a,b)
- for 1D array -> inner product
- for 2D array -> matrix multiplication

In [27]:
from numpy import array
v = array([1,2,3,4,5])

# element-wise vector multiplication
# this is NOT the dot product
a = array([1,2,3])
b = array([4])
a*b  # array([ 4,  8, 12]); b is broadcast to the length of a

c = array([3,4,5])
a*c  # array([ 3,  8, 15])

# element-wise vector division
a = array([1,2,3])
a/a  # array([1., 1., 1.])

# dot product
a.dot(a)  # 1*1 + 2*2 + 3*3 = 14
np.dot(a,a) # 14

# np.dot: for 1D arrays -> inner product; for 2D arrays -> matrix multiplication


14

In [6]:
# Vector norm = vector length (magnitude)

# L1 norm -> sum of the absolute values of the vector, aka taxicab norm, Manhattan norm
# L2 norm -> square root of the sum of the squared values, aka Euclidean norm
# max norm -> the largest absolute value in the vector

from numpy.linalg import norm

a = array([1,2,3])
norm(a,1)  # 6
norm(a,2)  # 3.7416573867739413
norm(a) # 3.7416573867739413, the default order for a vector is the L2 norm
norm(a,np.inf) # aka max norm, 3

3.0

In [24]:
# Matrices, matrix arithmetic
from numpy import array
import numpy as np

A = array([[1,2,3],
          [4,5,6]])
A[0,2]==A[0][2]  # True, both forms index row 0, column 2

# Hadamard product  C = A o B, element-wise multiplication
A*A  # [1,4,9],[16,25,36]

C = array([2,2,2])
A*C  # [2,4,6],[8,10,12], C is broadcast across the rows of A

# Element-wise division works the same way
A/A  # [1,1,1],[1,1,1]

# Matrix-Matrix multiplication, dot product, C = A•B = AB
# the number of columns of A must equal the number of rows of B: (2x3)•(3x2) -> 2x2
A = array([[1,2,3],
           [4,5,6]])
B = array([[1,2],
           [3,4],
           [5,6]])
np.dot(A,B)
A@B 
A.dot(B)  # all three expressions are equivalent

array([[22, 28],
       [49, 64]])

In [25]:
# Matrix-vector multiplication: (2x3 matrix) @ (length-3 vector) -> length-2 vector
A = array([[1,2,3],
           [4,5,6]])
B = array([1,2,3])
A@B  # [1*1+2*2+3*3, 4*1+5*2+6*3] = [14, 32]

array([14, 32])

In [70]:
# Forms of matrices
# square, symmetric, upper triangular, lower triangular, diagonal, identity, orthogonal

# extracting the triangular parts of a matrix
import numpy as np
import numpy.linalg as la

M = np.array([[1,2,3],
              [4,5,6],
              [7,8,9]])
np.triu(M)  # upper triangle: entries below the main diagonal are set to 0
np.tril(M)  # lower triangle: entries above the main diagonal are set to 0 (NOT the L of an LU factorization)

# diagonal matrix
# all off-diagonal entries are zero; the matrix does not have to be square
np.diag(M)  # array([1, 5, 9]) -> extracts the main diagonal as a 1D array
np.diag(np.diag(M))  # builds a diagonal matrix from that vector: [1,0,0],[0,5,0],[0,0,9]

# creating identity matrix
np.identity(4)

# orthogonal matrix
# two vectors are orthogonal if their dot product = 0
# orthonormal -> orthogonal and normalized (length = 1)
# for an orthogonal matrix Q the transpose equals the inverse:
# Q.T = Q^-1, hence Q•Q.T = Q.T•Q = I

AA = np.array([[1,0],[0,-1]])
# la.inv returns floats, so compare with a tolerance instead of an elementwise ==
np.allclose(AA.T, la.inv(AA))  # True -> AA.T equals the inverse of AA, so AA is orthogonal
AA@AA.T # = I2

array([[1, 0],
       [0, 1]])

In [83]:
# inverse of matrix
# defined only for square, non-singular matrices: X • inv(X) = I
import numpy.linalg as la

X = np.array([[1,2],[3,4]])
la.inv(X)  # [[-2, 1], [1.5, -0.5]]


array([[-2. ,  1. ],
       [ 1.5, -0.5]])

In [96]:
# Trace of a square matrix: the sum of the main-diagonal elements
import numpy as np
np.trace([[1,2],
          [3,4]])  # 1 + 4 = 5

# Determinant of a square matrix, written |A| or det(A)
import numpy.linalg as la
la.det([[1,2],
        [3,4]])  # 1*4 - 2*3 = -2 (returned as a float, subject to rounding)

# Rank
# number of linearly independent rows/columns
# numpy estimates the rank using singular value decomposition (SVD)
import numpy.linalg as la
la.matrix_rank([[1,2],
                [3,4]])  # 2

2

In [107]:
# Sparse matrix
# comprised mostly of 0s -> savings in memory and computation
# sparsity = number of zero elements / total number of elements
# typical sources: one-hot encoding, count encoding, NLP, recommendation systems
# CSR (compressed sparse row) is often used to represent sparse matrices in machine learning

import numpy as np
from scipy.sparse import csr_matrix

A = np.array([[1,0,0,1,0,0],
              [0,0,2,0,0,1],
              [0,0,0,2,0,0]])
# convert the dense array to a sparse matrix in CSR format
S = csr_matrix(A)
print(S)  # shows only the stored non-zero entries with their (row, column) positions
# convert back to a dense ndarray; toarray() is used instead of todense()
# because todense() returns the legacy np.matrix type
B = S.toarray()
print(B)

sparsity = 1-np.count_nonzero(A)/A.size  # 13 zeros out of 18 elements
sparsity

  (0, 0)	1
  (0, 3)	1
  (1, 2)	2
  (1, 5)	1
  (2, 3)	2
[[1 0 0 1 0 0]
 [0 0 2 0 0 1]
 [0 0 0 2 0 0]]


0.7222222222222222

In [110]:
# tensor product via np.tensordot
# for A with m dimensions and B with n dimensions, the tensor product A⨂B has m+n dimensions
import numpy as np

A = np.array([[1,2],[3,4]])  # 2D, shape (2,2)
B = np.array([[1,2,3],[4,5,6],[7,8,9]]) # 2D, shape (3,3)
np.tensordot(A,B,axes=0)  # axes=0 selects the tensor (outer) product; result is 4D, shape (2,2,3,3)

array([[[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],

        [[ 2,  4,  6],
         [ 8, 10, 12],
         [14, 16, 18]]],


       [[[ 3,  6,  9],
         [12, 15, 18],
         [21, 24, 27]],

        [[ 4,  8, 12],
         [16, 20, 24],
         [28, 32, 36]]]])

### Matrix factorization, decomposition
- LU factorization
- QR factorization
- Cholesky decomposition

In [116]:
# LU / PLU factorization of a square matrix
# plain LU:  A = L•U
#   L -> lower triangular matrix, U -> upper triangular matrix
# with pivoting:  A = P•L•U
#   P -> permutation matrix that re-orders the rows of L•U back into A

import numpy as np
from scipy.linalg import lu

A = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# factorize A into the permutation, lower and upper factors
P, L, U = lu(A)
print(P, L, U, sep="\n")

# L•U alone is a row-permuted A; multiplying by P restores A itself
print(L @ U, P @ L @ U, sep="\n")

[[0. 1. 0.]
 [0. 0. 1.]
 [1. 0. 0.]]
[[1.         0.         0.        ]
 [0.14285714 1.         0.        ]
 [0.57142857 0.5        1.        ]]
[[ 7.00000000e+00  8.00000000e+00  9.00000000e+00]
 [ 0.00000000e+00  8.57142857e-01  1.71428571e+00]
 [ 0.00000000e+00  0.00000000e+00 -1.58603289e-16]]
[[7. 8. 9.]
 [1. 2. 3.]
 [4. 5. 6.]]
[[1. 2. 3.]
 [4. 5. 6.]
 [7. 8. 9.]]


In [123]:
# QR factorization: A = Q•R
# A (m x n) does not have to be square
# with mode='complete': Q is a square m x m matrix, R is an upper triangular m x n matrix
import numpy as np
import numpy.linalg as la

A = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

# factorize A; numpy's default mode is 'reduced', so 'complete' is requested explicitly
Q, R = la.qr(A, mode='complete')
print(Q, R, sep="\n")

[[-0.12309149  0.90453403  0.40824829]
 [-0.49236596  0.30151134 -0.81649658]
 [-0.86164044 -0.30151134  0.40824829]]
[[-8.12403840e+00 -9.60113630e+00 -1.10782342e+01]
 [ 0.00000000e+00  9.04534034e-01  1.80906807e+00]
 [ 0.00000000e+00  0.00000000e+00 -1.11164740e-15]]


In [126]:
# Cholesky decomposition
# for square, symmetric, positive-definite matrices
# A = L•L.T , L a lower triangular matrix
# A = U.T•U , U an upper triangular matrix
# used to solve linear least squares for linear regression, simulation, optimization methods
# where it applies, Cholesky decomposition is more efficient than LU decomposition

import numpy as np
import numpy.linalg as la

A = array([[2,1,1],
           [1,2,1],
           [1,1,2]])
L = la.cholesky(A)  # returns the lower-triangular factor L
L@L.T  # reconstructs the original matrix A



array([[2., 1., 1.],
       [1., 2., 1.],
       [1., 1., 2.]])

### Eigendecomposition

In [160]:
# Eigendecomposition
# decomposes a square matrix into a set of eigenvectors and eigenvalues
# defining equation: A𝝊 = 𝝀𝝊
# 𝝊 -> eigenvector; 𝝀 -> eigenvalue, a scalar
# a matrix can have one eigenvector/eigenvalue pair for each dimension of the parent matrix
# not all square matrices can be decomposed into eigenvectors/eigenvalues
# A = Q•𝜦•Q^-1
# Q -> matrix whose columns are the eigenvectors
# 𝜦 -> upper case of 𝝀, a diagonal matrix comprised of the eigenvalues
# la.eig returns eigenvectors normalized to unit length
# the eigenvalue is the factor by which A scales its eigenvector

# import numpy under the alias used below so the cell runs on its own
import numpy as np
import numpy.linalg as la

A = np.array([[1,2,3],[4,5,6],[7,8,9]])
value, vector = la.eig(A)  # value: 1D array of eigenvalues; vector: eigenvectors stored as columns
print(value)
print(vector)

# verify A𝝊 = 𝝀𝝊 for the first eigenpair
eval0 = value[0]
evec0 = vector[:,0] # 1st column = 1st eigenvector, which pairs with the 1st eigenvalue

# 𝝀𝝊
print(eval0*evec0)

# A𝝊 (prints the same vector as 𝝀𝝊 above)
print(A@evec0)

# reconstruct A = Q•𝜦•Q^-1
# Q -> matrix whose columns are the eigenvectors
# 𝜦 -> diagonal matrix comprised of the eigenvalues
L = np.diag(value)  # diagonal matrix 𝜦 built from the eigenvalues

print(vector)
vector@L@la.inv(vector) # Q•𝜦•Q^-1 gives back A (up to floating-point rounding)


[ 1.61168440e+01 -1.11684397e+00 -9.75918483e-16]
[[-0.23197069 -0.78583024  0.40824829]
 [-0.52532209 -0.08675134 -0.81649658]
 [-0.8186735   0.61232756  0.40824829]]
[ -3.73863537  -8.46653421 -13.19443305]
[ -3.73863537  -8.46653421 -13.19443305]
[[-0.23197069 -0.78583024  0.40824829]
 [-0.52532209 -0.08675134 -0.81649658]
 [-0.8186735   0.61232756  0.40824829]]


array([[1., 2., 3.],
       [4., 5., 6.],
       [7., 8., 9.]])