In [149]:
from scipy.linalg import qr, cholesky, pinv, solve, norm
from numpy.random import randn, rand
from numpy.linalg import lstsq, eigvalsh
import numpy as np

In [150]:
# generate random data matrix
n,d = 6,4
X = randn(n,d)

# optional: give it linearly dependent columns
# X[:,3] = X[:,2]

# Understanding the pseudoinverse

In [151]:
# form pseudoinverse
Xd = pinv(X)

In [152]:
# X†X ≈ I_d
Xd @ X

array([[ 1.00000000e+00,  8.79885735e-17, -2.96358894e-17,
         4.76292800e-17],
       [ 5.38808617e-16,  1.00000000e+00,  1.47170786e-16,
        -1.01578453e-17],
       [-9.40987519e-16, -3.79978029e-16,  1.00000000e+00,
        -4.65500701e-17],
       [-1.39198888e-15, -1.41215391e-17,  1.61924690e-16,
         1.00000000e+00]])

In [153]:
np.allclose(Xd @ X, np.identity(4))

True

In [154]:
# XX† !≈ I_n
X @ Xd

array([[ 0.73201794,  0.31517833, -0.30545364, -0.01662556,  0.03318319,
         0.04637713],
       [ 0.31517833,  0.36700078,  0.14039856,  0.18309066,  0.24330578,
         0.14332543],
       [-0.30545364,  0.14039856,  0.4692452 ,  0.11749077,  0.27337729,
         0.21794778],
       [-0.01662556,  0.18309066,  0.11749077,  0.89701245, -0.17396005,
        -0.12048376],
       [ 0.03318319,  0.24330578,  0.27337729, -0.17396005,  0.69200936,
        -0.21871519],
       [ 0.04637713,  0.14332543,  0.21794778, -0.12048376, -0.21871519,
         0.84271428]])

In [155]:
np.allclose(X @ Xd, np.identity(6))

False

In [171]:
Q,R = qr(X)
Q,R = qr(X, mode='economic')

In [172]:
np.allclose(X, Q @ R)

True

In [173]:
Q

array([[-0.0251521 , -0.18701833, -0.08783951,  0.8298757 ],
       [ 0.18544771, -0.12204774, -0.4721129 ,  0.30793455],
       [ 0.36407333, -0.02796421, -0.41261844, -0.40701331],
       [-0.62210597, -0.09666413, -0.69472381, -0.13420684],
       [ 0.55308795,  0.50216719, -0.34060482,  0.13386393],
       [ 0.37350368, -0.82935557, -0.0220644 , -0.12203187]])

In [174]:
R

array([[ 4.10378606, -0.72465696, -0.26191898,  0.21376887],
       [ 0.        , -2.09803959, -0.27895404, -0.62470393],
       [ 0.        ,  0.        , -0.97093259,  0.63815253],
       [ 0.        ,  0.        ,  0.        ,  1.685643  ]])

In [175]:
print(np.allclose(Q.T @ Q, np.identity(Q.shape[1])))
Q.T @ Q

True


array([[ 1.00000000e+00,  1.51461084e-16,  4.07030745e-17,
        -3.08845298e-16],
       [ 1.51461084e-16,  1.00000000e+00, -5.76214059e-18,
        -7.16653888e-17],
       [ 4.07030745e-17, -5.76214059e-18,  1.00000000e+00,
        -1.19357030e-16],
       [-3.08845298e-16, -7.16653888e-17, -1.19357030e-16,
         1.00000000e+00]])

In [176]:
# form data from noisy linear model
wtrue = randn(d)
y = X.dot(wtrue) + .01*randn(n)

In [177]:
# solve least squares problem to estimate w
Q,R = qr(X, mode='economic')
w = solve(R, Q.T @ y)

In [178]:
# how good is our estimate?
norm(w - wtrue)

0.012594473275523564

In [179]:
# compute mean square error
def mse(y,z):
    return sum((y-z)**2)/len(y)
    
mse(y,X.dot(w))

4.6996908866091594e-05

In [181]:
# we can use the numpy.lstsq call instead
w_lstsq = np.linalg.lstsq(X, y, rcond=None)[0]
norm(w_lstsq - w)

1.3256011772999124e-15

# Let's try some stuff from class questions

In [185]:
w = solve(R, Q.T)
w

array([[-0.0252142 ,  0.06080263,  0.12708617, -0.10836708,  0.10045678,
         0.16739217],
       [-0.11250352, -0.0768373 ,  0.04982135, -0.01839755, -0.31657898,
         0.42016123],
       [ 0.4140501 ,  0.6063151 ,  0.26627071,  0.66319295,  0.40299726,
        -0.02485709],
       [ 0.49231996,  0.18268076, -0.24145878, -0.0796176 ,  0.07941417,
        -0.07239485]])

In [187]:
pinv(X)

array([[-0.0252142 ,  0.06080263,  0.12708617, -0.10836708,  0.10045678,
         0.16739217],
       [-0.11250352, -0.0768373 ,  0.04982135, -0.01839755, -0.31657898,
         0.42016123],
       [ 0.4140501 ,  0.6063151 ,  0.26627071,  0.66319295,  0.40299726,
        -0.02485709],
       [ 0.49231996,  0.18268076, -0.24145878, -0.0796176 ,  0.07941417,
        -0.07239485]])

# Compute QR by hand

In [208]:
np.random.seed(0)
n,d = 6,4
X = randn(n,d)

n,d = X.shape 
X0 = X.copy()
R = np.zeros((n,d))
Q = np.zeros((n,n))

# first column of Q points in direction of first column of X
r = norm(X[:,0])
Q[:,0] = X[:,0]/r
Q

array([[ 0.44184904,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.4677745 ,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [-0.02585363,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.19062007,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [ 0.37422778,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ],
       [-0.63945727,  0.        ,  0.        ,  0.        ,  0.        ,
         0.        ]])

In [209]:
# ensure Q*R matches X on first column
R[0,0] = r

In [210]:
# verify Q*R matches X in first column
(Q@R - X)[:,0]

array([0., 0., 0., 0., 0., 0.])

In [211]:
# now delete that part from X; we've covered it already
X[:,0] -= Q[:,0]*R[0,0]

In [212]:
# verify Q*R + X = X0
np.isclose(Q@R + X, X0)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [213]:
# eliminate component of other columns in direction of first column of Q 
for j in range(1,d):
    R[0,j] = Q[:,0].dot(X[:,j])
    X[:,j] -= Q[:,0]*R[0,j]
R

array([[ 3.99243226, -0.76249543,  0.52215538,  1.1002987 ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])

In [214]:
# verify Q*R + X = X0
np.isclose(Q@R + X, X0)

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [215]:
# now for all the columns!
X = X0.copy()
Q *= 0
R *= 0

# compute the QR decomposition
for i in range(d):
    r = norm(X[:,i])
    Q[:,i] = X[:,i]/r
    for j in range(i,d):
        R[i,j] = Q[:,i].dot(X[:,j])
        X[:,j] -= Q[:,i]*R[i,j]
    print("iteration",i,": QR + X = X0?", np.isclose(Q@R + X, X0).all())

iteration 0 : QR + X = X0? True
iteration 1 : QR + X = X0? True
iteration 2 : QR + X = X0? True
iteration 3 : QR + X = X0? True


In [216]:
Q

array([[ 4.41849036e-01,  6.76661796e-01,  2.88664404e-01,
         2.08483287e-01,  0.00000000e+00,  0.00000000e+00],
       [ 4.67774496e-01, -5.69742958e-01,  6.08427434e-01,
         2.65340927e-01,  0.00000000e+00,  0.00000000e+00],
       [-2.58536263e-02,  3.58851761e-01,  6.27117449e-04,
         4.20404933e-01,  0.00000000e+00,  0.00000000e+00],
       [ 1.90620072e-01,  2.45139212e-01,  1.51328983e-01,
        -2.10382069e-01,  0.00000000e+00,  0.00000000e+00],
       [ 3.74227783e-01,  7.36171596e-02,  5.45344851e-02,
        -7.89702969e-01,  0.00000000e+00,  0.00000000e+00],
       [-6.39457265e-01,  1.52428573e-01,  7.21535900e-01,
        -2.03708641e-01,  0.00000000e+00,  0.00000000e+00]])

In [217]:
R

array([[ 3.99243226, -0.76249543,  0.52215538,  1.1002987 ],
       [ 0.        ,  1.08926658,  0.43627892,  2.03022353],
       [ 0.        ,  0.        ,  1.56864112,  0.02410644],
       [ 0.        ,  0.        ,  0.        ,  1.79387955],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])

In [218]:
"""Our very own QR function to compute the economy QR"""
def ourQR(X0):
    X = X0.copy()
    n,d = X.shape
    R = np.zeros((n,d))
    Q = np.zeros((n,n))

    # compute the QR decomposition
    for i in range(d):
        r = norm(X[:,i])
        Q[:,i] = X[:,i]/r
        for j in range(i,d):
            R[i,j] = Q[:,i].dot(X[:,j])
            X[:,j] -= Q[:,i]*R[i,j]
    return Q,R

In [219]:
# solve least squares problem to estimate w
Q,R = ourQR(X0)
w_byhand = solve(R[:d,:d], (Q.T @ y)[:d])

In [220]:
norm(w_byhand - w)

ValueError: operands could not be broadcast together with shapes (4,) (4,6) 

# The importance of the permutation

In [221]:
X0 = np.zeros((3,3))
X0[:,0] = [1,1,0]
X0[:,1] = [0,1,0]
X0[:,2] = [0,1,1]
X0

Y = X0.copy()
Y[:,1] = X0[:,0]
Y[:,0] = X0[:,1]
X0

array([[1., 0., 0.],
       [1., 1., 1.],
       [0., 0., 1.]])

In [222]:
Y

array([[0., 1., 0.],
       [1., 1., 1.],
       [0., 0., 1.]])

In [226]:
# try X0 vs Y 
X = X0.copy()
X = Y.copy()
X

array([[0., 1., 0.],
       [1., 1., 1.],
       [0., 0., 1.]])

In [227]:
# now for all the columns!
n,d = X.shape
Q = np.zeros((n,n))
R = np.zeros((n,d))

# compute the QR decomposition
for i in range(d):
    r = norm(X[:,i])
    Q[:,i] = X[:,i]/r
    for j in range(i,d):
        R[i,j] = Q[:,i].dot(X[:,j])
        X[:,j] -= Q[:,i]*R[i,j]
    print("iteration",i,": QR + X = X0?", np.isclose(Q@R + X, X0).all())

iteration 0 : QR + X = X0? False
iteration 1 : QR + X = X0? False
iteration 2 : QR + X = X0? False


In [228]:
Q,R

(array([[0., 1., 0.],
        [1., 0., 0.],
        [0., 0., 1.]]),
 array([[1., 1., 1.],
        [0., 1., 0.],
        [0., 0., 1.]]))

The number of nonzeros in Q and R is larger using the first permutation (X0) than the second (Y). Why?

In [147]:
zero = 1e-16
(Q>zero).sum(), (R>zero).sum()

(4, 6)

In [148]:
X0, Y

(array([[1., 0., 0.],
        [1., 1., 1.],
        [0., 0., 1.]]),
 array([[0., 1., 0.],
        [1., 1., 1.],
        [0., 0., 1.]]))

# Same, but symmetric

In [233]:
np.random.seed(0)
X0 = np.zeros((5,5))
X0[:,0] = [rand(),0,0,0,0]
X0[:,1] = [0,rand(),0,0,0]
X0[:,2] = [rand(),0,rand(),0,0]
X0[:,3] = [rand(),rand(),0,0,0]
X0[:,4] = [0,0,rand(),0,rand()]
X0 = X0 + X0.T + np.eye(5)
eigvalsh(X0)

array([0.62140537, 1.48865546, 2.46040092, 2.72872784, 3.10212851])

In [230]:
def swap(X0,i,j):
    Y = X0.copy()
    Y[:,i] = X0[:,j]
    Y[:,j] = X0[:,i]
    return Y

def swap_sym(X0,i,j):
    Y = X0.copy()
    Y[:,i] = X0[:,j]
    Y[:,j] = X0[:,i]
    Y1 = Y.copy()
    Y1[i,:] = Y[j,:]
    Y1[j,:] = Y[i,:]
    return Y1

In [242]:
Y = X0.copy()
Y = swap_sym(X0, 2, 4) # try turning this on and off
# Y = swap_sym(X0, 0, 2) # try turning this on and off
Y

array([[2.09762701, 0.        , 0.        , 0.4236548 , 0.60276338],
       [0.        , 2.43037873, 0.        , 0.64589411, 0.        ],
       [0.        , 0.        , 2.783546  , 0.        , 0.43758721],
       [0.4236548 , 0.64589411, 0.        , 1.        , 0.        ],
       [0.60276338, 0.        , 0.43758721, 0.        , 2.08976637]])

In [243]:
R = cholesky(Y)
R

array([[ 1.44831868,  0.        ,  0.        ,  0.2925149 ,  0.41618145],
       [ 0.        ,  1.5589672 ,  0.        ,  0.41430898,  0.        ],
       [ 0.        ,  0.        ,  1.66839624,  0.        ,  0.26228015],
       [ 0.        ,  0.        ,  0.        ,  0.86184865, -0.14125366],
       [ 0.        ,  0.        ,  0.        ,  0.        ,  1.35196741]])

In [244]:
# check it's a factorization 
R.T@R - Y

array([[ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
         5.55111512e-17,  0.00000000e+00],
       [ 0.00000000e+00,  4.44089210e-16,  0.00000000e+00,
         0.00000000e+00,  0.00000000e+00],
       [ 0.00000000e+00,  0.00000000e+00,  4.44089210e-16,
         0.00000000e+00, -5.55111512e-17],
       [ 5.55111512e-17,  0.00000000e+00,  0.00000000e+00,
         0.00000000e+00,  1.21273863e-17],
       [ 0.00000000e+00,  0.00000000e+00, -5.55111512e-17,
         1.21273863e-17,  0.00000000e+00]])

In [245]:
# fill-in 
(R!=0) & (Y==0)

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False,  True],
       [False, False, False, False, False]])