# 有效率的QR分解实现

In [1]:
import numpy as np
import scipy
from scipy import linalg
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import display,Latex,Math
%matplotlib inline

from IPython.core.interactiveshell import InteractiveShell
sh = InteractiveShell.instance()

def number_to_str(n,cut=5):
    ns=str(n)
    format_='{0:.'+str(cut)+'f}'
    if 'e' in ns or ('.' in ns and len(ns)>cut+1):
        return format_.format(n)
    else:
        return str(n)

def matrix_to_latex(mat,style='bmatrix'):
    if type(mat)==np.matrixlib.defmatrix.matrix:
        mat=mat.A
    head=r'\begin{'+style+'}'
    tail=r'\end{'+style+'}'
    if len(mat.shape)==1:
        body=r'\\'.join([str(el) for el in mat])
        return head+body+tail
    elif len(mat.shape)==2:
        lines=[]
        for row in mat:
            lines.append('&'.join([number_to_str(el)  for el in row])+r'\\')
        s=head+' '.join(lines)+tail
        return s
    return None

sh.display_formatter.formatters['text/latex'].type_printers[np.ndarray]=matrix_to_latex

def show_decomposition(*args):
    latex=''
    for arg in args:
        if type(arg)==str:
            latex+=arg
        else:
            latex+=matrix_to_latex(arg)
    latex='$'+latex+'$'
    display(Math(latex))


In [2]:
def get_Q1(x):
    size=x.shape[0]
    norm_x=np.sqrt(np.dot(x,x))
    e1=np.zeros(size)
    e1[0]=1
    u=x-norm_x*e1
    norm_u=np.sqrt(np.dot(u,u))
    v=u/norm_u
    Q=np.identity(size)-2*np.outer(v,np.transpose(v))
    return Q

from functools import reduce

def qr(A):
    size=A.shape[1]
    QList=[]
    for i in range(size):
        x=A[i:,i]
        Q=get_Q1(x)
        Qn=np.identity(A.shape[0])
        #print(Qn.shape,Q.shape)
        Qn[i:,i:]=Q
        A=np.dot(Qn,A)
        QList.append(Qn)
    Q=reduce(lambda x,y:x.dot(y),QList)
    R=A
    return Q,R
    


In [3]:
A=np.array([12,-51,4,6,167,-68,-4,24,-41]).reshape(3,3)
Q,R=qr(A)

show_decomposition(Q.dot(R),'=',Q,R)

<IPython.core.display.Math object>

```
for j = 1 to p
{
	define r[j,j] = sqrt( sum_i x[i,j]^2 )  

    # r[j,j] is the norm of the jth column of X

	for i = 1 to n
	{
		x[i,j] = x[i,j] / r[j,j]
	}

	for k = j+1 to p
	{
		r[j,k] = sum_{i=1}^n x[i,j]x[i,k]
		for i = 1 to n
		{
			x[i,k] = x[i,k] - x[i,j] r[j,k]
		}
	}
}
```

In [54]:
def qr2(x):
    n,p=x.shape
    r=np.zeros((p,p))
    for j in range(p):
        r[j][j]=np.sqrt(sum([x[i][j]**2 for i in range(n)]))
        for i in range(n):
            x[i][j]=x[i][j]/r[j][j]
        for k in range(j+1,p):
            r[j][k]=sum([(x[i][j]*x[i][k]) for i in range(n)])
            # it would error
            for i in range(n):
                x[i][k]=x[i][k]-x[i][j]*r[j][k]
    return x,r



In [48]:
A=np.array([12,-51,4,6,167,-68,-4,24,-41]).reshape(3,3)
Q,R=qr(A)

show_decomposition(Q.dot(R),'=',Q,R)

<IPython.core.display.Math object>

In [55]:
A=np.array([float(i) for i in[12,-51,4,6,167,-68,-4,24,-41]]).reshape(3,3)
Q,R=qr2(A)

show_decomposition(Q.dot(R),'=',Q,R)

<IPython.core.display.Math object>

In [26]:
A=np.random.random((1000,5))

%time qr(A)

Wall time: 355 ms


(array([[  4.72613896e-02,   2.30784462e-02,   1.33113767e-02, ...,
           2.63372029e-02,   5.18083156e-02,   3.56110975e-02],
        [  3.08956543e-02,  -1.09834310e-04,   5.12442621e-02, ...,
           3.48277836e-02,  -3.95627654e-02,  -1.78523418e-02],
        [  4.79711800e-02,  -2.03296298e-02,  -4.06284852e-02, ...,
          -3.23403510e-02,   3.10065848e-02,   4.41111254e-02],
        ..., 
        [  1.90507089e-02,   2.95406843e-02,  -3.48901210e-02, ...,
           9.93068054e-01,   2.35462020e-03,  -2.10790806e-03],
        [  5.24776027e-02,  -3.67845266e-02,   3.46058404e-02, ...,
           1.80720208e-03,   9.92288840e-01,  -3.13011679e-03],
        [  2.79505375e-02,  -1.69012193e-02,   4.56932000e-02, ...,
          -2.16714687e-03,  -2.56918305e-03,   9.94056395e-01]]),
 array([[  1.83658283e+01,   1.36198702e+01,   1.37827514e+01,
           1.34715491e+01,   1.39141192e+01],
        [ -4.80335526e-17,   1.22818897e+01,   5.64814134e+00,
           5.1606698

In [25]:
A=np.random.random((1000,5))

%time qr2(A)

Wall time: 95 ms


(array([[  1.11950380e-14,  -6.63358257e-15,   1.43461631e-15,
           5.50046120e-14,  -4.15154022e-14],
        [  3.69357323e-14,  -7.38992201e-15,   5.42621503e-15,
           4.85340934e-14,  -3.88647448e-14],
        [  6.63288868e-14,  -1.24292937e-15,   3.24046345e-15,
           3.62071484e-14,  -3.23872873e-14],
        ..., 
        [  5.28257993e-14,  -2.78249646e-15,   4.48946436e-15,
           5.72666914e-14,  -5.69128078e-14],
        [  3.46181417e-14,  -9.52363188e-16,   4.16767315e-16,
           3.81535081e-14,  -6.43166076e-14],
        [  7.86246068e-14,  -6.58847976e-15,   4.23966418e-15,
           4.46934156e-14,  -6.64676647e-14]]),
 array([[ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  0.,  1.]]))

In [27]:
import statsmodels.api as sm

In [30]:
def load1(data):
    df=pd.DataFrame(data.exog)
    df.columns=data.exog_name
    return df
    
    

data=sm.datasets.anes96.load()
exog=load1(data)
endog=pd.Series(data.endog)


In [34]:
mat=exog.as_matrix()

In [37]:
%time qr(mat)

Wall time: 308 ms


(array([[-0.01858599,  0.07231401, -0.04089145, ..., -0.01809627,
         -0.01853284,  0.02795317],
        [ 0.04235713, -0.00239098, -0.02293204, ...,  0.06358211,
          0.06250948,  0.0198534 ],
        [ 0.02774442, -0.00126651,  0.00163617, ..., -0.01583678,
         -0.01345472,  0.02724068],
        ..., 
        [-0.01858599,  0.06374479, -0.01525903, ...,  0.99461667,
         -0.00540734, -0.00140664],
        [-0.01858599,  0.06374479, -0.01053388, ..., -0.00547984,
          0.99410203, -0.0020664 ],
        [ 0.0233752 ,  0.01877037,  0.03179344, ..., -0.00147374,
         -0.00198473,  0.99646272]]),
 array([[  1.23888234e+02,   7.74136492e+01,   8.77000427e+02,
           8.54056218e+01,   2.95338147e+02],
        [ -1.86661379e-14,   1.16696731e+02,   1.08225137e+03,
           1.01120564e+02,   3.77875165e+02],
        [  2.25008263e-15,  -3.43380576e-15,   6.34900954e+02,
           2.28135816e+01,   7.88113538e+01],
        ..., 
        [  1.18540185e-15,  -1.

In [38]:
%time qr2(mat)

Wall time: 105 ms


(array([[  8.57092175e-14,   3.66720543e-14,   3.45036499e-14,
          -1.14235010e-13,  -1.60854403e-14],
        [ -1.95329863e-13,   1.57165947e-14,   1.91686944e-14,
          -1.52312191e-13,  -1.60854403e-14],
        [ -1.27942795e-13,   1.04777298e-14,   2.30024333e-14,
          -2.28470021e-13,  -1.60854403e-14],
        ..., 
        [  8.57092175e-14,   3.14331894e-14,   4.12135603e-14,
          -2.28470021e-13,  -3.86052301e-13],
        [  8.57092175e-14,   3.14331894e-14,   4.40897319e-14,
          -2.66550670e-13,  -3.86052301e-13],
        [ -1.07792247e-13,   2.09554596e-14,   5.84671200e-14,
          -2.66550670e-13,  -3.86052301e-13]]),
 array([[ 1.,  0.,  0.,  0.,  0.],
        [ 0.,  1.,  0.,  0.,  0.],
        [ 0.,  0.,  1.,  0.,  0.],
        [ 0.,  0.,  0.,  1.,  0.],
        [ 0.,  0.,  0.,  0.,  1.]]))

In [39]:
str(mat)

'[[  8.57092175e-14   3.66720543e-14   3.45036499e-14  -1.14235010e-13\n   -1.60854403e-14]\n [ -1.95329863e-13   1.57165947e-14   1.91686944e-14  -1.52312191e-13\n   -1.60854403e-14]\n [ -1.27942795e-13   1.04777298e-14   2.30024333e-14  -2.28470021e-13\n   -1.60854403e-14]\n ..., \n [  8.57092175e-14   3.14331894e-14   4.12135603e-14  -2.28470021e-13\n   -3.86052301e-13]\n [  8.57092175e-14   3.14331894e-14   4.40897319e-14  -2.66550670e-13\n   -3.86052301e-13]\n [ -1.07792247e-13   2.09554596e-14   5.84671200e-14  -2.66550670e-13\n   -3.86052301e-13]]'