In [1]:
import numpy as np
import pandas as pd

# Show coefficients become undetermined in the case of collinearity

Let

\begin{align*}
X = \begin{bmatrix}
| & | & | \\ 
\mathbf{x}_1 & \mathbf{x}_2 & \mathbf{x}_3 \\ 
| & | & |
\end{bmatrix}
\end{align*}

Suppose the model is

$$
\mathbf{y} = w_1 \mathbf{x}_1 + w_2 \mathbf{x}_2 + w_3 \mathbf{x}_3
$$

and $\mathbf{x}_3$ is linearly dependent on $\mathbf{x}_1$ and $\mathbf{x}_2$, i.e. for some scalars $a$ and $b$

$$
\mathbf{x}_3 = a \mathbf{x}_1 + b \mathbf{x}_2
$$

So the model can be converted into

\begin{align*}
\mathbf{y} 
&= w_1 \mathbf{x}_1 + w_2 \mathbf{x}_2 + w_3 \mathbf{x}_3 \\
&= w_1 \mathbf{x}_1 + w_2 \mathbf{x}_2 + w_3 (a \mathbf{x}_1 + b \mathbf{x}_2) \\
&= (w_1 + a w_3) \mathbf{x}_1 + (w_2 + b w_3) \mathbf{x}_2 \\
&= \gamma_1 \mathbf{x}_1 + \gamma_2 \mathbf{x}_2
\end{align*}

where

\begin{align*}
\gamma_1 &= w_1 + a w_3 \\
\gamma_2 &= w_2 + b w_3
\end{align*}

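So there are effectively only two independent variables, $\mathbf{x}_1$ and $\mathbf{x}_2$, and the data can only identify the two combinations $\gamma_1$ and $\gamma_2$. The three coefficients ($w_1$, $w_2$, $w_3$) are therefore underdetermined: for any value of $w_3$, choosing $w_1 = \gamma_1 - a w_3$ and $w_2 = \gamma_2 - b w_3$ produces exactly the same $\mathbf{y}$.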

Using the analytical least-squares solution $\mathbf{w} = (X^T X)^{-1} X^T \mathbf{y}$ becomes impossible, because $X^T X$ is singular when $X$ has linearly dependent columns.

Note, undetermined coefficients also mean the coefficients have infinite standard deviations.

# Show covariance matrix is singular for $X$ with dependent columns

Suppose X is $m \times n$.

\begin{align*}
X = \begin{bmatrix}
| & \cdots & | \\ 
\mathbf{x}_1 & \cdots & \mathbf{x}_n \\ 
| & \cdots & |
\end{bmatrix}
\end{align*}

For two random variables, $X_1$ and $X_2$, their covariance is defined as 

\begin{align*}
    \text{Cov}(X_1, X_2)
    &= \mathbb{E}[(X_1 - \mathbb{E}[X_1])(X_2 - \mathbb{E}[X_2])]
\end{align*}

Center $X$ by subtracting each column's mean from that column,

\begin{align*}
\bar{X}
&= X - \frac{1}{m} \mathbb{1}_m \mathbb{1}_m^T X \\
&= \left( I_m - \frac{1}{m} \mathbb{1}_m \mathbb{1}_m^T \right) X \\
\end{align*}

Note,

* $I_m$ is an $m \times m$ identity matrix.
* $\mathbb{1}_m$ is an $m \times 1$ vector of ones.

Then the covariance matrix can be written as 

\begin{align*}
S 
&= \frac{1}{m - 1} \bar{X}^T \bar{X} \\
&= \frac{1}{m - 1} X^T \left( I_m - \frac{1}{m} \mathbb{1}_m \mathbb{1}_m^T \right)^T \left( I_m - \frac{1}{m} \mathbb{1}_m \mathbb{1}_m^T \right) X
\end{align*}

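If $X$ has linearly dependent columns, there exists a non-zero $\mathbf{w}$ with $X \mathbf{w} = \mathbf{0}$. Then $\bar{X} \mathbf{w} = \left( I_m - \frac{1}{m} \mathbb{1}_m \mathbb{1}_m^T \right) X \mathbf{w} = \mathbf{0}$, and therefore $S \mathbf{w} = \frac{1}{m - 1} \bar{X}^T \bar{X} \mathbf{w} = \mathbf{0}$ as well. Since a non-zero $\mathbf{w}$ lies in the null space of $S$, $S$ is singular.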

# Show correlation matrix is singular for $X$ with dependent columns

The vector of standard deviations is

\begin{align*}
\boldsymbol{\sigma}
&= \left( \frac{1}{m - 1} \mathbb{1}_m^T \bar{X}^{\circ 2} \right)^{\circ \frac{1}{2}}
\end{align*}

Note,

* $\circ 2$ means element-wise square (Hadamard power).
* $\circ \frac{1}{2}$ means element-wise square root.
* $\boldsymbol{\sigma}$ is of shape $1 \times n$.

Ref: https://en.wikipedia.org/wiki/Hadamard_product_(matrices)#Analogous_operations

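Let $\bar{X}_i$ denote the $i$-th column of $\bar{X}$ and $\sigma_i$ the $i$-th entry of $\boldsymbol{\sigma}$, and define

\begin{align*}
Y_i 
&= \bar{X}_i / \sigma_i \\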
Y 
&= \begin{bmatrix}
| & \cdots & | \\ 
\bar{X}_1 / \sigma_1 & \cdots & \bar{X}_n / \sigma_n \\ 
| & \cdots & |
\end{bmatrix}
\end{align*}

Then the Pearson correlation matrix can be written as

\begin{align*}
P 
&= \frac{1}{m - 1} Y^T Y \\
&= \frac{1}{m - 1} \begin{bmatrix}
\frac{\bar{X}_1^T \bar{X}_1}{\sigma_1 \sigma_1} & \cdots & \frac{\bar{X}_1^T \bar{X}_n}{\sigma_1 \sigma_n} \\ 
\vdots & \ddots & \vdots \\ 
\frac{\bar{X}_n^T \bar{X}_1}{\sigma_n \sigma_1} & \cdots & \frac{\bar{X}_n^T \bar{X}_n}{\sigma_n \sigma_n}
\end{bmatrix}
\end{align*}

The factor $\frac{1}{m - 1}$ is the same one used in $\sigma_i^2 = \frac{1}{m - 1} \bar{X}_i^T \bar{X}_i$, so the diagonal entries of $P$ equal 1.

If $\mathbf{w}$ is a non-zero solution to $\bar{X} \mathbf{w} = \mathbf{0}$, as shown in the covariance matrix section above, then $\mathbf{v} = \mathbf{w} \circ \boldsymbol{\sigma}$ (with $\boldsymbol{\sigma}$ taken as a column vector; $\mathbf{v}$ is non-zero because every $\sigma_i > 0$) is a non-zero solution to $P \mathbf{v} = \mathbf{0}$:

\begin{align*}
P (\mathbf{w} \circ \boldsymbol{\sigma})
&= \frac{1}{m - 1} \begin{bmatrix}
\frac{\bar{X}_1^T \bar{X}_1}{\sigma_1 \sigma_1} & \cdots & \frac{\bar{X}_1^T \bar{X}_n}{\sigma_1 \sigma_n} \\ 
\vdots & \ddots & \vdots \\ 
\frac{\bar{X}_n^T \bar{X}_1}{\sigma_n \sigma_1} & \cdots & \frac{\bar{X}_n^T \bar{X}_n}{\sigma_n \sigma_n} 
\end{bmatrix}
\begin{bmatrix}
w_1 \sigma_1 \\ 
\vdots \\ 
w_n \sigma_n
\end{bmatrix} \\
&= \frac{1}{m - 1} \begin{bmatrix}
\frac{\bar{X}_1^T}{\sigma_1} \bar{X} \mathbf{w} \\ 
\vdots \\ 
\frac{\bar{X}_n^T}{\sigma_n} \bar{X} \mathbf{w}
\end{bmatrix} \\
&= \mathbf{0}
\end{align*}

# Demo

In [1]:
from numpy.random import default_rng

# Fixed seed so that Restart & Run All regenerates the same sample every time.
# The printed outputs further down came from an unseeded run, so their exact
# digits will differ until the notebook is re-executed.
SEED = 0
rng = default_rng(SEED)

# Two independent standard-normal features with 13 observations each.
# Shape is (2, 13): one row per feature (transposed into columns x1, x2 later).
vals = rng.standard_normal(size=(2, 13))

In [2]:
# Rows of `vals` are features, so transpose to get one column per feature.
df = pd.DataFrame(data=vals.transpose(), columns=['x1', 'x2'])

In [12]:
# x3 = x1 + 2 * x2: the third column is an exact linear combination of the first two.
df['x3'] = df['x1'].add(df['x2'].mul(2))

In [14]:
# Plain ndarray view of the data; m = number of rows (observations),
# n = number of columns (features).
X = df.to_numpy()
m, n = X.shape

In [24]:
# Known non-zero solution of X w = 0, as a column vector:
# 1 * x1 + 2 * x2 - 1 * x3 = 0 because x3 = x1 + 2 * x2.
w = np.array([1, 2, -1])[:, np.newaxis]

In [26]:
# Every row gives x1 * 1 + x2 * 2 + x3 * (-1), which is 0 because x3 = x1 + 2 * x2.
X @ w

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.]])

In [15]:
# Column vector of m ones (the 1_m in the centering formula).
ones = np.ones((m, 1))

In [16]:
# Centering matrix (I_m - 1_m 1_m^T / m) applied to X subtracts each column's mean.
X_bar = (np.eye(m) - (ones @ ones.T) / m) @ X

In [17]:
# Sample covariance matrix S = X_bar^T X_bar / (m - 1).
# Parentheses spell out the evaluation order: scale X_bar^T first, then multiply.
S = ((1 / (m - 1)) * X_bar.T) @ X_bar

In [18]:
# Display the n x n sample covariance matrix (symmetric, since S = X_bar^T X_bar / (m - 1)).
S

array([[0.55084321, 0.02638258, 0.60360838],
       [0.02638258, 1.18907404, 2.40453067],
       [0.60360838, 2.40453067, 5.41266972]])

In [19]:
# Cross-check the hand-built covariance matrix against pandas' own estimate.
np.testing.assert_allclose(actual=S, desired=df.cov().to_numpy())

In [20]:
# A determinant of ~0 (only floating-point noise left) indicates S is numerically singular.
np.linalg.det(S)

1.1622655245749468e-15

In [55]:
# Confirm S w = 0 (up to floating-point error): the w that solves X w = 0
# also lies in the null space of S.
S @ w

array([[ 1.11022302e-16],
       [ 0.00000000e+00],
       [-1.77635684e-15]])

In [29]:
# S w must vanish; the comparison target is zero, so an absolute tolerance is needed.
np.testing.assert_allclose(
    S @ w,
    np.zeros((n, 1)),
    atol=1e-8,
)

In [30]:
# Per-column sample standard deviation: sum the squared centered values of each
# column via 1_m^T, divide by (m - 1), and take the element-wise square root.
σ = np.sqrt((ones.T @ np.square(X_bar)) / (m - 1))

In [31]:
# Display σ: a 1 x n row vector holding one standard deviation per column of X.
σ

array([[0.74218812, 1.09044672, 2.3265145 ]])

In [36]:
# Flatten the 1 x n row vector and compare it with pandas' per-column std.
np.testing.assert_allclose(σ.reshape(-1), df.std().to_numpy())

In [38]:
# Next: build the correlation matrix P, then confirm that w \circ \sigma solves P v = 0

In [51]:
# Pearson correlation matrix.
# Y scales each centered column by its standard deviation (σ broadcasts across rows).
Y = np.divide(X_bar, σ)
# Dividing Y^T Y by (m - 1) matches the normalisation used for σ.
P = (Y.T @ Y) / (m - 1)

In [52]:
# Display the n x n correlation matrix.
P

array([[1.        , 0.0325986 , 0.34957108],
       [0.0325986 , 1.        , 0.94780743],
       [0.34957108, 0.94780743, 1.        ]])

In [53]:
# Cross-check the hand-built correlation matrix against pandas' own estimate.
np.testing.assert_allclose(actual=P, desired=df.corr().to_numpy())

In [58]:
# Candidate null vector of P: element-wise product w ∘ σ, with σ transposed
# into an n x 1 column so it lines up with w.
w * σ.T

array([[ 0.74218812],
       [ 2.18089343],
       [-2.3265145 ]])

In [56]:
# Confirm P (w \circ \sigma) = 0 up to floating-point error, i.e. w \circ \sigma
# is a non-zero vector in the null space of P.
P @ (w * σ.T)

array([[0.0000000e+00],
       [4.4408921e-16],
       [4.4408921e-16]])

In [57]:
# P (w ∘ σ) must vanish; the comparison target is zero, so an absolute tolerance is needed.
np.testing.assert_allclose(
    P @ np.multiply(w, σ.T),
    np.zeros((n, 1)),
    atol=1e-8,
)