In [None]:
import numpy as np
import matplotlib.pyplot as plt

def get_random_pointcloud(seed, N, sigma1=3, sigma2=1):
    """
    Sample a 2D Gaussian point cloud centered at the origin whose two
    axes are a randomly chosen pair of perpendicular unit vectors

    Parameters
    ----------
    seed: int
        Random seed for repeatable results.  NOTE: this reseeds
        numpy's global random number generator
    N: int
        Number of points
    sigma1: float
        Standard deviation along first dimension (the random unit
        vector u)
    sigma2: float
        Standard deviation along second dimension (the unit vector v
        perpendicular to u)
    
    Return
    ------
    X: ndarray(N, 2)
        2D point cloud
    """
    np.random.seed(seed)
    # Random direction with components drawn uniformly from [0, 1),
    # rescaled to unit length
    u = np.random.rand(2)
    u = u/np.sqrt(np.sum(u**2))
    # Rotating u by 90 degrees gives a unit vector perpendicular to it
    v = np.array([u[1], -u[0]])
    # Independent Gaussian coordinates along u and v, each scaled by its
    # own standard deviation.  The draw order (u first, then v) is kept
    # fixed so that a given seed always produces the same cloud
    X = sigma1*np.random.randn(N, 1)*u[None, :]
    X += sigma2*np.random.randn(N, 1)*v[None, :]
    return X

In [None]:
def plot_proj(X, u):
    """
    Scatter plot a 2D point cloud, colored by each point's projection
    onto a direction, and draw that direction as an arrow from the origin

    Parameters
    ----------
    X: ndarray(N, 2)
        2D point cloud
    u: ndarray(2)
        Direction onto which the points are projected
    """
    # Coordinate of every point along u
    proj = X.dot(u)
    # Sum of the squared projections, reported in the plot title
    sum_sqr = np.sum(proj*proj)
    plt.scatter(X[:, 0], X[:, 1], c=proj, cmap='magma')

    # Draw u from the origin, stretched to twice its length
    axes = plt.gca()
    arrow_style = dict(head_width=0.25, head_length=0.5, fc='k', ec='k', width=0.05)
    axes.arrow(0, 0, u[0]*2, u[1]*2, **arrow_style)
    axes.set_title(f"Squared Variance: {sum_sqr:.6f}")
    # Same scale on both axes
    axes.axis("equal")

In [None]:
# Fixed seed so that every run of the notebook sees the same 1000 points
X = get_random_pointcloud(seed=0, N=1000)

In [None]:
# Direction to project onto; change the components to try other directions
u = np.array([1, 0])
# Divide by the Euclidean length to normalize the vector so that |u| = 1
u_length = np.sqrt(np.sum(u**2))
u = u/u_length
plot_proj(X, u)