In [1]:
import numpy as np

In [2]:
class PCA:

    def __init__(self,num_components) -> None:
        self.num_components = num_components
        self.components = None
        self.mean = None
        self.variance_share = None

    def fit(self,X):
        self.mean = np.mean(X,axis=0)
        X -= self.mean

    # calculate eigenvalues
        matrix_product = np.matmul(X.T,X)
        eig_values, eig_vectors = np.linalg.eig(matrix_product)
        sort_idx = np.argsort(eig_values)[::-1]
        values = eig_values[sort_idx]
        vectors = eig_vectors[:,sort_idx]

        self.components = vectors[:self.num_components]
        self.variance_share = np.sum(values[:self.num_components]) / np.sum(values)


    def transform(self,X):

        X -= self.mean
        return np.dot(X,self.components.T)
    
    




In [3]:
X_old = np.random.normal(loc = 0, scale = 1, size = (1000, 10))
X_new = np.random.normal(loc = 0, scale = 1, size = (500, 10)) 

print(X_old.shape, X_new.shape)

(1000, 10) (500, 10)


In [4]:
pca = PCA(num_components=8)
pca.fit(X_old)

In [5]:
print(f'Explained variance is:{pca.variance_share:.4f}')

Explained variance is:0.8350


In [6]:
pca.transform(X_old)

array([[-0.01828269,  0.24804799,  1.36972764, ...,  0.73454248,
         0.04686528, -0.75514566],
       [-0.36855915,  2.51407443, -2.23605689, ..., -0.91787197,
         0.93598134, -1.97738015],
       [ 1.08311708, -0.1655728 , -0.43538611, ...,  0.55985354,
         0.21987032, -1.87541065],
       ...,
       [-0.0735813 ,  1.01163409,  0.4015191 , ..., -1.66356142,
        -0.07901387, -1.45962295],
       [-1.55960587, -0.08980286,  0.74310373, ..., -0.24536556,
        -1.07912716, -0.51429138],
       [-2.32925252,  0.12631297,  0.5748922 , ..., -0.2360596 ,
         0.67427566, -0.74092906]])