In [4]:
import numpy as np

def pca_step_by_step(X, k):
    """Run PCA on ``X`` and print every intermediate result along the way.

    The columns of ``X`` are mean-centred, the covariance matrix of the
    columns is eigendecomposed, the eigenpairs are ordered by decreasing
    eigenvalue, and the centred data is multiplied by the first ``k``
    eigenvectors. Each stage is echoed to stdout.

    Parameters
    ----------
    X : array_like, 2-D
        Rows are treated as samples and columns as features (the mean is
        taken along axis 0).
    k : int
        Number of eigenvectors to keep. It is used as a slice bound
        (``[:, :k]``), so a value larger than the number of columns keeps
        every component.

    Returns
    -------
    numpy.ndarray
        The centred data multiplied by the selected eigenvectors; one row
        per row of ``X`` and one column per kept component.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix contains NaN or infinity (for example when
        ``X`` has a single row or contains non-finite values).

    Notes
    -----
    The sign of each eigenvector, and therefore of each output column, is
    not determined by the decomposition.
    """
    X = np.asarray(X)

    # Step 1: Center the data
    print("Step 1: Centering the Data")
    X_mean = np.mean(X, axis=0)
    print("Mean of each feature:")
    print(X_mean)
    X_centered = X - X_mean
    print("\nCentered Data:")
    print(X_centered)

    # Step 2: Calculate covariance matrix
    print("\nStep 2: Calculating Covariance Matrix")
    # np.cov squeezes a single-feature result down to a 0-d array; restore
    # the (1, 1) shape so the eigen-solver accepts it.
    cov_matrix = np.atleast_2d(np.cov(X_centered.T))
    print("Covariance Matrix:")
    print(cov_matrix)

    # Step 3: Calculate eigenvalues and eigenvectors
    print("\nStep 3: Calculating Eigenvalues and Eigenvectors")
    # eigh does not validate its input the way eig does, so keep the explicit
    # failure on NaN/inf instead of silently propagating garbage.
    if not np.all(np.isfinite(cov_matrix)):
        raise np.linalg.LinAlgError("Array must not contain infs or NaNs")
    # A covariance matrix is symmetric, so use the symmetric solver: it always
    # returns real eigenvalues, whereas the general solver (eig) can return
    # complex results from round-off when the matrix is rank-deficient.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)
    print("Eigenvalues:")
    print(eigenvalues)
    print("Eigenvectors:")
    print(eigenvectors)

    # Step 4: Sort eigenvalues and eigenvectors
    print("\nStep 4: Sorting Eigenvalues and Eigenvectors")
    sorted_indices = np.argsort(eigenvalues)[::-1]  # largest eigenvalue first
    sorted_eigenvalues = eigenvalues[sorted_indices]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]
    print("Sorted Eigenvalues:")
    print(sorted_eigenvalues)
    print("Sorted Eigenvectors:")
    print(sorted_eigenvectors)

    # Step 5: Select top k eigenvectors
    print("\nStep 5: Selecting Top", k, "Eigenvectors")
    top_k_eigenvectors = sorted_eigenvectors[:, :k]
    print("Top", k, "Eigenvectors:")
    print(top_k_eigenvectors)

    # Step 6: Transform the data
    print("\nStep 6: Transforming the Data")
    transformed_data = np.dot(X_centered, top_k_eigenvectors)
    print("Transformed Data:")
    print(transformed_data)

    return transformed_data

# Demo: run the verbose PCA walkthrough on a small 6x6 integer matrix
# (each row is handed to the routine as one sample).
X = np.array([
    [6, 3, 7, 4, 6, 1],
    [9, 2, 6, 7, 4, 2],
    [3, 7, 7, 2, 5, 3],
    [4, 1, 7, 5, 1, 4],
    [4, 0, 9, 5, 8, 5],
    [0, 9, 2, 6, 3, 6],
])

# Number of principal components to keep (here: as many as X has columns).
k = 6
transformed_data = pca_step_by_step(X, k)


Step 1: Centering the Data
Mean of each feature:
[4.33333333 3.66666667 6.33333333 4.83333333 4.5        3.5       ]

Centered Data:
[[ 1.66666667 -0.66666667  0.66666667 -0.83333333  1.5        -2.5       ]
 [ 4.66666667 -1.66666667 -0.33333333  2.16666667 -0.5        -1.5       ]
 [-1.33333333  3.33333333  0.66666667 -2.83333333  0.5        -0.5       ]
 [-0.33333333 -2.66666667  0.66666667  0.16666667 -3.5         0.5       ]
 [-0.33333333 -3.66666667  2.66666667  0.16666667  3.5         1.5       ]
 [-4.33333333  5.33333333 -4.33333333  1.16666667 -1.5         2.5       ]]

Step 2: Calculating Covariance Matrix
Covariance Matrix:
[[ 9.06666667 -6.86666667  3.26666667  1.46666667  1.2        -4.4       ]
 [-6.86666667 12.66666667 -6.46666667 -1.46666667 -2.          1.8       ]
 [ 3.26666667 -6.46666667  5.46666667 -1.53333333  3.         -1.6       ]
 [ 1.46666667 -1.46666667 -1.53333333  2.96666667 -1.1         0.7       ]
 [ 1.2        -2.          3.         -1.1         5.9    

In [6]:
import numpy as np

def pca(X, k):
    """Project ``X`` onto its leading ``k`` principal components.

    The columns of ``X`` are mean-centred, the covariance matrix of the
    columns is eigendecomposed, and the centred data is multiplied by the
    eigenvectors that belong to the ``k`` largest eigenvalues.

    Parameters
    ----------
    X : array_like, 2-D
        Rows are treated as samples and columns as features (the mean is
        taken along axis 0).
    k : int
        Number of eigenvectors to keep. It is used as a slice bound
        (``[:, :k]``), so a value larger than the number of columns keeps
        every component.

    Returns
    -------
    numpy.ndarray
        The centred data multiplied by the selected eigenvectors; one row
        per row of ``X`` and one column per kept component.

    Raises
    ------
    numpy.linalg.LinAlgError
        If the covariance matrix contains NaN or infinity (for example when
        ``X`` has a single row or contains non-finite values).

    Notes
    -----
    The sign of each eigenvector, and therefore of each output column, is
    not determined by the decomposition.
    """
    X = np.asarray(X)

    # Step 1: Center the data
    X_centered = X - np.mean(X, axis=0)

    # Step 2: Calculate covariance matrix
    # np.cov squeezes a single-feature result down to a 0-d array; restore
    # the (1, 1) shape so the eigen-solver accepts it.
    cov_matrix = np.atleast_2d(np.cov(X_centered.T))

    # eigh does not validate its input the way eig does, so keep the explicit
    # failure on NaN/inf instead of silently propagating garbage.
    if not np.all(np.isfinite(cov_matrix)):
        raise np.linalg.LinAlgError("Array must not contain infs or NaNs")

    # Step 3: Calculate eigenvalues and eigenvectors
    # A covariance matrix is symmetric, so use the symmetric solver: it always
    # returns real eigenvalues, whereas the general solver (eig) can return
    # complex results from round-off when the matrix is rank-deficient.
    eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

    # Step 4: Sort eigenvectors by decreasing eigenvalue
    sorted_indices = np.argsort(eigenvalues)[::-1]
    sorted_eigenvectors = eigenvectors[:, sorted_indices]

    # Step 5: Select top k eigenvectors
    top_k_eigenvectors = sorted_eigenvectors[:, :k]

    # Step 6: Transform the data
    return np.dot(X_centered, top_k_eigenvectors)

# Demo: project a small 6x6 integer matrix with the quiet PCA routine and
# show the result (each row is handed to the routine as one sample).
X = np.array([
    [6, 3, 7, 4, 6, 1],
    [9, 2, 6, 7, 4, 2],
    [3, 7, 7, 2, 5, 3],
    [4, 1, 7, 5, 1, 4],
    [4, 0, 9, 5, 8, 5],
    [0, 9, 2, 6, 3, 6],
])

# Number of principal components to keep (here: as many as X has columns).
k = 6
transformed_data = pca(X, k)

print("Transformed Data:")
print(transformed_data)


Transformed Data:
[[ 2.41660696e+00  5.70159604e-01  2.40670441e+00  9.79781520e-02
   9.45628948e-01  2.88657986e-15]
 [ 3.78659457e+00 -3.65904713e+00  1.41495514e+00 -1.37128734e+00
  -5.01657029e-01 -2.10942375e-15]
 [-2.62779637e+00  2.45992591e+00  2.37163670e+00  1.70914350e+00
  -5.98940507e-01 -7.49400542e-16]
 [ 1.13892092e+00 -2.01038895e+00 -2.88212158e+00  2.55657235e+00
   1.13810596e-01  1.91513472e-15]
 [ 3.77618726e+00  3.55766211e+00 -2.42712420e+00 -1.53845663e+00
  -1.41823319e-01 -4.44089210e-16]
 [-8.49051335e+00 -9.18311541e-01 -8.84050466e-01 -1.45395003e+00
   1.82981311e-01  4.44089210e-16]]
