In [29]:
# Standardize the training set (TS) features
import numpy as np
from sklearn.preprocessing import StandardScaler
# Toy training set: five samples (rows), two features (columns).
X = [
    [0.2, -0.3],
    [-1.1, 2],
    [1, -2.2],
    [0.5, -1],
    [-0.6, 1],
]

# Learn the per-column statistics and rescale the same data in one chain;
# fit() returns the scaler itself, so transform() can be called on its result.
scaler = StandardScaler()
scaled_data = scaler.fit(X).transform(X)
print(scaled_data)

[[ 0.26444294 -0.13558154]
 [-1.45443618  1.42360613]
 [ 1.32221471 -1.42360613]
 [ 0.66110736 -0.61011691]
 [-0.79332883  0.74569845]]


In [30]:
# Covariance matrix of the standardized features.
# rowvar=False tells NumPy that each column (not each row) is a variable,
# which is equivalent to passing the transposed array.
# Note: np.cov normalizes by n - 1 by default.
cov_matrix = np.cov(scaled_data, rowvar=False)

In [31]:
# Eigendecomposition of the covariance matrix.
# Column i of `eigenvectors` is the eigenvector paired with eigenvalues[i].
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

# Show both results, one labelled line each.
for caption, array in (('eigenvalues:', eigenvalues),
                       ('eigenvectors:', eigenvectors)):
    print(caption, array)

eigenvalues: [2.49591192 0.00408808]
eigenvectors: [[ 0.70710678  0.70710678]
 [-0.70710678  0.70710678]]


In [32]:
# Keep the p (< n) eigenvectors with the highest eigenvalues.
num_components = 1

# np.linalg.eig does not return eigenvalues in any guaranteed order, so rank
# them explicitly (largest first) instead of trusting the first columns.
descending_order = np.argsort(eigenvalues)[::-1]

# Eigenvectors are stored as columns; select the top-p columns so the result
# has shape (n_features, num_components).
projection_matrix = eigenvectors[:, descending_order[:num_components]]

In [33]:
# Project the standardized samples onto the p retained eigenvectors:
# (n_samples, n_features) @ (n_features, p) -> a new training set with p features.
X_pca = scaled_data @ projection_matrix

In [34]:
# Percent of the total variance kept by the first principal component.
# The eigenvalues are read from the computed `eigenvalues` array rather than
# pasted in as literals, so this figure stays correct if the data changes.
# The first principal component is the one with the largest eigenvalue.
percent_kept = eigenvalues.max() / eigenvalues.sum()
print('percent kept in first principle component: ', percent_kept * 100, '%')
print('new Traing Set: \n', X_pca)

percent kept in first principle component:  99.83647680000001 %
new Traing Set: 
 [[ 0.28286002]
 [-2.03508324]
 [ 1.94158854]
 [ 0.8988913 ]
 [-1.08825662]]
