
# Multivariate Functional Principal Components Analysis

This notebook shows how to perform a multivariate functional principal
components analysis on an example dataset.


In [None]:
# Author: Zara Waheed <zara95@bu.edu>
# License: MIT

import matplotlib.pyplot as plt
import pandas as pd

from FDApy.representation.functional_data import MultivariateFunctionalData
from FDApy.preprocessing.dim_reduction.fpca import MFPCA
from FDApy.visualization.plot import plot
from FDApy.misc.loader import read_csv

In [None]:
def wf(x, data_dir='/Users/zarawaheed/Documents/BostonUniversity/MA679/Final Project/Data/'):
    """Return the full path of a file inside the data directory.

    Parameters
    ----------
    x : str
        File name (or sub-path) relative to the data directory.
    data_dir : str, optional
        Directory that is prepended to ``x``. Defaults to the project data
        folder, so calling ``wf(x)`` is unchanged. Pass another directory to
        run the notebook on a different machine.

    Returns
    -------
    str
        ``data_dir`` followed by ``x``.
    """
    # Tolerate a directory given without its trailing separator.
    if data_dir and not data_dir.endswith('/'):
        data_dir += '/'
    return data_dir + x

Load the data as DenseFunctionalData.



In [None]:
# Read the three GRF components (V, ML, AP) from their CSV files.
# wf() turns each file name into a full path inside the data directory.
V_GRF_stance_N = read_csv(wf('V_GRF_stance_N.csv'))
ML_GRF_stance_N = read_csv(wf('ML_GRF_stance_N.csv'))
AP_GRF_stance_N = read_csv(wf('AP_GRF_stance_N.csv'))

# Disabled index resets, kept for reference (not executed):
#ML_GRF_stance_N = ML_GRF_stance_N.reset_index()
#AP_GRF_stance_N = AP_GRF_stance_N.reset_index()

In [None]:
# Create multivariate functional data.
# The list order fixes the component indices used by the later cells:
# index 0 = ML, index 1 = AP, index 2 = V.

GRF = MultivariateFunctionalData([ML_GRF_stance_N, AP_GRF_stance_N, V_GRF_stance_N])

Perform a multivariate functional PCA and explore the results.



In [None]:
# Perform multivariate FPCA

n = 8
# n_components is the number of components to keep for each function in the data
mfpca = MFPCA(n_components=[n, n, n])

mfpca.fit(GRF, method='NumInt')
# other method = PACE

# Plot the results of the FPCA (eigenfunctions), one panel per component.
# The labels follow the order used when GRF was built (ML, AP, V).
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 6))
for idx, (ax, label) in enumerate(zip((ax1, ax2, ax3), ('ML_GRF', 'AP_GRF', 'V_GRF'))):
    # plot() hands back the object the labels are set on; a real name is used
    # instead of `_`, which IPython reserves for the last cell output.
    panel = plot(mfpca.basis[idx], ax=ax)
    panel.set_ylabel(label)
# Only the bottom panel carries the x-axis label.
panel.set_xlabel('Time')

# Build the title from n so it stays correct if the component count changes.
ax1.set_title(f'MFPCA plot with {n} Principal Components', fontstyle='italic')

# wf() prepends the data directory, yielding the same path as before without
# repeating the absolute prefix.
fig.savefig(wf('Processed/MFPCA.png'), dpi=300)

In [None]:
# mfpca.basis

Compute the scores of the GRF data in the eigenfunction basis using
numerical integration.



In [None]:
# Compute the scores
GRF_proj = mfpca.transform(GRF)

# Plot the projection of the data onto the eigenfunctions.
# scatter_matrix returns an array of Axes rather than a Figure, so the result
# is named accordingly and no longer overwrites the `fig` handle.
scatter_axes = pd.plotting.scatter_matrix(pd.DataFrame(GRF_proj), diagonal='kde', figsize=(15, 15))

# plt.savefig writes the current figure, i.e. the scatter matrix just drawn.
# wf() prepends the data directory, giving the same output path as before.
plt.savefig(wf('Processed/MFPCA_scatter_matrix.png'), dpi=300)

Then, we can test if the reconstruction of the data is good.



In [None]:
# Test if the reconstruction is good.
GRF_reconst = mfpca.inverse_transform(GRF_proj)

# Plot the reconstructed curves, one panel per component.
# The labels follow the order used when GRF was built (ML, AP, V).
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 12))
for idx, (ax, label) in enumerate(zip((ax1, ax2, ax3), ('GRF_ML', 'GRF_AP', 'GRF_V'))):
    # A real name is used instead of `_`, which IPython reserves for the
    # last cell output.
    panel = plot(GRF_reconst[idx], ax=ax)
    panel.set_ylabel(label)
# Only the bottom panel carries the x-axis label.
panel.set_xlabel('Time')

# Build the title from n (set when the model was configured) instead of a
# hard-coded count.
ax1.set_title(f'MFPCA Reconstructed Data with {n} Principal Components', fontstyle='italic')

# wf() prepends the data directory, giving the same output path as before.
fig.savefig(wf('Processed/MFPCA_reconstructed_plot.png'), dpi=300)

### Calculate MSE

In [None]:
# Define function for MSE

def mse(array1, array2):
    """Return the mean squared error between two arrays.

    Parameters
    ----------
    array1, array2 : array-like
        Values to compare. Lists, tuples and scalars are accepted as well as
        NumPy arrays; the two inputs must be broadcastable to a common shape.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        For 1-D input, the mean of the squared differences. For
        higher-dimensional input the mean is taken along the first axis.

    Raises
    ------
    ValueError
        If the inputs are empty, since the mean is undefined.
    """
    # Local import: the notebook's import cell does not load numpy directly.
    import numpy as np

    first = np.atleast_1d(np.asarray(array1, dtype=float))
    second = np.atleast_1d(np.asarray(array2, dtype=float))
    sqdiff = (first - second) ** 2

    if sqdiff.size == 0:
        raise ValueError("mse() requires non-empty inputs")

    # Vectorised mean over the first axis replaces the Python-level
    # sum(...) / len(...) over individual elements.
    return sqdiff.mean(axis=0)

In [None]:
# Define the number of components used for the first MSE computation

n = 2

In [None]:
# Run MFPCA with n components per function, then reconstruct the data by
# projecting it onto the basis (transform) and mapping it back
# (inverse_transform).

mfpca = MFPCA(n_components = [n, n, n])
mfpca.fit(GRF, method='NumInt')
GRF_reconst = mfpca.inverse_transform(mfpca.transform(GRF))

In [None]:
# Flatten the reconstructed and the original curves of every GRF component
# into 1-D arrays so they can be compared element by element.

# ML component (index 0 of GRF_reconst)
ML_GRF_reconst = pd.DataFrame(GRF_reconst[0].values)
ML_GRF_stance_N_df = pd.DataFrame(ML_GRF_stance_N.values)
ML_GRF_reconst_array = ML_GRF_reconst.to_numpy().flatten()
ML_GRF_stance_N_df_array = ML_GRF_stance_N_df.to_numpy().flatten()

# AP component (index 1 of GRF_reconst)
AP_GRF_reconst = pd.DataFrame(GRF_reconst[1].values)
AP_GRF_stance_N_df = pd.DataFrame(AP_GRF_stance_N.values)
AP_GRF_reconst_array = AP_GRF_reconst.to_numpy().flatten()
AP_GRF_stance_N_df_array = AP_GRF_stance_N_df.to_numpy().flatten()

# V component (index 2 of GRF_reconst)
V_GRF_reconst = pd.DataFrame(GRF_reconst[2].values)
V_GRF_stance_N_df = pd.DataFrame(V_GRF_stance_N.values)
V_GRF_reconst_array = V_GRF_reconst.to_numpy().flatten()
V_GRF_stance_N_df_array = V_GRF_stance_N_df.to_numpy().flatten()

In [None]:
# Calculate the MSE between the original and the reconstructed curves of each
# component. Despite the *_mean names these are mean squared errors, rounded
# to two decimals and stored as strings.

ML_GRF_mean, AP_GRF_mean, V_GRF_mean = (
    str(round(mse(original, rebuilt), 2))
    for original, rebuilt in (
        (ML_GRF_stance_N_df_array, ML_GRF_reconst_array),
        (AP_GRF_stance_N_df_array, AP_GRF_reconst_array),
        (V_GRF_stance_N_df_array, V_GRF_reconst_array),
    )
)

In [None]:
# Create the initial result tables: one single-row frame per GRF component,
# holding the number of components n and the matching MSE.
# Each new_* / mse_* pair refers to the same DataFrame object.

mse_ML = new_ML = pd.DataFrame({'Components': [n], 'MSE_ML': [ML_GRF_mean]})
mse_AP = new_AP = pd.DataFrame({'Components': [n], 'MSE_AP': [AP_GRF_mean]})
mse_V = new_V = pd.DataFrame({'Components': [n], 'MSE_V': [V_GRF_mean]})

In [None]:
# For every number of components: refit the model, reconstruct the data,
# compute the MSE of each GRF component and add it to the result tables.


def _flatten_curves(fdata):
    """Return the sampled values of ``fdata`` as a flat 1-D array."""
    return pd.DataFrame(fdata.values).values.flatten()


def _extend_table(table, new_rows):
    """Append ``new_rows`` to ``table``, keeping one row per component count.

    Dropping duplicated 'Components' values (latest wins) makes the cell safe
    to re-run: stale rows are replaced instead of piling up and multiplying
    in the later merge on 'Components'.
    """
    combined = pd.concat([table, pd.DataFrame(new_rows)], ignore_index=True)
    return combined.drop_duplicates(subset='Components', keep='last').reset_index(drop=True)


# The original curves do not depend on the number of components, so they are
# flattened once, outside the loop.
ML_GRF_stance_N_df_array = _flatten_curves(ML_GRF_stance_N)
AP_GRF_stance_N_df_array = _flatten_curves(AP_GRF_stance_N)
V_GRF_stance_N_df_array = _flatten_curves(V_GRF_stance_N)

# Position in this tuple == index of the component in GRF / GRF_reconst.
originals = (
    ('ML', ML_GRF_stance_N_df_array),
    ('AP', AP_GRF_stance_N_df_array),
    ('V', V_GRF_stance_N_df_array),
)

# Collect plain records and build each DataFrame once, instead of growing
# the tables with pd.concat on every iteration.
rows = {'ML': [], 'AP': [], 'V': []}

for i in range(4, 22, 2):

    # fit the model and create reconstructed dataset
    mfpca = MFPCA(n_components=[i, i, i])
    mfpca.fit(GRF, method='NumInt')
    GRF_reconst = mfpca.inverse_transform(mfpca.transform(GRF))

    # Calculate mse for each GRF
    for position, (label, original) in enumerate(originals):
        error = mse(original, _flatten_curves(GRF_reconst[position]))
        # Stored as a rounded string to match the first row of each table,
        # which the earlier cell creates in the same format.
        rows[label].append({'Components': i, 'MSE_' + label: str(round(error, 2))})

# Store the values in the dataframes
mse_ML = _extend_table(mse_ML, rows['ML'])
mse_AP = _extend_table(mse_AP, rows['AP'])
mse_V = _extend_table(mse_V, rows['V'])

In [None]:
# Save the dataset: join the three per-component tables on the number of
# components (outer join keeps every component count) and write them to CSV.

result = (
    mse_ML
    .merge(mse_AP, on="Components", how='outer')
    .merge(mse_V, on="Components", how='outer')
)

# wf() prepends the data directory, giving the same output path as before
# without repeating the absolute prefix.
result.to_csv(wf('Processed/MFPCA_mse.csv'))