# 2D PCA Analysis

In [19]:
import numpy as np
import dask.array as da
from sklearn.pipeline import Pipeline

from pymks import (
    PrimitiveTransformer,
    TwoPointCorrelation,
    GenericTransformer,
)

from dask_ml.decomposition import IncrementalPCA

In [20]:
def pca_steps():
    """Build the ordered ``(name, transformer)`` pairs for the PCA pipeline.

    Returns:
        A 4-tuple of ``(step_name, estimator)`` pairs, in execution order:
        ``"discritize"`` -> ``"correlations"`` -> ``"flatten"`` -> ``"pca"``.
        Each call constructs fresh estimator instances.

    Note:
        The first step name is spelled ``"discritize"`` (sic). It is the key
        under which the step is registered, so it is kept unchanged.
    """
    discretizer = PrimitiveTransformer(n_state=2, min_=0.0, max_=1.0)
    two_point_stats = TwoPointCorrelation(
        periodic_boundary=True,
        cutoff=30,
        correlations=[(0, 0)],
    )
    # Keep the first axis and collapse all remaining axes into one, so the
    # final step receives a 2D (first-axis, everything-else) array.
    flattener = GenericTransformer(
        lambda arr: arr.reshape(arr.shape[0], -1)
    )
    reducer = IncrementalPCA(n_components=15)

    return (
        ("discritize", discretizer),
        ("correlations", two_point_stats),
        ("flatten", flattener),
        ("pca", reducer),
    )

def make_pca_model():
    """Return a new, unfitted ``Pipeline`` assembled from ``pca_steps()``.

    ``pca_steps()`` returns a tuple, whereas ``Pipeline`` documents its
    ``steps`` argument as a list of ``(name, estimator)`` tuples. The steps
    are therefore converted to a list here, so the pipeline holds the
    documented type and owns its own mutable step list.

    Returns:
        A ``sklearn.pipeline.Pipeline`` whose steps are those produced by
        ``pca_steps()``, in the same order.
    """
    return Pipeline(steps=list(pca_steps()))

In [21]:
# Load the input arrays. ``np.load`` on an ``.npz`` archive returns a lazy
# ``NpzFile`` that keeps the underlying file open, so read what is needed
# inside a ``with`` block to make sure the handle is closed afterwards.
with np.load('data.npz') as data:
    x_data = data['x_data']
    # Flatten the targets to a 1D vector.
    y_data = data['y_data'].reshape(-1)
print(x_data.shape)
print(y_data.shape)

(100, 101, 101)
(100,)


In [22]:
# Build a fresh pipeline, then wrap the in-memory array as a dask array.
# The chunk shape (100, 101, 101) equals the shape printed for ``x_data``
# in the recorded run, i.e. the whole array is a single chunk.
model = make_pca_model()
x_data_da = da.from_array(x_data, chunks=(100, 101, 101))

# Fit the pipeline and project the data; the result is lazy until
# ``.compute()`` materialises it.
x_data_pca_lazy = model.fit_transform(x_data_da)
x_data_pca = x_data_pca_lazy.compute()

  return func(*(_execute_task(a, cache) for a in args))


In [23]:
# Sanity check: report the shapes of the reduced features and the targets.
for checked_array in (x_data_pca, y_data):
    print(checked_array.shape)

(100, 15)
(100,)


In [24]:
# Persist the reduced features together with the targets in one archive,
# stored under the keys ``x_data_pca`` and ``y_data``.
np.savez(
    'data_pca.npz',
    x_data_pca=x_data_pca,
    y_data=y_data,
)