In [None]:
#| default_exp dr

In [None]:
#| hide
%load_ext autoreload
%autoreload 2

# Dimensionality reduction

In [None]:
#| export
import umap
import cudf
import cuml
import pandas as pd
import numpy as np
from fastcore.all import *
from dvats.imports import *
from dvats.load import TSArtifact

In [None]:
#| export
def check_compatibility(dr_ar:TSArtifact, enc_ar:TSArtifact):
    """Check that the artifact used by the encoder model (`enc_ar`) and the artifact that is
    going to be passed through the DR (`dr_ar`) are compatible.

    The following conditions, all read from `metadata['TS']`, must hold:

    - `vars`: both artifacts list the same variables.
    - `freq`: both artifacts have the same frequency.
    - `normalization`: the DR artifact has no `normalization` key (or it is None).
    - `has_missing_values`: the DR artifact reports no missing values. The flag is
      compared through `str`, so both the string "False" and a boolean False pass.

    Returns None and prints "Artifacts are compatible." on success.

    Raises `ValueError` naming the failed checks when the artifacts are incompatible.
    Any error raised while reading the metadata (e.g. `KeyError` for a missing key)
    is re-raised unchanged. In both cases "Artifacts are not compatible." is printed first.
    """
    try:
        dr_ts = dr_ar.metadata['TS']
        enc_ts = enc_ar.metadata['TS']
        # Keep each check under a readable name so a failure can say what went wrong.
        checks = {
            'vars': dr_ts['vars'] == enc_ts['vars'],
            'freq': dr_ts['freq'] == enc_ts['freq'],
            # Not normalized data has not the key normalization.
            'normalization': dr_ts.get('normalization') is None,
            'has_missing_values': str(dr_ts['has_missing_values']) == "False",
        }
        failed = [name for name, passed in checks.items() if not passed]
        if failed:
            raise ValueError(
                "Incompatible artifacts, failed checks: " + ", ".join(failed)
            )
    except Exception:
        print("Artifacts are not compatible.")
        # Bare raise keeps the original exception and traceback intact.
        raise
    else:
        print("Artifacts are compatible.")
    return None

## Get projections (UMAP, t-SNE, PCA)

In [None]:
#| export
import warnings
from numba.core.errors import NumbaPerformanceWarning
@delegates(cuml.UMAP)
def get_UMAP_prjs(input_data, cpu=True, **kwargs):
    """Compute the projections of `input_data` using UMAP, with a configuration contained in `**kwargs`.

    `cpu=True` (the default) uses `umap.UMAP`; `cpu=False` uses `cuml.UMAP`.
    `**kwargs` are forwarded unchanged to the selected reducer's constructor.
    Returns whatever the reducer's `fit_transform` returns for `input_data`.
    """
    # Silence NumbaPerformanceWarning only for the duration of this call, so the
    # caller's global warning filters are left untouched afterwards.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
        reducer = umap.UMAP(**kwargs) if cpu else cuml.UMAP(**kwargs)
        projections = reducer.fit_transform(input_data)
    return projections

In [None]:
#| slow
# Smoke test for the CPU UMAP path on a small random matrix (5 rows, 10 columns).
foo = np.random.rand(5, 10)
# n_neighbors must stay below the number of rows of such a small input.
bar = get_UMAP_prjs(foo, cpu=True, n_neighbors=3, min_dist=0.1)
# Expect one projected row per input row, with 2 output columns.
test_eq(bar.shape, (foo.shape[0], 2))

To get consistent results across executions, pass a fixed `random_state`. Note that UMAP then runs single-threaded (`n_jobs` is overridden to 1).

In [None]:
#| slow
# Build the input here so this cell does not depend on variables left behind by
# another cell (which may be skipped when slow tests are disabled).
foo = np.random.rand(5, 10)
# Two runs with the same seed on the same data must give identical projections.
bar = get_UMAP_prjs(foo, cpu=True, n_neighbors=3, random_state=1234)
baz = get_UMAP_prjs(foo, cpu=True, n_neighbors=3, random_state=1234)
test_eq(bar, baz)

  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")
  warn(f"n_jobs value {self.n_jobs} overridden to 1 by setting random_state. Use no seed for parallelism.")


In [None]:
#| export
@delegates(cuml.PCA)
def get_PCA_prjs(X, cpu=False, **kwargs):
    """Compute the PCA projections of `X`.

    Only the `cuml.PCA` path (`cpu=False`, the default) exists; `cpu=True`
    raises `NotImplementedError`. `**kwargs` are forwarded to `cuml.PCA`.
    Returns the result of the reducer's `fit_transform(X)`.
    """
    if cpu:
        # No CPU implementation is wired in for PCA.
        raise NotImplementedError
    return cuml.PCA(**kwargs).fit_transform(X)

In [None]:
#| hide
# Test the function get_PCA_prjs
foo = np.random.rand(5, 10)
bar = get_PCA_prjs(foo, cpu=False, n_components=2)
# One projected row per input row, one column per requested component.
test_eq(bar.shape, (foo.shape[0], 2))

In [None]:
#| export
@delegates(cuml.TSNE)
def get_TSNE_prjs(X, cpu=False, **kwargs):
    """Compute the TSNE projections of `X`.

    Only the `cuml.TSNE` path (`cpu=False`, the default) exists; `cpu=True`
    raises `NotImplementedError`. `**kwargs` are forwarded to `cuml.TSNE`.
    Returns the result of the reducer's `fit_transform(X)`.
    """
    if cpu:
        # No CPU implementation is wired in for TSNE.
        raise NotImplementedError
    return cuml.TSNE(**kwargs).fit_transform(X)

In [None]:
#| hide
# Test the function get_TSNE_prjs
foo = np.random.rand(90, 10)
bar = get_TSNE_prjs(foo, cpu=False)
# One projected row per input row; the default embedding has 2 columns.
test_eq(bar.shape, (foo.shape[0], 2))

  return func(**kwargs)


## Export 

In [None]:
#| hide
# The two lines below are a disabled script-export call, kept for reference.
#from nbdev.export import notebook2script
#notebook2script()
# NOTE(review): `beep` is not defined in this notebook; presumably it comes from one
# of the star imports at the top and signals that the notebook finished — confirm.
beep(1)