# Getting the embeddings

> This notebook gets the embeddings (or latent space) from a multivariate time series 
given by a encoder (e.g., autoencoder).

In [1]:
model_patch_size = 8
verbose          = 0
reset_kernel     = False

In [2]:
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
wandb_api = wandb.Api()
from yaml import load, FullLoader
import dvats.utils as ut



[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7ff7a278a350>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


In [3]:
import torch
torch.cuda.set_device(0)

## Config parameters
> Configuration parameters are obtained from 'config\03-embeddings.yaml'

### Get configuration artifact

In [4]:
config, job_type = get_artifact_config_embeddings(verbose = 0)

In [5]:
dvats.config.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/zeroshot-moirai-small:latest
input_ar: None
cpu: False


### Show configuration artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [6]:
entity = 'mi-santamaria'
project = 'deepvats'
folder = entity+'/'+project+'/'
model_family = 'zeroshot-moment'
task = 'embedding'
dataset = 'gtrends_kohls'
dataset_version = 'v2'
enc_artifact_dataset = folder + dataset + ':' + dataset_version
enc_artifact_small = folder + model_family + '-small-' + task + ':v0'
enc_artifact_base  =  folder + model_family + '-base-' + task + ':v0'
enc_artifact_large = folder + model_family + '-large-' + task + ':v0'

In [7]:
print("Getting dataset artifact: ", enc_artifact_dataset)
df_artifact = wandb_api.artifact(enc_artifact_dataset, type = 'dataset')
print("Getting small artifact: ", enc_artifact_small)
enc_artifact_small = wandb_api.artifact(enc_artifact_small, type='learner')
print("Getting base artifact: ", enc_artifact_base)
enc_artifact_base  = wandb_api.artifact(enc_artifact_base, type='learner')
print("Getting large artifact: ", enc_artifact_large)
enc_artifact_large = wandb_api.artifact(enc_artifact_large, type='learner')

Getting dataset artifact:  mi-santamaria/deepvats/gtrends_kohls:v2
Getting small artifact:  mi-santamaria/deepvats/zeroshot-moment-small-embedding:v0
Getting base artifact:  mi-santamaria/deepvats/zeroshot-moment-base-embedding:v0
Getting large artifact:  mi-santamaria/deepvats/zeroshot-moment-large-embedding:v0


In [8]:
print(df_artifact.name)
df = df_artifact.to_df()
display(df.head)
print(df.shape)

gtrends_kohls:v2


[34m[1mwandb[0m:   1 of 1 files downloaded.  


<bound method NDFrame.head of               volume
2004-01-01  0.010417
2004-01-08  0.010417
2004-01-15  0.010417
2004-01-22  0.000000
2004-01-29  0.000000
...              ...
2012-05-03  0.322917
2012-05-10  0.312500
2012-05-17  0.281250
2012-05-24  0.291667
2012-05-31  0.322917

[440 rows x 1 columns]>

(440, 1)


In [9]:
enc_learner_small = enc_artifact_small.to_obj()
enc_learner_base  = enc_artifact_base.to_obj()
enc_learner_large = enc_artifact_large.to_obj()

[34m[1mwandb[0m: Downloading large artifact zeroshot-moment-small-embedding:v0, 144.63MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.4
[34m[1mwandb[0m: Downloading large artifact zeroshot-moment-base-embedding:v0, 432.97MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:1.3
[34m[1mwandb[0m: Downloading large artifact zeroshot-moment-large-embedding:v0, 1321.42MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:4.0


In [10]:
def count_parameters(model):
    #return sum(p.numel() for p in model.parameters() if p.requires_grad)
    return sum(p.numel() for p in model.parameters())
print(count_parameters(enc_learner_small))
print(count_parameters(enc_learner_base))
print(count_parameters(enc_learner_large))

35341512
109641608
341248520


In [11]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())
count_parameters(enc_learner_large)

341248520

In [12]:
from fastai.losses import MSELossFlat
from dvats.encoder import MAELossFlat, EvalMSE, EvalRMSE, EvalMAE, EvalSMAPE

In [13]:
import pandas as pd
results = pd.DataFrame(
    columns = [
        'model size',
        'n_epochs',
        'dataset_percent',
        'maskared_percent',
        'losses',
        'eval_results_pre',
        'eval_results_post',
        'time'
    ]
)

In [14]:
enc_input, _ = SlidingWindow(window_len=17, stride=2, get_y=[])(df)
enc_input.shape

(212, 1, 17)

In [15]:
vals = [5, 10, 20, 40, 80, 100]
epochs = vals[:2]
dataset_percents = [ val / 100 for val in vals ][:2]
maskared_percents = [ val / 100 for val in vals[:-1]] [:2]
sizes_percents = [5,10,20]
n_sizes = [int(np.floor(val*enc_input.shape[0]/100)) for val in sizes_percents]
n_sizes

[10, 21, 42]

In [16]:
common_args = {
    "X": df,
    "stride": 1,
    "batch_size": df_artifact['batch_size'],
    "cpu": False,
    "to_numpy": False,
    "time_flag": True,
    "n_windows": None,
    "n_windows_percent": None,
    "shot": True,
    "eval_pre": True,
    "eval_post": True,
    "lr": 0.4, #use enc_run lr,
    "lr_scheduler_flag": False,
    "lr_scheduler_num_warmup_steps": None,
    "window_sizes": None,
    "full_dataset": True,
    "window_sizes_offset": 0.05,
    "windows_min_distance": 2.5*enc_input.shape[0]/100,
    "print_to_path": False,
    "use_moment_masks": False,
    "mask_stateful": False,
    "mask_future": True,
    "mask_sync": False,
    "analysis_mode": "online",
    "use_wandb": False,
    "norm_by_sample": False,
    "norm_use_single_batch": False,
    "show_plot": False,
    "metrics": [EvalMSE, EvalRMSE, EvalMAE, EvalSMAPE],
    "metrics_args": [{'squared': False}, {'squared': True}, {}, {}],
    "metrics_names":["mse", "rmse", "mae", "smape"],
    "metrics_dict":None
}

In [17]:
i = 0
for n_epochs in [5]:
    for dataset_percent in dataset_percents[:1]:
        for maskared_percent in maskared_percents[:1]:
            for sizes in n_sizes[:1]:
                print(f"--> epoch {n_epochs}, dataset_percent {dataset_percent}, mask {maskared_percent}")
                print(f" sizes {sizes}")
                result = fine_tune(
                    enc_learn           = enc_learner_small,
                    window_mask_percent = maskared_percent,
                    training_percent    = dataset_percent,
                    validation_percent  = 1-dataset_percent if 1-dataset_percent != 0 else 0.3,
                    num_epochs          = n_epochs,
                    n_window_sizes      = sizes,
                    verbose             = 0,
                    **common_args    
                )
                results.append({
                    'model_size': "small",
                    'n_epochs': n_epochs,
                    'dataset_percent': dataset_percent,
                    'maskared_percent': maskared_percent,
                    'losses': result[0],
                    'eval_results_pre': result[1],
                    'eval_results_post': result[2],
                    'time': result[4]
                })
                print(f"epoch {n_epochs}, dataset_percent {dataset_percent}, mask {maskared_percent}")
                print(f" sizes {sizes} | time: {result[4]} -->")
                    


--> epoch 5, dataset_percent 0.05, mask 0.05
 sizes 10
[0] [ --> windowed_dataset ]
[0]  [ _get_enc_input ] X is a DataFrame, X~(440, 1) | window_sizes 0, n_window_sizes 10
[0]  [ _get_enc_input ] X is a DataFrame | Selecting Fourier's dominant frequences
[0] [ --> Find_dominant_window_sizes_list ]
[0]  [ Find_dominant_window_sizes_list ] X ~ (440, 1)
[0]  [ Find_dominant_window_sizes_list ] Grouping sizes
[0] [Find_dominant_window_sizes_list --> ]
[0]  [ windowed_dataset ] X is a DataFrame | Window sizes: 9
[0]  [ windowed_dataset ] Building the windows
[0]  [ windowed_dataset ] Number of windows: 9
[0] [windowed_dataset --> ]


100% 13/13 [00:02<00:00,  4.65it/s]
100% 5/5 [00:00<00:00, 32.51it/s]


TypeError: fine_tune_moment_eval_() got an unexpected keyword argument 'num_epochs'

In [None]:
#| export
if verbose > 0: print("Execution ended")
from dvats.imports import beep
beep(1)
beep(1)
beep(1)
beep(1)
beep(1)

In [None]:
#| hide
if reset_kernel:
    import os
    os._exit(00)