# Getting the embeddings

> This notebook gets the embeddings (or latent space) from a multivariate time series 
given by a encoder (e.g., autoencoder).

In [1]:
model_patch_size = 8
verbose          = 0
reset_kernel     = False

In [2]:
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
wandb_api = wandb.Api()
from yaml import load, FullLoader
import dvats.utils as ut



[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7fc923bf8250>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


In [3]:
import torch
torch.cuda.set_device(0)

## Config parameters
> Configuration parameters are obtained from 'config\03-embeddings.yaml'

### Get configuration artifact

In [4]:
config, job_type = get_artifact_config_embeddings(verbose = 0)

In [5]:
dvats.config.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/zeroshot-moment-small-embedding:latest
input_ar: None
cpu: False


### Show configuration artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [6]:
entity = 'mi-santamaria'
project = 'deepvats'
folder = entity+'/'+project+'/'
model_family = 'zeroshot-moment'
task = 'embedding'
dataset = 'gtrends_kohls'
dataset_version = 'v2'
enc_artifact_dataset = folder + dataset + ':' + dataset_version
enc_artifact_small = folder + model_family + '-small-' + task + ':v0'
enc_artifact_base  =  folder + model_family + '-base-' + task + ':v0'
enc_artifact_large = folder + model_family + '-large-' + task + ':v0'

In [7]:
print("Getting dataset artifact: ", enc_artifact_dataset)
df_artifact = wandb_api.artifact(enc_artifact_dataset, type = 'dataset')
print("Getting small artifact: ", enc_artifact_small)
enc_artifact_small = wandb_api.artifact(enc_artifact_small, type='learner')
print("Getting base artifact: ", enc_artifact_base)
enc_artifact_base  = wandb_api.artifact(enc_artifact_base, type='learner')
print("Getting large artifact: ", enc_artifact_large)
enc_artifact_large = wandb_api.artifact(enc_artifact_large, type='learner')

Getting dataset artifact:  mi-santamaria/deepvats/gtrends_kohls:v2
Getting small artifact:  mi-santamaria/deepvats/zeroshot-moment-small-embedding:v0
Getting base artifact:  mi-santamaria/deepvats/zeroshot-moment-base-embedding:v0
Getting large artifact:  mi-santamaria/deepvats/zeroshot-moment-large-embedding:v0


In [8]:
print(df_artifact.name)
df = df_artifact.to_df()
display(df.head)
print(df.shape)

gtrends_kohls:v2


[34m[1mwandb[0m:   1 of 1 files downloaded.  


<bound method NDFrame.head of               volume
2004-01-01  0.010417
2004-01-08  0.010417
2004-01-15  0.010417
2004-01-22  0.000000
2004-01-29  0.000000
...              ...
2012-05-03  0.322917
2012-05-10  0.312500
2012-05-17  0.281250
2012-05-24  0.291667
2012-05-31  0.322917

[440 rows x 1 columns]>

(440, 1)


In [9]:
print(enc_artifact_small.name)
enc_learner_small = enc_artifact_small.to_obj()

zeroshot-moment-small-embedding:v0


[34m[1mwandb[0m: Downloading large artifact zeroshot-moment-small-embedding:v0, 144.63MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.4


In [10]:
print(enc_artifact_base.name)
enc_learner_base  = enc_artifact_base.to_obj()

zeroshot-moment-base-embedding:v0


[34m[1mwandb[0m: Downloading large artifact zeroshot-moment-base-embedding:v0, 432.97MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:1.3


In [11]:
print(enc_artifact_large.name)
enc_learner_large = enc_artifact_large.to_obj()

zeroshot-moment-large-embedding:v0


[34m[1mwandb[0m: Downloading large artifact zeroshot-moment-large-embedding:v0, 1321.42MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:4.0


In [12]:
def count_parameters(model):
    #return sum(p.numel() for p in model.parameters() if p.requires_grad)
    return sum(p.numel() for p in model.parameters())
print(count_parameters(enc_learner_small))
print(count_parameters(enc_learner_base))
print(count_parameters(enc_learner_large))

35341512
109641608
341248520


In [13]:
def count_parameters(model):
    return sum(p.numel() for p in model.parameters())
count_parameters(enc_learner_large)

341248520

In [14]:
from fastai.losses import MSELossFlat
from dvats.encoder import MAELossFlat, EvalMSE, EvalRMSE, EvalMAE, EvalSMAPE

In [15]:
enc_input, _ = SlidingWindow(window_len=17, stride=2, get_y=[])(df)
enc_input.shape

(212, 1, 17)

In [16]:
#| export
import dvats.config as cfg_

In [17]:
user, project, version, data, config, job_type = cfg_.get_artifact_config_MVP(False)

In [18]:
print(config['batch_size'])
print(config['r'])
print(config['analysis_mode'])

16
0.4
online


In [19]:
common_args = {
    "X": df,
    "stride": 1,
    "batch_size": config['batch_size'],
    "cpu": False,
    "to_numpy": False,
    "time_flag": True,
    "n_windows": None,
    "n_windows_percent": 0.8, # Comprobando si el None es el problema
    "shot": True,
    "eval_pre": True,
    "eval_post": True,
    "lr": config['r'], #use enc_run lr,
    "lr_scheduler_flag": False,
    "lr_scheduler_name": "cosine_with_restarts",
    "lr_scheduler_num_warmup_steps": None,
    "window_sizes": None,
    "full_dataset": True,
    "window_sizes_offset": 0.05,
    "windows_min_distance": 5, #2.5*enc_input.shape[0]/100,
    "print_to_path": False,
    "print_path": "~/data/logs.txt",
    "print_mode": "w",
    "use_moment_masks": False,
    "mask_stateful": config['mask_stateful'],
    "mask_future": config['mask_future'],
    "mask_sync": config['mask_sync'],
    "analysis_mode": config['analysis_mode'],
    "use_wandb": config['use_wandb'],
    "norm_by_sample": config['norm_by_sample'],
    "norm_use_single_batch": config['norm_use_single_batch'],
    "show_plot": True,
    "metrics": [EvalMSE, EvalRMSE, EvalMAE, EvalSMAPE],
    "metrics_args": [{'squared': False}, {'squared': True}, {}, {}],
    "metrics_names":["mse", "rmse", "mae", "smape"],
    "metrics_dict": None
}

In [20]:
import pandas as pd
results = pd.DataFrame(
    columns = [
        'model_size',
        'n_epochs',
        'dataset_percent',
        'maskared_percent',
        'losses',
        'eval_results_pre',
        'eval_results_post',
        'time'
    ]
)

errors = pd.DataFrame(
    columns = [
        'model_size',
        'n_epochs',
        'dataset_percent',
        'maskared_percent',
        'error'
    ]
)

In [26]:
from copy import deepcopy
i = 0
for n_epochs in [5]:
    for dataset_percent in [0.3]:
        for maskared_percent in [0.4]:
            for sizes in [3]:
                print(f"--> epoch {n_epochs}, dataset_percent {dataset_percent}, mask {maskared_percent}")
                print(f" sizes {sizes}")
                case = {
                        'model_size': "small",
                        'n_epochs': n_epochs,
                        'dataset_percent': dataset_percent,
                        'maskared_percent': maskared_percent,
                       }
                results_dict = deepcopy(case)
                error_dict = deepcopy(case)
                result_dict.update({
                        'losses': np.nan,
                        'eval_results_pre': np.nan, 
                        'eval_results_post': np.nan,
                        'time': np.nan
                    })
                error = False
                result = fine_tune(
                    enc_learn           = enc_learner_small,
                    window_mask_percent = maskared_percent,
                    training_percent    = dataset_percent,
                    validation_percent  = 0.3, #1-dataset_percent if 1-dataset_percent != 0 else 0.3,
                    num_epochs          = n_epochs,
                    n_window_sizes      = sizes,
                    verbose             = 0,
                    register_errors     = True,
                    **common_args    
                )
                internal_errors = result[8]
                result_dict['losses'] = result[0]
                result_dict['eval_results_pre'] = result[1]
                result_dict['eval_results_post'] = result[2]
                result_dict['time'] = result[4]
                results = pd.concat([results, pd.DataFrame([result_dict])], ignore_index=True)
                display(internal_errors)
                # Attach possible errors
                internal_errors['model_size'] = case['model_size']
                internal_errors['n_epochs'] = case['n_epochs']
                internal_errors['dataset_percent'] = case['dataset_percent']
                internal_errors['maskared_percent'] = case['maskared_percent']
                errors = pd.concat([errors, internal_errors])
                print(f"epoch {n_epochs}, dataset_percent {dataset_percent}, mask {maskared_percent}")
                if not error: print(f" sizes {sizes} | time: {result[4]} -->")
                    


--> epoch 5, dataset_percent 0.3, mask 0.4
 sizes 3







  0% 0/7 [00:00<?, ?it/s][A[A[A[A[A




 14% 1/7 [00:02<00:12,  2.08s/it][A[A[A[A[A




 57% 4/7 [00:02<00:01,  2.36it/s][A[A[A[A[A




100% 7/7 [00:02<00:00,  3.04it/s][A[A[A[A[A





  0% 0/30 [00:00<?, ?it/s][A[A[A[A[A




 17% 5/30 [00:00<00:00, 40.67it/s][A[A[A[A[A




 33% 10/30 [00:00<00:00, 40.59it/s][A[A[A[A[A




 50% 15/30 [00:00<00:00, 40.39it/s][A[A[A[A[A




 67% 20/30 [00:00<00:00, 40.41it/s][A[A[A[A[A




 83% 25/30 [00:00<00:00, 40.47it/s][A[A[A[A[A




100% 30/30 [00:00<00:00, 40.36it/s][A[A[A[A[A





  0% 0/7 [00:00<?, ?it/s][A[A[A[A[A




 14% 1/7 [00:02<00:12,  2.06s/it][A[A[A[A[A




 57% 4/7 [00:02<00:01,  2.38it/s][A[A[A[A[A




100% 7/7 [00:02<00:00,  3.07it/s][A[A[A[A[A





  0% 0/30 [00:00<?, ?it/s][A[A[A[A[A




 17% 5/30 [00:00<00:00, 42.36it/s][A[A[A[A[A




 33% 10/30 [00:00<00:00, 42.44it/s][A[A[A[A[A




 50% 15/30 [00:00<00:00, 42.49it/s][A[A[A[A[A





[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 







 23% 7/30 [00:00<00:00, 61.70it/s][A[A[A[A[A

[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 







 47% 14/30 [00:00<00:00, 61.59it/s][A[A[A[A[A

[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 







 70% 21/30 [00:00<00:00, 60.79it/s][A[A[A[A[A

[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 







 93% 28/30 [00:00<00:00, 61.03it/s][A[A[A[A[A

[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 
[-1] fine_tune_moment_train_loop_step_ | Execution failed | Output none 


100% 30/30 [00:00<00:00, 60.84it/s]





  0% 0/7 [00:00<?, ?it/s][A[A[A[A[A

[0] [91m [ fine_tune_moment_single ] Registering error in DataFrame | window: 4 | error: maximum size for tensor at dimension 2 is 4 but size is 8[0m


Unnamed: 0,window,error
0,4,maximum size for tensor at dimension 2 is 4 but size is 8


Unnamed: 0,window,error
0,4,maximum size for tensor at dimension 2 is 4 but size is 8


epoch 5, dataset_percent 0.3, mask 0.4
 sizes 3 | time: 1.9595823287963867 -->


In [28]:
results.head()

Unnamed: 0,model_size,n_epochs,dataset_percent,maskared_percent,losses,eval_results_pre,eval_results_post,time
0,small,5,0.3,0.4,"[[0.0018570800311863422, 0.0015878648264333606, 0.0009209270356222987, 0.002134652342647314, 0.0007532269228249788, 0.0015007125912234187, 0.0016267269384115934, 0.0010450328700244427, 0.0007697815890423954, 0.0015979971503838897, 0.001512599177658558, 0.0030997823923826218, 0.0014228237560018897, 0.0005730511620640755, 0.0011645907070487738, 0.0014733547577634454, 0.0016052743885666132, 0.0034402955789119005, 0.0006994965951889753, 0.0014398995554074645, 0.0011419517686590552, 0.001978500746190548, 0.0017194877145811915, 0.0013799204025417566, 0.0016481078928336501, 0.0015288335271179676,...","{'mse': 0.053732890570543285, 'rmse': 0.0029476201092852572, 'mae': 0.03946589066267938, 'smape': 1.3945297692180116}","{'mse': [0.053732890570543285, 0.05464786522408429], 'rmse': [0.0029476201092852572, 0.0030411378618556244], 'mae': [0.03946589066267938, 0.04058488292743348], 'smape': [1.3945297692180116, 1.519132180334581]}",2.200216
1,small,5,0.3,0.4,"[[0.001213251380249858, 0.0018667237600311637, 0.0009557272423990071, 0.0010845790384337306, 0.0009431237704120576, 0.0005253612180240452, 0.0007654708460904658, 0.0012873475207015872, 0.0012703860411420465, 0.0010890287812799215, 0.0018782730912789702, 6.284558912739158e-05, 0.0009932521497830749, 0.0014940031105652452, 0.0016170875169336796, 0.00048030074685811996, 0.001763221574947238, 0.003082903102040291, 0.001746735768392682, 0.0012614168226718903, 0.0012270922306925058, 0.00096009491244331, 0.0011571724899113178, 0.0003122887574136257, 0.0006992373964749277, 0.0013842410407960415, 0...","{'mse': 0.053732890570543285, 'rmse': 0.0029476201092852572, 'mae': 0.03946589066267938, 'smape': 1.3945297692180116}","{'mse': [0.053732890570543285, 0.05464786522408429], 'rmse': [0.0029476201092852572, 0.0030411378618556244], 'mae': [0.03946589066267938, 0.04058488292743348], 'smape': [1.3945297692180116, 1.519132180334581]}",2.224306
2,small,5,0.3,0.4,"[[0.0012401865096762776, 0.0018391698831692338, 0.0009610585984773934, 0.0010650383774191141, 0.002017770428210497, 0.0029011007864028215, 0.0021462934091687202, 0.0010054085869342089, 0.0013744119787588716, 0.001884149736724794, 0.001041881274431944, 0.00038169813342392445, 0.0011798416962847114, 0.0015886141918599606, 0.002019871724769473, 0.0013093806337565184, 0.0015014841919764876, 0.000382117839762941, 0.0009226393303833902, 0.0013169969897717237, 0.0019916254095733166, 0.00201159855350852, 0.0007617001538164914, 0.0022375488188117743, 0.001764774671755731, 0.0014229132793843746, 0.0...","{'mse': 0.053732890570543285, 'rmse': 0.0029476201092852572, 'mae': 0.03946589066267938, 'smape': 1.3945297692180116}","{'mse': [0.053732890570543285, 0.05464786522408429], 'rmse': [0.0029476201092852572, 0.0030411378618556244], 'mae': [0.03946589066267938, 0.04058488292743348], 'smape': [1.3945297692180116, 1.519132180334581]}",1.967512
3,small,5,0.3,0.4,"[[0.002227999735623598, 0.0017816995969042182, 0.001155250589363277, 0.0015658932970836759, 0.0010266038589179516, 0.0030290682334452868, 0.0014215514529496431, 0.0019348651403561234, 0.0014597258996218443, 0.0019633665215224028, 0.0016010177787393332, 6.699291407130659e-05, 0.0012974406126886606, 0.001756849349476397, 0.0019405761267989874, 0.0019149841973558068, 0.0011365432292222977, 0.0012109093368053436, 0.0013917410979047418, 0.0010956109035760164, 0.002981848083436489, 0.0013810684904456139, 0.0016998689388856292, 0.0013882333878427744, 0.0016452133422717452, 0.002200199756771326, 0...","{'mse': 0.053732890570543285, 'rmse': 0.0029476201092852572, 'mae': 0.03946589066267938, 'smape': 1.3945297692180116}","{'mse': [0.053732890570543285, 0.05464786522408429], 'rmse': [0.0029476201092852572, 0.0030411378618556244], 'mae': [0.03946589066267938, 0.04058488292743348], 'smape': [1.3945297692180116, 1.519132180334581]}",1.962035
4,small,5,0.3,0.4,"[[0.0015740660019218922, 0.001200165948830545, 0.002080070786178112, 0.002357157412916422, 0.0012424372835084796, 0.00013089895946905017, 0.001458610757254064, 0.0014932393096387386, 0.0017465681303292513, 0.0018863596487790346, 0.0013043942162767053, 7.548784196842462e-05, 0.0028164091054350138, 0.0013159075751900673, 0.0015457543777301908, 0.001068626530468464, 0.0010778156574815512, 0.0016223695129156113, 0.0012785454746335745, 0.001608490594662726, 0.001106350333429873, 0.002108949236571789, 0.0019885762594640255, 0.0023730637039989233, 0.0010583888506516814, 0.0017514225328341126, 0.0...","{'mse': 0.053732890570543285, 'rmse': 0.0029476201092852572, 'mae': 0.03946589066267938, 'smape': 1.3945297692180116}","{'mse': [0.053732890570543285, 0.05464786522408429], 'rmse': [0.0029476201092852572, 0.0030411378618556244], 'mae': [0.03946589066267938, 0.04058488292743348], 'smape': [1.3945297692180116, 1.519132180334581]}",1.959582


In [27]:
errors.head()

Unnamed: 0,model_size,n_epochs,dataset_percent,maskared_percent,error,window
0,small,5,0.3,0.4,maximum size for tensor at dimension 2 is 4 but size is 8,4


In [None]:
#| export
if verbose > 0: print("Execution ended")
from dvats.imports import beep
beep(1)
beep(1)
beep(1)
beep(1)
beep(1)

In [None]:
#| hide
if reset_kernel:
    import os
    os._exit(00)