# Getting the embeddings

> This notebook computes the embeddings (latent-space representation) of a multivariate time series 
using a trained encoder (e.g., an autoencoder).

In [1]:
# Notebook-level parameters.
# Patch size forwarded as "patch_size" to the Moirai embedding kwargs further down.
model_patch_size: int = 8
# Values > 0 enable the final "Execution ended" message.
verbose: int = 0
# When True, the last cell terminates the kernel process.
reset_kernel: bool = False

In [2]:
# Star imports: presumably the source of the unqualified helpers used below
# (e.g. get_artifact_config_embeddings, ifnone) — verify against dvats / fastcore.
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
# W&B public API client; its `artifact` method is the artifact getter used
# when `config.use_wandb` is False.
wandb_api = wandb.Api()
from yaml import load, FullLoader
import dvats.utils as ut

[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7f20eeb50dc0>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


In [3]:
import torch

# Preferred GPU for this notebook; index 1 keeps the original device choice.
preferred_cuda_device = 1
if torch.cuda.is_available():
    # On hosts that do not expose that ordinal (e.g. a single GPU), fall back
    # to GPU 0 instead of raising on an invalid device index.
    if preferred_cuda_device >= torch.cuda.device_count():
        preferred_cuda_device = 0
    torch.cuda.set_device(preferred_cuda_device)
# On CPU-only hosts nothing is selected, so the notebook can still be run.

## Config parameters
> Configuration parameters are read from `config/03-embeddings.yaml`.

### Get configuration artifact

In [4]:
# Load the embeddings configuration and the W&B job type. The notebook-level
# `verbose` parameter (0 by default) is forwarded instead of a hard-coded 0.
config, job_type = get_artifact_config_embeddings(verbose = verbose)

In [5]:
# Print the loaded configuration entries (one "key: value" per line in the output).
dvats.config.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/moment-small-embedding:latest
input_ar: None
cpu: False


### Show configuration artifact

In [6]:
# Echo each configuration entry as "key: value".
for key, value in config.items():
    print(key, value, sep=": ")

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/moment-small-embedding:latest
input_ar: None
cpu: False


## Build W&B artifact

In [7]:
import os

# Tell W&B which notebook produced the run and reuse the notebook name as the run name.
path = os.path.expanduser("~/work/nbs_pipeline/")
name = "03a_embeddings"
runname = name
os.environ["WANDB_NOTEBOOK_NAME"] = f"{path}{name}.ipynb"
print(f"runname: {runname}")

runname: 03a_embeddings


In [8]:
# Resolve the settings that depend on whether W&B tracking is enabled:
# a disabled, anonymous run under a placeholder project is used otherwise.
if config.use_wandb:
    _wandb_project, _wandb_mode, _wandb_anonymous = config.wandb_project, 'online', 'never'
else:
    _wandb_project, _wandb_mode, _wandb_anonymous = 'work-nbs', 'disabled', 'must'

# Start the run, or resume it if one with the same identity already exists.
run = wandb.init(
    entity=config.wandb_entity,
    project=_wandb_project,
    group=config.wandb_group,
    job_type=job_type,
    mode=_wandb_mode,
    anonymous=_wandb_anonymous,
    config=config,
    resume='allow',
    name=runname,
)

[34m[1mwandb[0m: Currently logged in as: [33mmi-santamaria[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Get trained model artifact

### Build artifact selector
> Workaround to use artifacts offline

In [9]:
# Choose the artifact getter: the run's `use_artifact` when W&B is enabled,
# otherwise the public API client's `artifact` lookup.
if config.use_wandb:
    artifacts_gettr = run.use_artifact
else:
    artifacts_gettr = wandb_api.artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [10]:
# Resolve the encoder artifact named in the config, requesting type 'learner'.
enc_artifact = artifacts_gettr(config.enc_artifact, type='learner')

In [11]:
# TODO: `to_obj()` has been observed to work only on the second attempt;
# the root cause is still unknown, so a single retry is kept as a workaround.
try:
    enc_learner = enc_artifact.to_obj()
except Exception as first_error:
    # Catch only regular errors so Ctrl-C / kernel shutdown are not swallowed,
    # and report the first failure instead of hiding it before retrying once.
    print(f"enc_artifact.to_obj() failed ({first_error!r}); retrying once")
    enc_learner = enc_artifact.to_obj()

[34m[1mwandb[0m: Downloading large artifact moment-small-embedding:latest, 144.63MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.5


## Get dataset artifact from W&B
### Restore the dataset artifact used for training the encoder. 
> Even if the embeddings are not computed over this dataset, we need the metadata of the encoder's training set to check that it matches the dataset we want to process.

In [12]:
# Run that logged the encoder artifact.
enc_run = enc_artifact.logged_by()
# Dataset artifact that run recorded under 'train_artifact' in its config.
enc_artifact_train = artifacts_gettr(enc_run.config['train_artifact'], type='dataset')
# Display the resolved artifact name (including its version).
enc_artifact_train.name

'toy:v2'

In [13]:
# Print the configuration stored with the encoder's training run.
dvats.config.show_attrdict(enc_run.config)

r: 0.71
w: 30
freq: 1s
alias: toy
epochs: 100
mvp_ws: [10, 30]
stride: 1
time_col: None
data_cols: []
mask_sync: False
use_wandb: True
batch_size: 32
csv_config: {}
data_fpath: ~/data/toy.csv
valid_size: 0.2
mask_future: False
wandb_group: None
analysis_mode: online
artifact_name: toy
mask_stateful: True
norm_by_sample: False
train_artifact: mi-santamaria/deepvats/toy:latest
valid_artifact: None
norm_use_single_batch: False
norm_use_by_single_batch: [False]


### Specify the dataset artifact that we want to get the embeddings from
> If no artifact is defined (`input_ar` is `None`), the embeddings are computed over the artifact that was used to train the encoder.

In [14]:
# Batch size recorded in the encoder run's config.
enc_run.config['batch_size']

32

In [15]:
# Default input: the encoder's training artifact, addressed as entity/project/name.
_default_input_ar = '/'.join(
    [enc_artifact_train.entity, enc_artifact_train.project, enc_artifact_train.name]
) if all(
    isinstance(part, str)
    for part in (enc_artifact_train.entity, enc_artifact_train.project, enc_artifact_train.name)
) else f'{enc_artifact_train.entity}/{enc_artifact_train.project}/{enc_artifact_train.name}'

# An input artifact set in the config takes precedence over the default.
if config.input_ar is None:
    input_ar_name = _default_input_ar
else:
    input_ar_name = config.input_ar

# Record the artifact actually used in the run config, then fetch it.
wandb.config.update({'input_ar': input_ar_name}, allow_val_change=True)
input_ar = artifacts_gettr(input_ar_name)
input_ar.name

'toy:v2'

In [16]:
# Load the input artifact as a dataframe via `to_df()` and preview its first rows.
df = input_ar.to_df()
df.head()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Unnamed: 0,T3,T2,T1
1970-01-01 00:00:00,0.741822,0.63718,0.565117
1970-01-01 00:00:01,0.739731,0.629415,0.493513
1970-01-01 00:00:02,0.718757,0.53922,0.46935
1970-01-01 00:00:03,0.730169,0.57767,0.4441
1970-01-01 00:00:04,0.752406,0.57018,0.373008


In [17]:
# Dimensions of the loaded data.
df.shape

(550, 3)

In [18]:
# Build the windowing transform from the window length and stride stored in
# the encoder run's config; no target is extracted (get_y=[]).
_sliding_window = SlidingWindow(
    window_len=enc_run.config['w'],
    stride=enc_run.config['stride'],
    get_y=[],
)
# Apply it to the dataframe; the second returned element is discarded.
enc_input, _ = _sliding_window(df)
enc_input.shape

(521, 3, 30)

In [19]:
# Start a timer; it is ended and shown near the end of the notebook.
timer = ut.Time()
# The value returned by start() is echoed as the cell output.
timer.start()

1729704161.470847

In [20]:
# Show which encoder artifact the config points to.
config.enc_artifact

'mi-santamaria/deepvats/moment-small-embedding:latest'

In [21]:
# Print the restored encoder's repr (its module tree in the output below).
print(enc_learner)

MOMENTPipeline(
  (normalizer): RevIN()
  (tokenizer): Patching()
  (patch_embedding): PatchEmbedding(
    (value_embedding): Linear(in_features=8, out_features=512, bias=False)
    (position_embedding): PositionalEmbedding()
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): T5Stack(
    (embed_tokens): Embedding(32128, 512)
    (block): ModuleList(
      (0): T5Block(
        (layer): ModuleList(
          (0): T5LayerSelfAttention(
            (SelfAttention): T5Attention(
              (q): Linear(in_features=512, out_features=384, bias=False)
              (k): Linear(in_features=512, out_features=384, bias=False)
              (v): Linear(in_features=512, out_features=384, bias=False)
              (o): Linear(in_features=384, out_features=512, bias=False)
              (relative_attention_bias): Embedding(32, 6)
            )
            (layer_norm): T5LayerNorm()
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (1): T5LayerFF(
            (

In [22]:
enc_learn_class = str(enc_learner.__class__)[8:-2]

match enc_learn_class:
    case "momentfm.models.moment.MOMENTPipeline":
        get_embs_kwargs = {
            "cpu": config.cpu,
            "to_numpy": True,
            "verbose": 1
        }
    case "fastai.learner.Learner":
        get_embs_kwargs = {
            "stride": enc_run.config['stride'],
            "cpu": config.cpu,
            "to_numpy": True,
            "batch_size": enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose": 1
        }
    case "uni2ts.model.moirai.module.MoiraiModule":
        get_embs_kwargs = {
            "cpu": config.cpu,
            "to_numpy": True,
            "batch_size": enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose": 1,
            "patch_size": 8, #Modificar en config (añadir en base.yml & modificar lectura a "si existe, añadir"),
            "size": "small", #Modificar en config (añadir en base.yml & modificar lectura a "si existe, añadir"),
            "time": True
        }
    case _:
        print(f"Model embeddings implementation is not yet implemented for {enc_learn_class}.")

In [23]:
# Display the detected encoder class name.
enc_learn_class

'momentfm.models.moment.MOMENTPipeline'

In [24]:
# Fully-qualified class name of the restored encoder, built from the class
# attributes instead of slicing the "<class '...'>" repr with magic offsets.
# For classes defined outside `builtins` this yields the same string.
_enc_cls = enc_learner.__class__
enc_learn_class = f"{_enc_cls.__module__}.{_enc_cls.__qualname__}"
enc_learn_class

'momentfm.models.moment.MOMENTPipeline'

In [25]:
match enc_learn_class:
    case "momentfm.models.moment.MOMENTPipeline":
        get_embs_kwargs = {
            "batch_size": enc_input.shape[0],
            "cpu": config.cpu,
            "to_numpy": True,
            "verbose": 1,
            "padd_step":2
        }
    case "fastai.learner.Learner":
        get_embs_kwargs = {
            "stride": enc_run.config['stride'],
            "cpu": config.cpu,
            "to_numpy": True,
            "batch_size": enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose": 1
        }
    case "uni2ts.model.moirai.module.MoiraiModule":
        get_embs_kwargs = {
            "cpu": config.cpu,
            "to_numpy": True,
            "batch_size": enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose": 2,
            "patch_size": model_patch_size, #Modificar en config (añadir en base.yml & modificar lectura a "si existe, añadir"),
            "time": True
        }
    case _:
        print(f"Model embeddings implementation is not yet implemented for {enc_learn_class}.")
print(f"Enc learn class {enc_learn_class}\nkwargs: {get_embs_kwargs}")

Enc learn class momentfm.models.moment.MOMENTPipeline
kwargs: {'batch_size': 521, 'cpu': False, 'to_numpy': True, 'verbose': 1, 'padd_step': 2}


In [26]:
# Debug smoke test of print_flush.
# NOTE(review): per the signature displayed by `? print_flush` in the next cell, the
# second positional parameter is `print_to_path: bool`; the string "xd" is truthy,
# so this may write to the default log path rather than print — confirm the intent
# (and that `ut.print_flush` is that same function) or remove this cell.
ut.print_flush("patata", "xd")

In [27]:
# Leftover interactive help lookup for print_flush; safe to drop from the final notebook.
? print_flush

[0;31mSignature:[0m
 [0mprint_flush[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mmssg[0m[0;34m:[0m [0mstr[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprint_to_path[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprint_path[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'~/data/logs/logs.txt'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprint_mode[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'a'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mverbose[0m[0;34m:[0m [0mint[0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprint_time[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mprint_both[0m[0;34m:[0m [0mbool[0m [0;34m=[0m [0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m**[0m[0mkwargs[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m <no docstring>
[0;31mFile:[0m      ~

In [None]:
# Options forwarded to `fine_tune_moment_`, kept in one place for readability.
_fine_tune_options = dict(
    stride                        = 1,
    batch_size                    = 128,
    cpu                           = True,
    to_numpy                      = True,
    verbose                       = 0,
    time_flag                     = True,
    #n_windows                     = 32,
    n_windows_percent             = 0.8, # Mask part of the training data
    training_percent              = 0.3, # Train with part of the data
    validation_percent            = 0.3, # Evaluate with part of the data
    num_epochs                    = 10,
    shot                          = True,
    eval_pre                      = True,
    eval_post                     = True,
    lr_scheduler_flag             = False,
    lr_scheduler_name             = False,
    lr_scheduler_num_warmup_steps = 0,
    window_sizes                  = None,
    n_window_sizes                = 3,
    window_sizes_offset           = 0.05,
    windows_min_distance          = 5,
    print_to_path                 = False,
    print_mode                    = 'w',
)

# Fine-tune the restored encoder on the loaded dataframe.
_fine_tune_outputs = fine_tune_moment_(
    X         = df,
    enc_learn = enc_learner,
    **_fine_tune_options
)

# Unpack the seven results: losses, pre/post evaluation results and timings.
(
    losses,
    eval_results_pre, eval_results_post,
    t_shots, t_shot,
    t_evals, t_eval
) = _fine_tune_outputs

[0] fine_tune_moment_single | Eval Pre | wlen 17


  0%|          | 0/2 [00:00<?, ?it/s]

[0] fine_tune_moment_single | Train | wlen 17


  0%|          | 0/20 [00:00<?, ?it/s]

fine_tune_moment_step | Execution failed | Output none 
fine_tune_moment_train | batch 1 ~ torch.Size([1, 3, 17]) | epoch 0 | train 1 of 20 | Loss backward failed: 'int' object has no attribute 'item'
fine_tune_moment_step | Execution failed | Output none 
fine_tune_moment_train | batch 1 ~ torch.Size([1, 3, 17]) | epoch 1 | train 2 of 20 | Loss backward failed: 'int' object has no attribute 'item'
fine_tune_moment_step | Execution failed | Output none 
fine_tune_moment_train | batch 1 ~ torch.Size([1, 3, 17]) | epoch 2 | train 3 of 20 | Loss backward failed: 'int' object has no attribute 'item'
fine_tune_moment_step | Execution failed | Output none 
fine_tune_moment_train | batch 1 ~ torch.Size([1, 3, 17]) | epoch 3 | train 4 of 20 | Loss backward failed: 'int' object has no attribute 'item'
fine_tune_moment_step | Execution failed | Output none 
fine_tune_moment_train | batch 1 ~ torch.Size([1, 3, 17]) | epoch 4 | train 5 of 20 | Loss backward failed: 'int' object has no attribute 'i

  0%|          | 0/2 [00:00<?, ?it/s]

[0] fine_tune_moment_single | Train | wlen 10


  0%|          | 0/20 [00:00<?, ?it/s]

[0] fine_tune_moment_single | Eval Post | wlen 10


  0%|          | 0/2 [00:00<?, ?it/s]

[0] fine_tune_moment_single | Train | wlen 23


  0%|          | 0/10 [00:00<?, ?it/s]

In [None]:
# Print every fine-tuning result, one per line, in the order they were returned.
for _fine_tune_result in (
    losses,
    eval_results_pre,
    eval_results_post,
    t_shots,
    t_shot,
    t_evals,
    t_eval,
):
    print(_fine_tune_result)

In [None]:
# The encoder run's stride is the default; a "stride" entry already present in
# `get_embs_kwargs` (the fastai Learner case) overrides it. Merging first avoids
# "TypeError: got multiple values for keyword argument 'stride'", which passing
# `stride=` alongside `**get_embs_kwargs` raises in that case.
_embs_call_kwargs = {"stride": enc_run.config['stride'], **get_embs_kwargs}

# Compute the embeddings of the windowed input with the restored encoder.
embs = get_enc_embs_set_stride_set_batch_size(
    X          = enc_input,
    enc_learn  = enc_learner,
    **_embs_call_kwargs
)

In [None]:
# Show the `task_name` attribute of the restored encoder.
enc_learner.task_name

In [None]:
# Stop the timer started before the embedding steps and show its result
# (presumably the elapsed time — confirm against ut.Time).
timer.end()
timer.show()

In [None]:
#| export
# Optional end-of-run message, controlled by the notebook-level `verbose`.
if verbose > 0:
    print("Execution ended")
from dvats.imports import beep
# Signal completion with five beeps.
for _beep_idx in range(5):
    beep(1)

In [None]:
#| hide
# Optionally terminate the kernel process immediately (exit status 0) once the
# notebook is done; `os._exit` exits without running cleanup handlers.
if reset_kernel:
    import os
    os._exit(0)