# Getting the embeddings

> This notebook computes the embeddings (latent-space representation) of a multivariate time series 
using an encoder (e.g., an autoencoder).

In [1]:
# Notebook-level parameters.
model_patch_size = 8      # patch size forwarded in the Moirai embedding kwargs further down
verbose          = 0      # > 0 prints "Execution ended" in the closing cell
reset_kernel     = False  # when True, the last cell terminates the kernel process via os._exit

In [2]:
from dvats.all import *
from tsai.data.preparation import SlidingWindow
from fastcore.all import *
import wandb
wandb_api = wandb.Api()
from yaml import load, FullLoader
import dvats.utils as ut



[?2004l
Octave is ready <oct2py.core.Oct2Py object at 0x7f8924574220>
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l
[?2004l


In [3]:
import torch

# Pin the notebook to the first GPU, but only when CUDA is actually usable:
# torch.cuda.set_device fails on hosts without CUDA, which would stop the
# notebook before the `cpu` configuration option could even be read.
if torch.cuda.is_available():
    torch.cuda.set_device(0)

## Config parameters
> Configuration parameters are obtained from 'config\03-embeddings.yaml'

### Get configuration artifact

In [4]:
# Load the embeddings configuration and the job type later passed to wandb.init.
# The notebook-level `verbose` parameter is forwarded instead of a hard-coded 0,
# so changing it in the parameters cell also affects this call.
config, job_type = get_artifact_config_embeddings(verbose = verbose)

In [5]:
# Print the loaded configuration, one "key: value" entry per line.
dvats.config.show_attrdict(config)

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/zeroshot-moirai-small:latest
input_ar: None
cpu: False


### Show configuration artifact

In [6]:
# Plain-Python listing of every configuration entry as "key: value".
for key, value in config.items():
    print("{}: {}".format(key, value))

use_wandb: True
wandb_group: embeddings
wandb_entity: mi-santamaria
wandb_project: deepvats
enc_artifact: mi-santamaria/deepvats/zeroshot-moirai-small:latest
input_ar: None
cpu: False


## Build W&B artifact

In [7]:
import os

# Publish the notebook location through the WANDB_NOTEBOOK_NAME environment
# variable and reuse the notebook name as the run name.
name = "03a_embeddings"
path = os.path.expanduser("~/work/nbs_pipeline/")
runname = name
os.environ["WANDB_NOTEBOOK_NAME"] = f"{path}{name}.ipynb"
print(f"runname: {runname}")

runname: 03a_embeddings


In [8]:
# Start (or resume) the W&B run for this notebook. With W&B switched off in the
# configuration, the run is created in 'disabled' mode under the 'work-nbs' project.
_use_wandb = config.use_wandb
run = wandb.init(
    entity=config.wandb_entity,
    project=config.wandb_project if _use_wandb else 'work-nbs',
    group=config.wandb_group,
    job_type=job_type,
    mode='online' if _use_wandb else 'disabled',
    anonymous='never' if _use_wandb else 'must',
    config=config,
    resume='allow',
    name=runname,
)

[34m[1mwandb[0m: Currently logged in as: [33mmi-santamaria[0m. Use [1m`wandb login --relogin`[0m to force relogin


## Get trained model artifact

### Build artifact selector
> Workaround to use artifacts offline

In [9]:
# With W&B enabled, artifacts are fetched through the active run (run.use_artifact);
# otherwise fall back to the API client created in the imports cell (wandb_api.artifact).
artifacts_gettr = run.use_artifact if config.use_wandb else wandb_api.artifact

### Get the model from W&B
> Restore the encoder model and its associated configuration

In [10]:
# Fetch the encoder artifact named in the configuration (artifact type 'learner').
enc_artifact = artifacts_gettr(config.enc_artifact, type='learner')

In [11]:
# Materialize the encoder object from the artifact.
# TODO: the first call to `to_obj()` is known to fail while a second call succeeds
# (root cause still unknown), so retry exactly once.
# Only `Exception` subclasses trigger the retry, so KeyboardInterrupt / SystemExit are
# no longer swallowed, and the first error is reported instead of silently discarded.
try:
    enc_learner = enc_artifact.to_obj()
except Exception as first_error:
    print(f"enc_artifact.to_obj() failed on the first attempt ({first_error!r}); retrying once.")
    enc_learner = enc_artifact.to_obj()

[34m[1mwandb[0m: Downloading large artifact zeroshot-moirai-small:latest, 52.94MB. 1 files... 
[34m[1mwandb[0m:   1 of 1 files downloaded.  
Done. 0:0:0.2


## Get dataset artifact from W&B
### Restore the dataset artifact used for training the encoder. 
> Even if we do not compute the dimensionality reduction over this dataset, we need to know the metadata of the encoder's training set in order to check that it matches the dataset that we want to reduce.

In [12]:
# Find the run that logged the encoder, then fetch the dataset artifact that run
# recorded under 'train_artifact' in its configuration.
enc_run = enc_artifact.logged_by()
_train_artifact_name = enc_run.config['train_artifact']
enc_artifact_train = artifacts_gettr(_train_artifact_name, type='dataset')
enc_artifact_train.name

'gtrends_khols-normalized_yearly:v0'

In [13]:
# Print the configuration logged by the encoder's training run.
dvats.config.show_attrdict(enc_run.config)

r: 0.4
w: 17
alias: gtrends_khols-normalized_yearly
epochs: 200
mvp_ws: [12, 17]
stride: 1
mask_sync: False
use_wandb: True
batch_size: 16
valid_size: 0.2
mask_future: True
wandb_group: None
analysis_mode: online
mask_stateful: False
norm_by_sample: False
train_artifact: mi-santamaria/deepvats/gtrends_khols-normalized_yearly:v0
valid_artifact: None
norm_use_single_batch: False


### Specify the dataset artifact that we want to get the embeddings from
> If no artifact is defined, the artifact to reduce will be the one used to train the encoder.

In [14]:
# Batch size logged by the encoder run; reused below for embeddings and fine-tuning.
enc_run.config['batch_size']

16

In [15]:
# Choose the artifact to embed: the one named in the configuration, or, when none
# is given, the dataset the encoder was trained on. Record the choice in the run config.
_requested_input_ar = config.input_ar
_default_input_ar = f'{enc_artifact_train.entity}/{enc_artifact_train.project}/{enc_artifact_train.name}'
input_ar_name = ifnone(_requested_input_ar, _default_input_ar)
wandb.config.update({'input_ar': input_ar_name}, allow_val_change=True)
input_ar = artifacts_gettr(input_ar_name)
input_ar.name

'gtrends_khols-normalized_yearly:v0'

In [16]:
# Load the input artifact as a dataframe and preview its first rows.
df = input_ar.to_df()
df.head()

[34m[1mwandb[0m:   1 of 1 files downloaded.  


Unnamed: 0,volume
2004-01-01,0.090912
2004-01-08,0.090912
2004-01-15,0.090912
2004-01-22,0.0
2004-01-29,0.0


In [17]:
# (rows, columns) of the input dataframe.
df.shape

(440, 1)

In [18]:
# Override the window length and stride read from the encoder run's configuration.
# NOTE(review): these replace the logged values (w: 17, stride: 1) and mutate the fetched
# config in place, so every later cell reading enc_run.config['w'] / ['stride'] sees
# 54 / 2. Presumably only the local copy is affected — confirm, and consider moving
# these magic numbers to the parameters cell.
enc_run.config['w'] = 54
enc_run.config['stride'] = 2

In [19]:
# Cut the dataframe into sliding windows with the window length and stride currently
# held in enc_run.config; no targets are requested (get_y=[]).
_windowing = SlidingWindow(
    window_len=enc_run.config['w'],
    stride=enc_run.config['stride'],
    get_y=[],
)
enc_input, _ = _windowing(df)
enc_input.shape

(194, 1, 54)

In [20]:
# Start timing the embedding stage; the value returned by start() is shown as cell output.
timer = ut.Time()
timer.start()

1737366461.5566237

In [21]:
# Name of the encoder artifact being used.
config.enc_artifact

'mi-santamaria/deepvats/zeroshot-moirai-small:latest'

In [22]:
# Print the restored encoder (its module structure).
print(enc_learner)

MoiraiModule(
  (mask_encoding): Embedding(1, 384)
  (scaler): PackedStdScaler()
  (in_proj): MultiInSizeLinear(in_features_ls=[8, 16, 32, 64, 128], out_features=384, bias=True, dtype=torch.float32)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-5): 6 x TransformerEncoderLayer(
        (self_attn): GroupedQueryAttention(
          (var_attn_bias): BinaryAttentionBias(
            (emb): Embedding(2, 6)
          )
          (time_qk_proj): QueryKeyProjection(
            (query_proj): RotaryProjection()
            (key_proj): RotaryProjection()
          )
          (q_proj): Linear(in_features=384, out_features=384, bias=False)
          (k_proj): Linear(in_features=384, out_features=384, bias=False)
          (v_proj): Linear(in_features=384, out_features=384, bias=False)
          (q_norm): RMSNorm(normalized_shape=(64,), eps=1e-05, weight=True)
          (k_norm): RMSNorm(normalized_shape=(64,), eps=1e-05, weight=True)
          (out_proj): Linear(in_features=

In [23]:
enc_learn_class = str(enc_learner.__class__)[8:-2]

match enc_learn_class:
    case "momentfm.models.moment.MOMENTPipeline":
        get_embs_kwargs = {
            "cpu": config.cpu,
            "to_numpy": True,
            "verbose": 1
        }
    case "fastai.learner.Learner":
        get_embs_kwargs = {
            "stride": enc_run.config['stride'],
            "cpu": config.cpu,
            "to_numpy": True,
            "batch_size": enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose": 1
        }
    case "uni2ts.model.moirai.module.MoiraiModule":
        get_embs_kwargs = {
            "cpu": config.cpu,
            "to_numpy": True,
            "batch_size": enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose": 1,
            "patch_size": 8, #Modificar en config (añadir en base.yml & modificar lectura a "si existe, añadir"),
            "size": "small", #Modificar en config (añadir en base.yml & modificar lectura a "si existe, añadir"),
            "time": True
        }
    case _:
        print(f"Model embeddings implementation is not yet implemented for {enc_learn_class}.")

In [24]:
# Show the detected encoder class path.
enc_learn_class

'uni2ts.model.moirai.module.MoiraiModule'

In [25]:
# Fully qualified class name of the encoder ("module.path.ClassName"). Built from
# __module__ and __qualname__ rather than by slicing the "<class '...'>" repr, so it
# does not depend on the repr's exact layout.
_enc_cls = enc_learner.__class__
enc_learn_class = f"{_enc_cls.__module__}.{_enc_cls.__qualname__}"
enc_learn_class

'uni2ts.model.moirai.module.MoiraiModule'

In [26]:
match enc_learn_class:
    case "momentfm.models.moment.MOMENTPipeline":
        get_embs_kwargs = {
            "batch_size": enc_input.shape[0],
            "cpu"       : config.cpu,
            "to_numpy"  : True,
            "verbose"   : 1,
            "padd_step" : 10
        }
    case "fastai.learner.Learner":
        get_embs_kwargs = {
            "cpu"            : config.cpu,
            "to_numpy"       : True,
            "batch_size"     : enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose"        : 4
        }
    case "uni2ts.model.moirai.module.MoiraiModule":
        get_embs_kwargs = {
            "cpu"            : config.cpu,
            "to_numpy"       : True,
            "batch_size"     : enc_run.config['batch_size'],
            "average_seq_dim": True,
            "verbose"        : 2,
            "patch_size"     : model_patch_size, #Modificar en config (añadir en base.yml & modificar lectura a "si existe, añadir"),
            "time"           : True
        }
    case _:
        print(f"Model embeddings implementation is not yet implemented for {enc_learn_class}.")
print(f"Enc learn class {enc_learn_class}\nkwargs: {get_embs_kwargs}")

Enc learn class uni2ts.model.moirai.module.MoiraiModule
kwargs: {'cpu': False, 'to_numpy': True, 'batch_size': 16, 'average_seq_dim': True, 'verbose': 2, 'patch_size': 8, 'time': True}


In [27]:
from fastai.losses import MSELossFlat
from dvats.encoder import MAELossFlat, EvalMSE, EvalRMSE, EvalMAE, EvalSMAPE

# Call beep(1) five times in a row before the fine-tuning step.
for _beep_idx in range(5):
    beep(1)

In [28]:
# Fine-tune the restored encoder on `df`; the returned tuple is unpacked in the next cell.
# NOTE(review): the last recorded execution of this cell failed with
# "RuntimeError: The size of tensor a (384) must match the size of tensor b (128)"
# raised from get_enc_embs_moirai — the arguments below have not been validated end to end.
result = fine_tune(
    X                             = df,
    enc_learn                     = enc_learner, 
    stride                        = 1,      
    batch_size                    = enc_run.config['batch_size'],
    cpu                           = config['cpu'], 
    to_numpy                      = False, 
    verbose                       = 5, 
    time_flag                     = True,
    n_windows                     = None,
    n_windows_percent             = 0.8, # Windows to take into account
    window_mask_percent           = enc_run.config['r'],
    training_percent              = 0.3, # Train with part of the data
    validation_percent            = 0.3, # Evaluate with part of the data
    num_epochs                    = 5,
    shot                          = True,
    eval_pre                      = True,
    eval_post                     = True,
    # NOTE(review): 'r' is also used as window_mask_percent above (0.4 in the logged
    # config); confirm that reusing it as the learning rate is intended.
    lr                            = enc_run.config['r'],
    #lr_scheduler_flag             = True, # Does not work in mvp
    lr_scheduler_flag             = False,
    lr_scheduler_name             = "cosine_with_restarts",
    lr_scheduler_num_warmup_steps = None,
    window_sizes                  = None,
    n_window_sizes                = 3,
    full_dataset                  = True,
    window_sizes_offset           = 0.05,
    windows_min_distance          = 5,
    print_to_path                 = False,
    print_path                    ="~/data/logs.txt",
    print_mode                    = 'w',
    use_moment_masks              = False,
    # Masking / normalisation options are taken from the encoder's training run.
    mask_stateful                 = enc_run.config['mask_stateful'],
    mask_future                   = enc_run.config['mask_future'],
    mask_sync                     = enc_run.config['mask_sync'],
    analysis_mode                 = enc_run.config['analysis_mode'],
    use_wandb                     = enc_run.config['use_wandb'],
    norm_by_sample                = enc_run.config['norm_by_sample'],
    norm_use_single_batch         = enc_run.config['norm_use_single_batch'],
    show_plot                     = True,
    # mvp
    #metrics                       = [MSELossFlat, RMSELossFlat, SMAPELossFlat, MAELossFlat],
    # moment/moirai
    metrics                        = [EvalMSE, EvalRMSE, EvalMAE, EvalSMAPE],
    # NOTE(review): 'squared': False is paired with EvalMSE and 'squared': True with
    # EvalRMSE — confirm these two are not swapped.
    metrics_args                   = [{'squared': False}, {'squared': True}, {}, {}],
    metrics_names                  = ["mse", "rmse", "mae", "smape"],
    metrics_dict                   = None
    
)

[5] [ --> _get_encoder ]
[5]  [ _get_encoder ] About to exec _get_enc_input
[5] [ --> _get_enc_input ]
[5]  [ _get_enc_input ] is none enc_input? True
[5]  [ _get_enc_input ] About to get the windows
[5] [ --> windowed_dataset ]
[5]  [ _get_enc_input ] X is a DataFrame, X~(440, 1) | window_sizes 0, n_window_sizes 3
[5]  [ _get_enc_input ] X is a DataFrame | Selecting Fourier's dominant frequences
[5] [ --> Find_dominant_window_sizes_list ]
[5]  [ Find_dominant_window_sizes_list ] X ~ (440, 1)
[5]  [ Find_dominant_window_sizes_list ] Get sizes for var 0
[5] [ --> find_dominant_window_sizes_list_single ]
[5]  [ Find_dominant_window_sizes_list ] X ~ (440,)
[5]  [ Find_dominant_window_sizes_list ] Looking for - at most - the best 3 window sizes
[5]  [ Find_dominant_window_sizes_list ] Offset 0.05 max size: 22.0
[5]  [ Find_dominant_window_sizes_list ] --> Freqs
[5]  [ Find_dominant_window_sizes_list ] Find_dominant_window_sizes_list | Freqs [ 0.          0.00227273  0.00454545  0.00681818 

  0% 0/7 [00:00<?, ?it/s]

[3] --> get_enc_embs_moirai
[3] get_enc_embs_moirai | Using CUDA
[3] get_enc_embs_moirai | Get Outputs
--> get_enc_embs_moirai | past_target ~ torch.Size([16, 17, 1])
--> get_enc_embs_moirai | past_observed_target ~ torch.Size([16, 17, 1])
--> get_enc_embs_moirai | past_is_pad ~ torch.Size([16, 17])
--> get_enc_embs_moirai | Auxiliar model
--> get_enc_embs_moirai | Auxiliar model | Before Memory:
GPU | Used mem: 0
GPU | Used mem: 48
GPU | Memory Usage: [[90m--------------------[0m] [90m0%[0m
--> get_enc_embs_moirai | Auxiliar model | After Memory:
GPU | Used mem: 0
GPU | Used mem: 48
GPU | Memory Usage: [[90m--------------------[0m] [90m0%[0m
--> get_enc_embs_moirai | Convert sizes
get_enc_embs_moirai | target ~ torch.Size([16, 4, 128])
get_enc_embs_moirai | observed_mask ~ torch.Size([16, 4, 128])
get_enc_embs_moirai | sample_id ~ torch.Size([16, 4])
get_enc_embs_moirai | time_id ~ torch.Size([16, 4])
get_enc_embs_moirai | variate_id ~ torch.Size([16, 4])
get_enc_embs_moirai |

RuntimeError: The size of tensor a (384) must match the size of tensor b (128) at non-singleton dimension 2

In [None]:
# Unpack the eight values returned by fine_tune.
losses, eval_results_pre, eval_results_post, t_shots, t_shot, t_evals, t_eval, model = result

# Report the evaluation results gathered before and after fine-tuning.
print("Eval results pre:")
show_attrdict(eval_results_pre)
print("Eval results post:")
print(eval_results_post)
# Cell output: number of entries in the post-fine-tuning results.
len(eval_results_post)

In [None]:
# TODO: change this once everything is done with classes.
# Attach the evaluation results to an Encoder instance so they can be plotted.
_mssg = Mssg(level=-1, verbose=1)
enc = Encoder(mssg=_mssg)
enc.eval_stats_pre = eval_results_pre
enc.eval_stats_post = eval_results_post
enc.num_epochs = 5  # same value as num_epochs in the fine_tune call
for _stats in (enc.eval_stats_pre, enc.eval_stats_post):
    print(_stats)
plot_eval_stats(enc)

In [None]:
# Compute the embeddings of the windowed input with the class-specific kwargs.
_embs_stride = enc_run.config['stride']
embs = get_enc_embs_set_stride_set_batch_size(X=enc_input, enc_learn=enc_learner, stride=_embs_stride, **get_embs_kwargs)

In [None]:
# Shape of the computed embeddings.
embs.shape

In [None]:
#enc_learner.task_name

In [None]:
# Stop the timer started before the embedding stage and display it.
timer.end()
timer.show()

In [None]:
#| export
# Optional end-of-run message, followed by five beep(1) calls.
if verbose > 0:
    print("Execution ended")
from dvats.imports import beep
for _beep_idx in range(5):
    beep(1)

In [None]:
#| hide
# When requested, terminate the kernel process immediately with exit status 0.
if reset_kernel:
    import os
    os._exit(0)