In [1]:
#hide
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to library code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Encoder - MVP

> Self-supervised learning via masked value prediction (MVP) as a way to create the embeddings

In [2]:
from tsai.all import *
from tchub.all import *
import wandb
from fastai.callback.wandb import WandbCallback

In [None]:
# Hyperparameters and experiment-tracking settings for this notebook.
# The whole object is handed to wandb.init as the run configuration.
config = AttrDict(
    # --- experiment tracking (wandb) ---
    # Whether to use or not wandb for experiment tracking
    use_wandb=True,
    # Whether to group this run in a wandb group
    wandb_group=None,
    wandb_entity='vrodriguezf90',
    wandb_project='tchub',
    # --- input data (wandb artifacts) ---
    train_artifact='taxi:v4',
    valid_artifact='taxi:v4',
    # --- sliding window ---
    # window size for the sliding window
    w=48,
    # n datapoints the window is moved ahead along the sequence in the sliding window
    stride=1,
    # --- model / training ---
    output_filter_size=10,
    batch_size=256,
    epochs=50,
)

In [None]:
#hide
# Collect the wandb.init arguments first, then start the run.
init_kwargs = dict(
    entity=config.wandb_entity,
    # work-nbs is a place to log draft runs
    project=config.wandb_project if config.use_wandb else 'work-nbs',
    group=config.wandb_group,
    job_type='encoder_MVP',
    allow_val_change=True,
    # Need to be always online to download artifacts
    mode='online',
    config=config,
    # When use_wandb is false the run is not linked to a personal account
    # NOTE: This is not working right now
    anonymous='never' if config.use_wandb else 'must',
    resume=False,
)
run = wandb.init(**init_kwargs)

# From here on `config` is the run's config object (object for storing hyperparameters).
config = run.config

In [None]:
# Build the '<project>/<artifact>' identifiers, fetch both artifacts through the
# run and load each one as a dataframe.
train_artifact_path = f'{config.wandb_project}/{config.train_artifact}'
valid_artifact_path = f'{config.wandb_project}/{config.valid_artifact}'

train_artifact = run.use_artifact(train_artifact_path)
valid_artifact = run.use_artifact(valid_artifact_path)

df_train, df_val = train_artifact.to_df(), valid_artifact.to_df()

# Show the shapes of both dataframes as the cell output.
(df_train.shape, df_val.shape)

In [None]:
# Window both dataframes with the size and stride taken from the config.
# get_y=[] presumably tells tsai not to build targets (confirm in the tsai docs);
# the second returned value is discarded either way.
sw = SlidingWindow(
    window_len=config.w,
    stride=config.stride,
    get_y=[],
)
(X_train, _), (X_valid, _) = sw(df_train), sw(df_val)

print(
    f'Training shape: {X_train.shape}',
    f'Validation shape: {X_valid.shape}',
    sep='\n',
)

In [None]:
# Combine the training and validation windows and obtain the matching splits.
# y is not needed, but something is breaking in the dataloaders if we do not include it
window_sets = [X_train, X_valid]
X, _, splits = combine_split_data(xs=window_sets)
splits

In [None]:
# Build the time-series dataloaders from the combined windows.
# Item transforms: ToFloat() for X; None for the second (target) slot.
tfms = [ToFloat(), None]
# Batch transform: standardization with by_sample=True.
batch_tfms = [TSStandardize(by_sample=True)]
# Pass the batch size declared in the config (and logged to wandb). Previously it
# was never forwarded, so the library default was used and the logged
# hyperparameter did not match the actual training setup.
dls = get_ts_dls(X, splits=splits, tfms=tfms, batch_tfms=batch_tfms,
                 bs=config.batch_size)
dls.show_at(0)

In [None]:
# Create the learner with the MVP (masked value prediction) callback, which is
# configured to write to ./models under the name 'encoder_MVP'.
learn = ts_learner(dls, InceptionTimePlus,
                   cbs=[ShowGraph(),
                        WandbCallback(log_preds=False),
                        MVP(r=0.5, target_dir='./models', fname='encoder_MVP')])
# Ask the LR finder for both suggestions; only the 'valley' one is used for training.
lr_valley, lr_steep = learn.lr_find(suggest_funcs=(valley, steep))
# Train for the number of epochs declared in the config (it was hard-coded to 50,
# so changing config.epochs had no effect on the run).
learn.fit_one_cycle(n_epoch=config.epochs, lr_max=lr_valley)

In [None]:
# Plot the predictions of the MVP callback attached to the learner.
# sharey=True presumably makes the subplots share the y axis — confirm in the tsai docs.
learn.MVP.show_preds(sharey=True)

In [None]:
# Log the learner without the datasets
aux_learn = learn.export_and_get()
# wandb documents `aliases` as a list of strings; a bare string may be treated as
# an iterable of characters, so the single alias is wrapped in a list.
learner_aliases = [f'run-{run.project}-{run.id}']
run.log_artifact(ReferenceArtifact(aux_learn, 'learner-mvp'), aliases=learner_aliases)

In [None]:
# Close the wandb run started earlier in this notebook.
run.finish()