In [1]:
# Step up to the parent directory before anything is imported.
# NOTE(review): presumably the notebook lives in a subfolder of the repository and the
# project-local imports below must resolve from the repository root — confirm.
%cd ..

/misc/vlgscratch4/LakeGroup/wentao/multimodal-baby


In [2]:
import itertools
import functools
import numpy as np
import torch
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import seaborn as sns
from multimodal.multimodal_data_module import PAD_TOKEN_ID, UNK_TOKEN_ID, SOS_TOKEN_ID, EOS_TOKEN_ID, SOS_TOKEN, EOS_TOKEN
from ngram import NGramModel
from analysis_tools.processing import examples_from_dataloader, get_pos_tags
from analysis_tools.build_data import build_data


# Default figure size, also handed to seaborn as the 'figure.figsize' rc entry.
figsize = (13, 12)

# Seaborn theme: white style, monospace font, and a bare canvas.
sns.set_theme(
    style='white',
    font='monospace',
    rc={
        'figure.figsize': figsize,
        # Turn off the grid, every tick mark and every axis spine.
        **dict.fromkeys(
            (
                'axes.grid',
                'xtick.bottom',
                'xtick.top',
                'ytick.left',
                'ytick.right',
                'axes.spines.left',
                'axes.spines.bottom',
                'axes.spines.right',
                'axes.spines.top',
            ),
            False,
        ),
    },
)

# Console output: no scientific notation, two decimals, and 120-character lines
# for both numpy arrays and pandas objects.
np.set_printoptions(precision=2, suppress=True, linewidth=120)
pd.set_option('display.width', 120)

2022-07-10 10:57:34 INFO: Loading these models for language: en (English):
| Processor | Package  |
------------------------
| tokenize  | combined |
| pos       | combined |

2022-07-10 10:57:35 INFO: Use device: gpu
2022-07-10 10:57:35 INFO: Loading: tokenize
2022-07-10 10:57:35 INFO: Loading: pos
2022-07-10 10:57:38 INFO: Done loading processors!


In [3]:
# Use CUDA when it is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# build data and vocab according to the model
data, args = build_data(return_args=True)
dataset_name = args.dataset

# The vocabulary attached to the training dataset doubles as the word -> index lookup.
vocab = data.datasets['train'].vocab
word2idx = vocab
vocab_size = len(vocab)
print(f'vocab_size = {vocab_size}')

# Inverse lookup (index -> word), obtained by flipping every (word, index) pair.
idx2word = dict((index, word) for word, index in word2idx.items())

  rank_zero_deprecation(


Using base transforms
Calling prepare_data!
SAYCam transcripts have already been downloaded. Skipping this step.
Transcripts have already been renamed. Skipping this step.
Transcripts have already been preprocessed. Skipping this step.
Training frames have already been extracted. Skipping this step.
Training metadata files have already been created. Skipping this step.
Evaluation frames have already been extracted. Skipping this step.
Evaluation metadata files have already been created. Skipping this step.
Evaluation metadata files have already been created. Skipping this step.
Extra evaluation metadata files have already been created. Skipping this step.
Vocabulary file already exists. Skipping this step.
Calling setup!
vocab_size = 2350


In [4]:
my_batch_size = 256

# Each entry is a zero-argument factory, so a new dataloader is built on every call.
# The val/test loaders are indexed with [0] because those methods return a sequence here.
dataloader_fns = {
    'train': lambda: data.train_dataloader(batch_size=my_batch_size, shuffle=False, drop_last=False),
    'val': lambda: data.val_dataloader(batch_size=my_batch_size)[0],
    'test': lambda: data.test_dataloader(batch_size=my_batch_size)[0],
}

# POS-tag prefixes whose tokens get blanked out.
# NOTE(review): presumably Penn-Treebank-style noun (NN*) and verb (VB*) tags —
# confirm against what get_pos_tags returns.
masked_pos_prefixes = ('NN', 'VB')

# Sentence-boundary markers that are never printed.
special_tokens = (SOS_TOKEN, EOS_TOKEN)


def mask_token(tokens, masked_index):
    """Render a tokenised utterance with one token replaced by underscores.

    Args:
        tokens: token strings of one utterance; may contain SOS/EOS markers.
        masked_index: position in ``tokens`` (counted before the SOS/EOS markers
            are dropped) of the token to blank out.

    Returns:
        The space-joined tokens without SOS/EOS markers, where the token at
        ``masked_index`` is replaced by as many underscores as it has characters.
        If that position holds an SOS/EOS marker, nothing is masked.
    """
    return ' '.join(
        '_' * len(token) if j == masked_index else token
        for j, token in enumerate(tokens)
        if token not in special_tokens
    )


splits = ['val']

for split in splits:
    dataloader_fn = dataloader_fns[split]
    pos_tags = get_pos_tags(dataloader_fn(), dataset_name, split)

    # A second, fresh dataloader is walked in step with the POS tags computed above.
    for (_x, y, y_len, _raw_y), y_pos_tags in zip(examples_from_dataloader(dataloader_fn()), pos_tags):
        num_tokens = y_len.item()
        # Only the first num_tokens ids are kept; the rest of y is dropped.
        y_tokens = [idx2word[token_id] for token_id in y[:num_tokens].tolist()]

        # zip() with range(num_tokens) stops at the utterance length, so tags past the
        # kept tokens are never inspected.
        for i, pos in zip(range(num_tokens), y_pos_tags):
            # str.startswith accepts a tuple and matches any of the prefixes.
            if pos.startswith(masked_pos_prefixes):
                print(mask_token(y_tokens, i))

load cached pos tags: dataset_cache/saycam/val.pos.cache
i ____ .
we should ____ on some lights , huh ?
we should turn on some ______ , huh ?
here , ___ me show you .
here , let me ____ you .
yeah it __ this this one that 's really
yeah it 's this this ___ that 's really
yeah it 's this this one that __ really
what __ wrong with it ? "
we should _______ our <unk> in case it <unk> .
we should brought our <unk> in ____ it <unk> .
we should brought our <unk> in case it _____ .
that __ right !
here i should __ it
we can __ walk over there if you want .
we can go ____ over there if you want .
we can go walk over there if you ____ .
yeah , he __ dancing , he is wiggling , and he is <unk> .
yeah , he is _______ , he is wiggling , and he is <unk> .
yeah , he is dancing , he __ wiggling , and he is <unk> .
yeah , he is dancing , he is ________ , and he is <unk> .
yeah , he is dancing , he is wiggling , and he __ <unk> .
yeah , he is dancing , he is wiggling , and he is _____ .
i __ coming .
i '

there __ yummy milk , the baby ...
there 's yummy ____ , the baby ...
there 's yummy milk , the ____ ...
a ____ .
you can ____ the yogurt and the cupcake .
you can feel the ______ and the cupcake .
you can feel the yogurt and the _______ .
yeah , ____ is talking too .
yeah , toby __ talking too .
yeah , toby is _______ too .
______ is not for sam .
bottle __ not for sam .
bottle is not for ___ .
____ at me .
____ at all these plants and weeds .
look at all these ______ and weeds .
look at all these plants and _____ .
that __ the house for the animals .
that 's the _____ for the animals .
that 's the house for the _______ .
he _____ like you , sam .
he looks like you , ___ .
i _____ that 's all .
i think that __ all .
there you __ !
__ you want me to put a ribbon on the other dummy ?
do you ____ me to put a ribbon on the other dummy ?
do you want me to ___ a ribbon on the other dummy ?
do you want me to put a ______ on the other dummy ?
do you want me to put a ribbon on the other _____ 

all ____ !
how __ that ?
and we will __ the <unk> as well .
and we will do the _____ as well .
i __ n't know when one will come .
i do n't ____ when one will come .
i do n't know when one will ____ .
___ , he is cute .
see , he __ cute .
____ at that .
you ___ taking all the colors off one by one , are n't you ?
you 're ______ all the colors off one by one , are n't you ?
you 're taking all the ______ off one by one , are n't you ?
you 're taking all the colors off one by one , ___ n't you ?
_____ <unk> .
<unk> _____ .
___ .
a ____ and a pig .
a duck and a ___ .
that ____ with that .
uh oh , the ____ opened .
uh oh , the door ______ .
that __ right !
can you ___ me a big ball ?
can you get me a big ____ ?
i __ n't know what you 're saying sam .
i do n't ____ what you 're saying sam .
i do n't know what you ___ saying sam .
i do n't know what you 're ______ sam .
i do n't know what you 're saying ___ .
i __ gon na give you some .
i 'm ___ na give you some .
i 'm gon na ____ you some .
_

to ____ it open you have to push this in
to make it open you ____ to push this in
to make it open you have to ____ this in
what __ wrong ?
__ it all gone ?
is it all ____ ?
__ you want me to <unk> do you want to do it by youself
do you ____ me to <unk> do you want to do it by youself
do you want me to _____ do you want to do it by youself
do you want me to <unk> __ you want to do it by youself
do you want me to <unk> do you ____ to do it by youself
do you want me to <unk> do you want to __ it by youself
we ______ it .
" you ___ standing , you are , yeah "
" you are ________ , you are , yeah "
" you are standing , you ___ , yeah "
they _____ , did n't they ?
they broke , ___ n't they ?
__ you remember where your pants are ?
do you ________ where your pants are ?
do you remember where your _____ are ?
do you remember where your pants ___ ?
___ , he is gone .
ohp , he __ gone .
ohp , he is ____ .
____ some grapefruit ?
want some __________ ?
where __ the doggie ?
where 's the ______ ?
her