In [1]:
import os

import torch
from datasets import ClassLabel, load_dataset, load_metric
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer, DataCollatorWithPadding

os.environ["WANDB_DISABLED"] = "true"

In [2]:
raw_datasets = load_dataset('csv', data_files='dataset.csv')['train'].train_test_split(test_size=0.1)
raw_datasets

Using custom data configuration default-ef1d1d836b21f3db
Reusing dataset csv (/home/veselin/.cache/huggingface/datasets/csv/default-ef1d1d836b21f3db/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519)


  0%|          | 0/1 [00:00<?, ?it/s]

DatasetDict({
    train: Dataset({
        features: ['Unnamed: 0', 'video_id', 'text', 'label'],
        num_rows: 193716
    })
    test: Dataset({
        features: ['Unnamed: 0', 'video_id', 'text', 'label'],
        num_rows: 21525
    })
})

In [3]:
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)

  0%|          | 0/194 [00:00<?, ?ba/s]

  0%|          | 0/22 [00:00<?, ?ba/s]

In [4]:
cleaned_datasets = tokenized_datasets.remove_columns(['Unnamed: 0', 'text', 'video_id'])
cleaned_datasets = cleaned_datasets.class_encode_column('label')
train_dataset = cleaned_datasets['train']
test_dataset = cleaned_datasets['test']

small_train_dataset = train_dataset.shuffle(seed=42).select(range(1000))
small_eval_dataset = test_dataset.shuffle(seed=42).select(range(1000))

Casting to class labels:   0%|          | 0/194 [00:00<?, ?ba/s]

Casting the dataset:   0%|          | 0/20 [00:00<?, ?ba/s]

Casting to class labels:   0%|          | 0/22 [00:00<?, ?ba/s]

Casting the dataset:   0%|          | 0/3 [00:00<?, ?ba/s]

In [5]:
torch.cuda.empty_cache()
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
training_args = TrainingArguments(output_dir="trained", per_device_train_batch_size=6, save_total_limit=5, save_steps=5_000)
metric = load_metric("accuracy")

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    return metric.compute(predictions=predictions, references=labels)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    compute_metrics=compute_metrics,
)

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing DistilBertForSequenceClassification: ['vocab_transform.weight', 'vocab_projector.bias', 'vocab_layer_norm.bias', 'vocab_projector.weight', 'vocab_transform.bias', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'pre_classifier.bias', 'pre_classifi

In [6]:
trainer.train()

***** Running training *****
  Num examples = 193716
  Num Epochs = 3
  Instantaneous batch size per device = 6
  Total train batch size (w. parallel, distributed & accumulation) = 6
  Gradient Accumulation steps = 1
  Total optimization steps = 96858


Step,Training Loss
500,0.3401
1000,0.2842
1500,0.2716
2000,0.249
2500,0.2804
3000,0.2629
3500,0.2271
4000,0.2198
4500,0.2384
5000,0.2249


Saving model checkpoint to trained/checkpoint-5000
Configuration saved in trained/checkpoint-5000/config.json
Model weights saved in trained/checkpoint-5000/pytorch_model.bin
Saving model checkpoint to trained/checkpoint-10000
Configuration saved in trained/checkpoint-10000/config.json
Model weights saved in trained/checkpoint-10000/pytorch_model.bin
Saving model checkpoint to trained/checkpoint-15000
Configuration saved in trained/checkpoint-15000/config.json
Model weights saved in trained/checkpoint-15000/pytorch_model.bin
Saving model checkpoint to trained/checkpoint-20000
Configuration saved in trained/checkpoint-20000/config.json
Model weights saved in trained/checkpoint-20000/pytorch_model.bin


RuntimeError: CUDA error: the launch timed out and was terminated
CUDA kernel errors might be asynchronously reported at some other API call,so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.

In [6]:
trained = AutoModelForSequenceClassification.from_pretrained('./trained/checkpoint-20000')
i = 0
for example in small_eval_dataset.remove_columns('label'):
    print(tokenizer.decode(example['input_ids']))
    example = { k: torch.tensor([v]) for k, v in example.items() }
    outputs = trained(**example)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    torch.set_printoptions(precision=30)
    print(predictions)
    i += 1
    if i > 5:
        break

[CLS] something that i begin within 24 hours of a new sets release and our newest set magic the gathering adventures in the forgotten realms of dungeons and dragons mtg set trademark just released and i realized oh no i better start my predictions for the fall set before preview for that set inevitably begins i didn't even get to make a prediction video for the dungeons and dragons magic set because previews began erupting all over the place so soon after modern horizons 2 that it was simply pointless for me to do so this video won't even be able to be edited and up until maybe midweek so for all i know a whole ton of previews for innistrad will already have begun i mean it's been a week without previews gotta get rolling on the next set right no time to enjoy this set hey remember call time remember strictsaven strictsaven no neither do i i mean previews seem to have unintentionally started with the possible oopsie of registered trademarks being discovered for a return to dominaria an

tensor([[0.002715835114941000938415527344, 0.997284173965454101562500000000]],
       grad_fn=<SoftmaxBackward0>)
[CLS] - okay. probably. - get in the box. the giveaway link is down in the description, and huge thanks to t - mobile and oneplus for putting this on. and thanks to dez for helping me out. - you're welcome! - thanks a ton for watching. i'll see you around. [SEP] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD]

tensor([[0.935834825038909912109375000000, 0.064165204763412475585937500000]],
       grad_fn=<SoftmaxBackward0>)


In [9]:
import pandas as pd
import itertools

videos = list(itertools.islice(pd.read_json('../data.1.json.gz', orient='record', compression='infer').itertuples(index=False, name=None), 10))

In [27]:
def window(seq, n):
    it = iter(seq)
    result = tuple(itertools.islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        result = result[1:] + (elem,)
        yield result
        
for captions in window(videos[2][1], 5):
    text = ' '.join((caption['text'] for caption in captions))
    inputs = tokenize_function({ 'text': text })
    inputs = { k: torch.tensor([v]) for k, v in inputs.items() }
    outputs = trained(**inputs)
    non_sponsor, sponsor = torch.nn.functional.softmax(outputs.logits, dim=-1).tolist()[0]
    
    print({ 'non_sponsor': '%.2f' % non_sponsor, 'sponsor': '%.2f' % sponsor }, text)
    

{'non_sponsor': '0.99', 'sponsor': '0.01'} oh my god  i'm holding my life back in my hands
 right now it feels like this is the only
 good thing that being severely anemic
has ever done for me
{'non_sponsor': '0.99', 'sponsor': '0.01'}  i'm holding my life back in my hands
 right now it feels like this is the only
 good thing that being severely anemic
has ever done for me [Music]
{'non_sponsor': '0.99', 'sponsor': '0.01'} i'm holding my life back in my hands
 right now it feels like this is the only
 good thing that being severely anemic
has ever done for me [Music] 
{'non_sponsor': '0.99', 'sponsor': '0.01'} right now it feels like this is the only
 good thing that being severely anemic
has ever done for me [Music]  okay wait actually before we go to

{'non_sponsor': '0.99', 'sponsor': '0.01'} good thing that being severely anemic
has ever done for me [Music]  okay wait actually before we go to
 boston i want to tell you guys about the

{'non_sponsor': '0.00', 'sponsor': '1.00'} [Mus

{'non_sponsor': '0.99', 'sponsor': '0.01'} and current is the only place where that
 is happening other cool things about
 current is that they have over 55 000
 free atms for you to use they give you
 instant refunds on gas station holes

{'non_sponsor': '0.98', 'sponsor': '0.02'} is happening other cool things about
 current is that they have over 55 000
 free atms for you to use they give you
 instant refunds on gas station holes
 they work with all the major money

{'non_sponsor': '0.45', 'sponsor': '0.55'} current is that they have over 55 000
 free atms for you to use they give you
 instant refunds on gas station holes
 they work with all the major money
 transferring services like google pay

{'non_sponsor': '0.98', 'sponsor': '0.02'} free atms for you to use they give you
 instant refunds on gas station holes
 they work with all the major money
 transferring services like google pay
 apple pay venmo

{'non_sponsor': '0.99', 'sponsor': '0.01'} instant refunds on gas station hole

{'non_sponsor': '0.99', 'sponsor': '0.01'} me rory and johnny all decided to come
 home to boston this week just to get a
 little bit of a break
 and see our families but another reason
 why i wanted to come home

{'non_sponsor': '0.99', 'sponsor': '0.01'} home to boston this week just to get a
 little bit of a break
 and see our families but another reason
 why i wanted to come home
 is because boston announced that all of
the
{'non_sponsor': '0.99', 'sponsor': '0.01'} little bit of a break
 and see our families but another reason
 why i wanted to come home
 is because boston announced that all of
the the

{'non_sponsor': '0.99', 'sponsor': '0.01'} and see our families but another reason
 why i wanted to come home
 is because boston announced that all of
the the
 arenas and sports and concert venues are

{'non_sponsor': '0.99', 'sponsor': '0.01'} why i wanted to come home
 is because boston announced that all of
the the
 arenas and sports and concert venues are
 opening back up for sp

{'non_sponsor': '0.99', 'sponsor': '0.01'} got my drink i got
 a grande vanilla sweet cream cold brew
 with white mocha and caramel syrup yes
 so much sugar and crap in it but
 whatever i do not like the taste of

{'non_sponsor': '0.99', 'sponsor': '0.01'} a grande vanilla sweet cream cold brew
 with white mocha and caramel syrup yes
 so much sugar and crap in it but
 whatever i do not like the taste of
 coffee at all so i like to drown it out
with
{'non_sponsor': '0.99', 'sponsor': '0.01'} with white mocha and caramel syrup yes
 so much sugar and crap in it but
 whatever i do not like the taste of
 coffee at all so i like to drown it out
with with

{'non_sponsor': '0.99', 'sponsor': '0.01'} so much sugar and crap in it but
 whatever i do not like the taste of
 coffee at all so i like to drown it out
with with
 a million different things this is

{'non_sponsor': '0.99', 'sponsor': '0.01'} whatever i do not like the taste of
 coffee at all so i like to drown it out
with with
 a million 

{'non_sponsor': '0.99', 'sponsor': '0.01'} of people at an arena again like i just
 i'm still like what the [ __ ] was that
 was the past year blur
 and just seeing the arena at only 12 i
 was just like what the [ __ ] is going on

{'non_sponsor': '0.99', 'sponsor': '0.01'} i'm still like what the [ __ ] was that
 was the past year blur
 and just seeing the arena at only 12 i
 was just like what the [ __ ] is going on
 but that was so

{'non_sponsor': '0.99', 'sponsor': '0.01'} was the past year blur
 and just seeing the arena at only 12 i
 was just like what the [ __ ] is going on
 but that was so
 liberating and i cannot wait for you all

{'non_sponsor': '0.99', 'sponsor': '0.01'} and just seeing the arena at only 12 i
 was just like what the [ __ ] is going on
 but that was so
 liberating and i cannot wait for you all
 to experience that too like if sports

{'non_sponsor': '0.99', 'sponsor': '0.01'} was just like what the [ __ ] is going on
 but that was so
 liberating and i cannot 

{'non_sponsor': '0.99', 'sponsor': '0.01'} my dad and aiden but now tonight is
 about my mom
 i'm going out with her and her friends
 the ladies are going to go get some
drinks drinks

{'non_sponsor': '0.99', 'sponsor': '0.01'} about my mom
 i'm going out with her and her friends
 the ladies are going to go get some
drinks drinks
 let me show you the view of the hotel

{'non_sponsor': '0.99', 'sponsor': '0.01'} i'm going out with her and her friends
 the ladies are going to go get some
drinks drinks
 let me show you the view of the hotel
 room right now i'm sure you really care

{'non_sponsor': '0.95', 'sponsor': '0.05'} the ladies are going to go get some
drinks drinks
 let me show you the view of the hotel
 room right now i'm sure you really care
 but i personally appreciate it so i want

{'non_sponsor': '0.98', 'sponsor': '0.02'} drinks
 let me show you the view of the hotel
 room right now i'm sure you really care
 but i personally appreciate it so i want
 you guys to appreciate wi

{'non_sponsor': '0.99', 'sponsor': '0.01'} yeah it's going to bathroom we're
 currently going on a shopping spree
 where are we going first
 brandy because kayla's having me we love
 that live oh i'm having a meet and greet

{'non_sponsor': '0.99', 'sponsor': '0.01'} currently going on a shopping spree
 where are we going first
 brandy because kayla's having me we love
 that live oh i'm having a meet and greet
 no in the most literal sense two weeks

{'non_sponsor': '0.99', 'sponsor': '0.01'} where are we going first
 brandy because kayla's having me we love
 that live oh i'm having a meet and greet
 no in the most literal sense two weeks
 ago i bought every single color

{'non_sponsor': '0.99', 'sponsor': '0.01'} brandy because kayla's having me we love
 that live oh i'm having a meet and greet
 no in the most literal sense two weeks
 ago i bought every single color
 zip up that brandy has now i have six of

{'non_sponsor': '0.99', 'sponsor': '0.01'} that live oh i'm having a meet and

{'non_sponsor': '0.99', 'sponsor': '0.01'} so yeah we're doing that and i'm going
 to go make a mistake somewhere so we'll
 keep you updated on the place that i
 choose to make mistake at i almost just
 ran into a poll

{'non_sponsor': '0.99', 'sponsor': '0.01'} to go make a mistake somewhere so we'll
 keep you updated on the place that i
 choose to make mistake at i almost just
 ran into a poll
 james where are you going jc smoothie

{'non_sponsor': '0.99', 'sponsor': '0.01'} keep you updated on the place that i
 choose to make mistake at i almost just
 ran into a poll
 james where are you going jc smoothie
 are you coming with me oh we've never

{'non_sponsor': '0.99', 'sponsor': '0.01'} choose to make mistake at i almost just
 ran into a poll
 james where are you going jc smoothie
 are you coming with me oh we've never
 been there before but we must have run
hey guys let's go
{'non_sponsor': '0.99', 'sponsor': '0.01'} ran into a poll
 james where are you going jc smoothie
 are you c

{'non_sponsor': '0.99', 'sponsor': '0.01'} accidentally moved over like right
 and it was just 22. yeah yeah oh oh miss
 girl ole miss girl
 mommy tell us what you did we went up
 from the last mistake that i made and

{'non_sponsor': '0.99', 'sponsor': '0.01'} and it was just 22. yeah yeah oh oh miss
 girl ole miss girl
 mommy tell us what you did we went up
 from the last mistake that i made and
 today i made two mistakes at once

{'non_sponsor': '0.99', 'sponsor': '0.01'} girl ole miss girl
 mommy tell us what you did we went up
 from the last mistake that i made and
 today i made two mistakes at once
 and now i'm going to a steakhouse i'm

{'non_sponsor': '0.99', 'sponsor': '0.01'} mommy tell us what you did we went up
 from the last mistake that i made and
 today i made two mistakes at once
 and now i'm going to a steakhouse i'm
 also going to steakhouse i'll be there

{'non_sponsor': '0.99', 'sponsor': '0.01'} from the last mistake that i made and
 today i made two mistakes at on

{'non_sponsor': '0.99', 'sponsor': '0.01'} mistakes that have been made this is
 very unlike me
 i bought one thing that i have been
 begging for for years from myself and
 myself finally let myself have it so it

{'non_sponsor': '0.99', 'sponsor': '0.01'} very unlike me
 i bought one thing that i have been
 begging for for years from myself and
 myself finally let myself have it so it
 was a moment but first drum roll here we
go
{'non_sponsor': '0.99', 'sponsor': '0.01'} i bought one thing that i have been
 begging for for years from myself and
 myself finally let myself have it so it
 was a moment but first drum roll here we
go 
{'non_sponsor': '0.99', 'sponsor': '0.01'} begging for for years from myself and
 myself finally let myself have it so it
 was a moment but first drum roll here we
go  look at the packaging why do i have this

{'non_sponsor': '0.99', 'sponsor': '0.01'} myself finally let myself have it so it
 was a moment but first drum roll here we
go  look at the packaging 

{'non_sponsor': '0.99', 'sponsor': '0.01'} deserve them should have gotten a
 manicure first
 i was just about to here they are
 the next step manicure yes let's just
 get on that they only would let me have

{'non_sponsor': '0.99', 'sponsor': '0.01'} manicure first
 i was just about to here they are
 the next step manicure yes let's just
 get on that they only would let me have
 one shoe again

{'non_sponsor': '0.99', 'sponsor': '0.01'} i was just about to here they are
 the next step manicure yes let's just
 get on that they only would let me have
 one shoe again
 the other one's in a bag but i just

{'non_sponsor': '0.99', 'sponsor': '0.01'} the next step manicure yes let's just
 get on that they only would let me have
 one shoe again
 the other one's in a bag but i just
 don't feel like unboxing that one so

{'non_sponsor': '0.99', 'sponsor': '0.01'} get on that they only would let me have
 one shoe again
 the other one's in a bag but i just
 don't feel like unboxing that one so
 t

{'non_sponsor': '0.99', 'sponsor': '0.01'} the only reason why i didn't black out
 at the cash register is because i think
 about this bag 24 7 and if i didn't get
 it now i would have got it at another
 point in time because this is the only

{'non_sponsor': '0.99', 'sponsor': '0.01'} at the cash register is because i think
 about this bag 24 7 and if i didn't get
 it now i would have got it at another
 point in time because this is the only
 bag i think about because because it's
gorgina
{'non_sponsor': '0.99', 'sponsor': '0.01'} about this bag 24 7 and if i didn't get
 it now i would have got it at another
 point in time because this is the only
 bag i think about because because it's
gorgina gorgina

{'non_sponsor': '0.99', 'sponsor': '0.01'} it now i would have got it at another
 point in time because this is the only
 bag i think about because because it's
gorgina gorgina
 that's james's favorite word for those

{'non_sponsor': '0.99', 'sponsor': '0.01'} point in time because thi

{'non_sponsor': '0.95', 'sponsor': '0.05'} here we go
here we are give it up for us  me putting applause in the background
 without further ado we're now going to
go to the palm and black out right mike 
{'non_sponsor': '0.95', 'sponsor': '0.05'}  me putting applause in the background
 without further ado we're now going to
go to the palm and black out right mike  all right mike what are you getting at

{'non_sponsor': '0.95', 'sponsor': '0.05'} me putting applause in the background
 without further ado we're now going to
go to the palm and black out right mike  all right mike what are you getting at
 the palm tonight the most expensive

{'non_sponsor': '0.99', 'sponsor': '0.01'} without further ado we're now going to
go to the palm and black out right mike  all right mike what are you getting at
 the palm tonight the most expensive
 steak on the menu

{'non_sponsor': '0.99', 'sponsor': '0.01'}  all right mike what are you getting at
 the palm tonight the most expensive
 steak on the m

{'non_sponsor': '0.99', 'sponsor': '0.01'} not even funny i needed that
 so bad and coming home is just so fun
 now and the fact that everyone just
 got to be all together last night just
 made me so [ __ ] happy i wanted to

{'non_sponsor': '0.99', 'sponsor': '0.01'} so bad and coming home is just so fun
 now and the fact that everyone just
 got to be all together last night just
 made me so [ __ ] happy i wanted to
 change my flight so bad and stay here

{'non_sponsor': '0.99', 'sponsor': '0.01'} now and the fact that everyone just
 got to be all together last night just
 made me so [ __ ] happy i wanted to
 change my flight so bad and stay here
 longer but like i

{'non_sponsor': '0.99', 'sponsor': '0.01'} got to be all together last night just
 made me so [ __ ] happy i wanted to
 change my flight so bad and stay here
 longer but like i
 can't that sucks but yeah i'm sitting in

{'non_sponsor': '0.99', 'sponsor': '0.01'} made me so [ __ ] happy i wanted to
 change my flight so bad 

{'non_sponsor': '0.99', 'sponsor': '0.01'} and go right home and return my rental
 car and i didn't get to go to like
 brunch with all my friends i barely even
 got to say bye to everyone well i said
 bye to everyone last night but like i

{'non_sponsor': '0.99', 'sponsor': '0.01'} car and i didn't get to go to like
 brunch with all my friends i barely even
 got to say bye to everyone well i said
 bye to everyone last night but like i
 didn't get to do anything fun today and

{'non_sponsor': '0.99', 'sponsor': '0.01'} brunch with all my friends i barely even
 got to say bye to everyone well i said
 bye to everyone last night but like i
 didn't get to do anything fun today and
 now i'm just like sad that this is how

{'non_sponsor': '0.99', 'sponsor': '0.01'} got to say bye to everyone well i said
 bye to everyone last night but like i
 didn't get to do anything fun today and
 now i'm just like sad that this is how
 my video is ending like it's just over

{'non_sponsor': '0.99', 'sponso

{'non_sponsor': '0.94', 'sponsor': '0.06'} from i don't know
 thank you guys for [ __ ] everything
 and i will see you in my next
 video april like kind of might be fun
 like i don't know like we'll see i have

{'non_sponsor': '0.99', 'sponsor': '0.01'} thank you guys for [ __ ] everything
 and i will see you in my next
 video april like kind of might be fun
 like i don't know like we'll see i have
 some things planned that i think you

{'non_sponsor': '0.99', 'sponsor': '0.01'} and i will see you in my next
 video april like kind of might be fun
 like i don't know like we'll see i have
 some things planned that i think you
 guys will like but

{'non_sponsor': '0.99', 'sponsor': '0.01'} video april like kind of might be fun
 like i don't know like we'll see i have
 some things planned that i think you
 guys will like but
 we'll [ __ ] see no more peace signs i

{'non_sponsor': '0.99', 'sponsor': '0.01'} like i don't know like we'll see i have
 some things planned that i think you
 guys

In [37]:
tc_tokenizer = AutoTokenizer.from_pretrained('dennlinger/roberta-cls-consec')
tc_model = AutoModelForSequenceClassification.from_pretrained('dennlinger/roberta-cls-consec')

Downloading:   0%|          | 0.00/40.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.11k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/878k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/446k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/150 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/478M [00:00<?, ?B/s]

Some weights of the model checkpoint at dennlinger/roberta-cls-consec were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [45]:
def group(iterable, n, fillvalue=None):
    args = [iter(iterable)] * n
    return itertools.zip_longest(fillvalue=fillvalue, *args)
        
for left_captions, right_captions in group(group(videos[2][1], 2), 2, []):
    left_text = ' '.join((caption['text'] for caption in left_captions))
    right_text = ' '.join((caption['text'] for caption in right_captions))
    inputs = tc_tokenizer(left_text, right_text)
    inputs = { k: torch.tensor([v]) for k, v in inputs.items() }
    outputs = tc_model(**inputs)
    non_sponsor, sponsor = torch.nn.functional.softmax(outputs.logits, dim=-1).tolist()[0]
    
    print({ 'not_same': '%.2f' % non_sponsor, 'same': '%.2f' % sponsor }, left_text, right_text)
    

{'not_same': '0.00', 'same': '1.00'} oh my god  i'm holding my life back in my hands
 right now it feels like this is the only

{'not_same': '0.21', 'same': '0.79'} good thing that being severely anemic
has ever done for me [Music]  okay wait actually before we go to

{'not_same': '0.28', 'same': '0.72'} boston i want to tell you guys about the
 sponsor today's video which is current i
 mean it's a way that i'm paying for this
 entire trip in general so it might as

{'not_same': '0.03', 'same': '0.97'} well tell you about how i'm doing this
 in the first place
 while wearing their awesome hat that
 they sent me as well current is the new

{'not_same': '0.00', 'same': '1.00'} way to bank it is truly the future i
 mean i feel like we're all always
 dreaming of what the future holds and
 honestly it is you with this car

{'not_same': '0.02', 'same': '0.98'} current is a mobile bank with a visa
 debit card and a real bank account with
 no hidden fees and no minimum balance
 requirement cur

{'not_same': '0.00', 'same': '1.00'} my dad and aiden but now tonight is
 about my mom
 i'm going out with her and her friends
 the ladies are going to go get some
drinks
{'not_same': '0.01', 'same': '0.99'} drinks
 let me show you the view of the hotel
 room right now i'm sure you really care
 but i personally appreciate it so i want

{'not_same': '0.00', 'same': '1.00'} you guys to appreciate with me we've got
 the highway we've got the garden right
 there oh yeah i lost a nail last night
 so that's really sad oh my god my nails

{'not_same': '0.03', 'same': '0.97'} so gross actually don't look at it it's
 kind of gross out kind of annoyed about
that that
 but yeah awesome 10 out of 10 scenery

{'not_same': '0.00', 'same': '1.00'} probably talk to you guys tomorrow
 because i'm really [ __ ] excited about
 that that's what i'm going to see
 all of my friends and go out to eat and

{'not_same': '0.00', 'same': '1.00'} get a hotel and go shopping and
 everything so let's just [ __ ] cu

{'not_same': '0.00', 'same': '1.00'} manicure first
 i was just about to here they are
 the next step manicure yes let's just
 get on that they only would let me have

{'not_same': '0.03', 'same': '0.97'} one shoe again
 the other one's in a bag but i just
 don't feel like unboxing that one so
 that's what i have ready to take on the

{'not_same': '0.01', 'same': '0.99'} summer with these shoes
 i'm destroying the box with water really
 good inspirational things oh
wait look at the real prize 
{'not_same': '0.01', 'same': '0.99'} are you kidding me oh wow i have to pull
 myself together and you did buy
 something else
 i did buy something else i bitched about

{'not_same': '0.00', 'same': '1.00'} this item in this bag for years
 yes now i do not buy designer bags often
 i truly only bought one which is my
 iconic balenciaga that i bought in
hawaii
{'not_same': '0.05', 'same': '0.95'} hawaii
 fall 2019. which one are you wearing
 tonight i think this
 this bag has been sold out for a ve

{'not_same': '0.01', 'same': '0.99'} don't know it just feels really good to
 just like
 reconnect and just remember where i came
 from i don't know

{'not_same': '0.01', 'same': '0.99'} thank you guys for [ __ ] everything
 and i will see you in my next
 video april like kind of might be fun
 like i don't know like we'll see i have

{'not_same': '0.31', 'same': '0.69'} some things planned that i think you
 guys will like but
 we'll [ __ ] see no more peace signs i
 need to end this video i love you guys

{'not_same': '0.01', 'same': '0.99'} so much oh my god i went to a sports
 game in this video holy [ __ ]
[ __ ]  bring that freaks out i put choices
through


TypeError: 'NoneType' object is not subscriptable