In [1]:
import sys
import os
import itertools

import numpy as np
import sklearn.metrics

# Add the parent of the current working directory to the import path so that
# modules located there can be imported by the cells below.
sys.path.append(os.path.dirname(os.path.realpath('.')))
# Presumably switches off Weights & Biases logging in the imported model code — TODO confirm.
os.environ["WANDB_DISABLED"] = "true"

# Evaluation utility methods

In [2]:
def range_equals(left: 'Tuple[float, float]', right: 'Tuple[float, float]', eps: float) -> bool:
    """Tell whether two (start, end) ranges coincide within a tolerance.

    The ranges are considered equal when their start points differ by at most
    ``eps`` and their end points differ by at most ``eps`` (both inclusive).
    The end points are only compared when the start points are close enough.
    """
    start_a, end_a = left
    start_b, end_b = right

    starts_close = abs(start_a - start_b) <= eps
    if not starts_close:
        return starts_close
    return abs(end_a - end_b) <= eps

def count_range_equals(pairs, eps: float) -> int:
    """Count the (left, right) pairs whose two ranges match within ``eps``.

    Each element of ``pairs`` is unpacked into two ranges that are compared
    with ``range_equals``.
    """
    return sum(1 for first, second in pairs if range_equals(first, second, eps))

def range_negation(base: 'Tuple[float, float]', ranges: 'List[Tuple[float, float]]') -> 'List[Tuple[float, float]]':
    """
    Return the parts of ``base`` that are not covered by any of ``ranges``.

    base:    |-------------|
    ranges:  | ***   **    |
    Return:  |#   ###  ####|

    ``ranges`` may be given in any order and may overlap or be nested; they are
    processed in order of their start point. Only gaps of positive length are
    returned, so overlapping input never produces an inverted range such as
    ``(5, 3)``. Ranges are expected to lie within ``base``; no clamping is done
    for ranges that extend beyond it.

    Args:
        base: ``(start, end)`` of the interval to complement within.
        ranges: ``(start, end)`` intervals to remove from ``base``.

    Returns:
        The uncovered gaps as a list of ``(start, end)`` tuples, ordered by start.
    """
    results = []
    cursor = base[0]
    for r in sorted(ranges, key=lambda item: (item[0], item[1])):
        if r[0] > cursor:
            results.append((cursor, r[0]))
        # A nested range must not move the cursor backwards.
        cursor = max(cursor, r[1])
    if cursor < base[1]:
        results.append((cursor, base[1]))

    return results

In [39]:
def create_labels_from_range(captions, sponsor_ranges):
    """Expand caption-level sponsor ranges into one boolean label per token.

    Every caption whose index falls inside one of ``sponsor_ranges`` (both
    endpoints inclusive) is flagged. Ranges with a ``None`` endpoint are
    ignored. Each caption then contributes its flag once per
    whitespace-separated token of ``caption.text``.

    Returns:
        A flat list of boolean labels, in caption order.
    """
    flagged = np.zeros(len(captions), dtype=bool)
    for first, last in sponsor_ranges:
        if first is not None and last is not None:
            for idx in range(first, last + 1):
                flagged[idx] = True

    # Repeat each caption's flag once for every token in that caption.
    return [
        flag
        for flag, caption in zip(flagged, captions)
        for _ in caption.text.split()
    ]

def create_labels_from_times(captions, sponsor_times):
    """Build per-token sponsor labels from sponsor time entries.

    For every entry in ``sponsor_times`` the element at index 1 is unpacked as
    the extra arguments to ``get_intersection_range``; the resulting ranges
    are then turned into token labels by ``create_labels_from_range``.

    NOTE(review): ``get_intersection_range`` is not defined in the visible part
    of this notebook — confirm where it is expected to come from.
    """
    index_ranges = []
    for entry in sponsor_times:
        index_ranges.append(get_intersection_range(captions, *entry[1]))
    return create_labels_from_range(captions, index_ranges)

def merge_ranges(ranges):
    """Lazily fuse runs of ranges that touch end-to-start.

    Consecutive ranges are joined while each one starts exactly where the
    previous one ended (``previous[1] == current[0]``). For every such run a
    single ``(start_of_first, end_of_last)`` tuple is yielded. An empty input
    yields nothing.

    NOTE(review): ``create_labels_from_range`` treats range ends as inclusive
    indices, so neighbours such as ``(3, 4), (5, 6)`` are NOT merged here —
    confirm that this is intended.
    """
    in_run = False
    run_start = run_end = None

    for current in ranges:
        if in_run and run_end == current[0]:
            # Current range continues the open run: only the end moves.
            run_end = current[1]
            continue
        if in_run:
            yield run_start, run_end
        run_start, run_end = current[0], current[1]
        in_run = True

    if in_run:
        yield run_start, run_end

def compute_results(videos, model, eps=5):
    """Run ``model`` over ``videos`` and collect token labels and range-match counts.

    Args:
        videos: iterable of ``(video_id, captions, sponsor_ranges)`` triples.
        model: object whose ``predict(captions)`` returns an iterable of
            ``(start, end)`` ranges; these are passed through ``merge_ranges``.
        eps: tolerance forwarded to ``range_equals`` when matching a predicted
            range against the reference ranges.

    Returns:
        A dict with
        ``'predictions'`` / ``'references'``: float arrays of per-token labels
        concatenated over all videos,
        ``'exact_match'``: number of predicted ranges that match some reference
        range within ``eps``,
        ``'predicted_ranges'``: total number of predicted ranges considered.
    """
    from tqdm.auto import tqdm

    # Labels are gathered in plain lists and converted once at the end;
    # calling np.append per video would copy the whole array every iteration.
    predicted_labels = []
    actual_labels = []
    # Values for our close match metric (exact match with threshold)
    # Number of matches
    close_matches = 0
    # Number of predicted ranges
    total_predicted_ranges = 0

    for video_id, captions, sponsor_ranges in tqdm(videos):
        # create_labels_from_range() tolerates reference ranges with a None
        # endpoint; skip them here as well, since range_equals() would raise
        # a TypeError on them.
        comparable_ranges = [
            (start, end) for start, end in sponsor_ranges
            if start is not None and end is not None
        ]
        predicted_sponsor_ranges = []

        for predicted_range in merge_ranges(model.predict(captions)):
            if predicted_range[0] is None or predicted_range[1] is None:
                continue

            predicted_sponsor_ranges.append(predicted_range)

            if any(range_equals(predicted_range, r, eps) for r in comparable_ranges):
                close_matches += 1
            total_predicted_ranges += 1

        predicted_labels.extend(create_labels_from_range(captions, predicted_sponsor_ranges))
        actual_labels.extend(create_labels_from_range(captions, sponsor_ranges))

        print(f'\tPredicted={predicted_sponsor_ranges},\n\tExpected={sponsor_ranges}')

    return {
        # dtype=float keeps the arrays identical to what repeated np.append
        # onto np.empty(0) used to produce.
        'predictions': np.asarray(predicted_labels, dtype=float),
        'references': np.asarray(actual_labels, dtype=float),
        'exact_match': close_matches,
        'predicted_ranges': total_predicted_ranges,
    }

def evaluate(videos, model, eps=1):
    """Evaluate ``model`` on ``videos`` and print range-level and token-level metrics.

    Calls ``compute_results`` and prints the share of predicted ranges that
    match a reference range within ``eps``, followed by token-level
    scikit-learn metrics.

    Args:
        videos: forwarded to ``compute_results``.
        model: forwarded to ``compute_results``.
        eps: matching tolerance forwarded to ``compute_results``.
    """
    from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, precision_recall_curve, roc_curve

    outputs = compute_results(videos, model, eps)
    predictions = outputs['predictions']
    references = outputs['references']
    exact_matches = outputs['exact_match']
    total_predictions = outputs['predicted_ranges']

    # A model that predicts no range at all must not crash the evaluation.
    exact_match_score = exact_matches / total_predictions if total_predictions else 0.0
    print(f'Exact match (within ±{eps} captions)', exact_match_score)
    # scikit-learn metrics take (y_true, y_pred): the references go first.
    # With the arguments swapped, precision and recall are exchanged and the
    # confusion matrix is transposed.
    print('\tConfusion matrix', confusion_matrix(references, predictions))
    print('\tAccuracy', accuracy_score(references, predictions))
    print('\tPrecision', precision_score(references, predictions))
    print('\tRecall', recall_score(references, predictions))
    print('\tP@R', precision_recall_curve(references, predictions))
    print('\tRoC', roc_curve(references, predictions))

# Load videos to evaluate

In [40]:
from data_loader import load_captions_from_chunks

# islice(..., 1, 20) skips the first item produced by the loader and keeps at
# most the next 19 (positions 1..19). The result is materialised into a list so
# the same videos can be reused by each evaluation cell below.
test_videos = list(itertools.islice(load_captions_from_chunks('data', './', [16]), 1, 20))

[34mOpening ./data.16.json.gz for reading...[0m


Dropping YzgTMh21zhI because sponsor times do not match the captions
Dropping yzhnRt6ZDKM because sponsor times do not match the captions
Dropping YZhVE7X0zwk because sponsor times do not match the captions
Dropping yzJokj2gelY because sponsor times do not match the captions
Dropping YzMAxdSdkzo because sponsor times do not match the captions
Dropping YZMrBCxarlk because sponsor times do not match the captions


# Evaluate Sequence Classification

In [41]:
from sequence_classification import SponsorSequenceClassification

# Score the sequence-classification checkpoint on test_videos; a predicted
# range counts as a match when both endpoints are within eps of a reference.
evaluate(
    videos=test_videos,
    model=SponsorSequenceClassification('distilbert-classification-uncased/checkpoint-7210'),
    eps=1
)

  0%|          | 0/19 [00:00<?, ?it/s]

	Predicted=[(230, 235), (351, 357)],
	Expected=[[21, 46]]
	Predicted=[(264, 270)],
	Expected=[[267, 300]]
	Predicted=[(13, 17), (33, 38), (56, 58)],
	Expected=[[14, 41]]
	Predicted=[(51, 56), (187, 192), (256, 259), (292, 297)],
	Expected=[[187, 231]]
	Predicted=[],
	Expected=[[156, 196]]
	Predicted=[(0, 10), (147, 153), (161, 167), (181, 193)],
	Expected=[[145, 194]]
	Predicted=[],
	Expected=[[9, 32]]
	Predicted=[],
	Expected=[[0, 0]]
	Predicted=[(0, 4), (101, 115), (124, 128)],
	Expected=[[0, 5], [97, 125]]
	Predicted=[(17, 24), (48, 53)],
	Expected=[[20, 58]]
	Predicted=[(0, 4)],
	Expected=[[0, 5]]
	Predicted=[],
	Expected=[[2, 4], [276, 302]]
	Predicted=[(0, 3), (469, 475), (641, 648)],
	Expected=[[0, 1]]
	Predicted=[(0, 3), (869, 873), (880, 884), (891, 895)],
	Expected=[[0, 4]]
	Predicted=[(0, 2)],
	Expected=[[0, 0], [62, 77]]
	Predicted=[(0, 6), (14, 20)],
	Expected=[[2, 28]]
	Predicted=[(129, 134)],
	Expected=[[73, 98]]
	Predicted=[(27, 31)],
	Expected=[[14, 40]]
	Predicted=[(1

# Evaluate Sequence Labelling

In [42]:
from sequence_labelling import SponsorTokenClassification

# Score the sequence-labelling model on the same test_videos with the same
# eps, so the printed metrics are comparable across approaches.
evaluate(
    videos=test_videos,
    model=SponsorTokenClassification('seq_labelling.model'),
    eps=1
)

  0%|          | 0/19 [00:00<?, ?it/s]

	Predicted=[(350, 362)],
	Expected=[[21, 46]]
	Predicted=[(264, 264), (265, 297)],
	Expected=[[267, 300]]
	Predicted=[(0, 0), (14, 39)],
	Expected=[[14, 41]]
	Predicted=[],
	Expected=[[187, 231]]
	Predicted=[],
	Expected=[[156, 196]]
	Predicted=[(0, 1), (2, 6), (139, 139), (140, 140), (141, 141), (142, 142), (143, 194), (538, 538), (540, 540), (541, 541), (542, 542), (543, 543), (544, 546), (547, 547)],
	Expected=[[145, 194]]
	Predicted=[(14, 14), (15, 28), (29, 30)],
	Expected=[[9, 32]]
	Predicted=[],
	Expected=[[0, 0]]
	Predicted=[(1, 1), (2, 3), (4, 4), (5, 5), (64, 64), (97, 129)],
	Expected=[[0, 5], [97, 125]]
	Predicted=[],
	Expected=[[20, 58]]
	Predicted=[(0, 4)],
	Expected=[[0, 5]]
	Predicted=[(0, 3), (270, 270), (272, 272), (273, 273), (274, 274), (275, 276), (277, 302), (308, 308)],
	Expected=[[2, 4], [276, 302]]
	Predicted=[(463, 463), (469, 517)],
	Expected=[[0, 1]]
	Predicted=[(864, 864), (865, 865), (866, 866), (868, 878), (879, 915), (940, 943), (946, 946), (947, 949), (

# Evaluate Span Extraction
The predict function is not implemented. Some results are available in the corresponding notebook.

In [43]:
from span_extraction import SponsorSpanExtraction

# NOTE: as the recorded output of this cell shows, this call currently fails
# with "TypeError: 'NotImplementedType' object is not callable" because the
# predict function of the span-extraction model is not implemented.
evaluate(
    videos=test_videos,
    model=SponsorSpanExtraction('distilbert-span-extraction-uncased/checkpoint-9000'),
    eps=1
)

  0%|          | 0/19 [00:00<?, ?it/s]

TypeError: 'NotImplementedType' object is not callable

# Demo

In [51]:
# Demo input: a video transcript split on newlines into a list with one caption
# line per element. predict_and_print() reads this module-level name.
transcript = """You know, buying RAM for
your computer isn't so easy.
There's a lot to take into account:
how much to get, what speed you want,
and whether it's worth
paying a few bucks more
to make the inside of your computer
look like it's been slathered
in rainbow sherbet. (slurps)
(smacks lips) Ah! Delicious.
But one specification people
don't talk too much about
is how many ranks your RAM modules have,
and I'm not referring to
how many Steam achievements
you've unlocked with the same RAM kit.
If only.
A memory rank is actually a
single group of memory blocks.
You see, each rank has a 64-bit bus
that connects your RAM
to your motherboard,
and one stick of RAM can have one, two,
or even four ranks on that one stick.
Generally speaking, more ranks are better
because your system's memory controller
can access rank separately
from the others.
While the CPU can't
access every rank at once,
due to the modules themselves
sharing the 64-bit bus,
it can start an operation on one rank
while another rank finishes
up on another task,
a process called interleaving
that can reduce memory response time
and slightly improve bandwidth
despite not increasing the bus width.
But should you care about this?
Are the performance
benefits actually worth
worrying about such an esoteric
topic? (pretentious moan)
It turns out the answer is yes,
especially if you're
running an AMD Ryzen CPU.
Certain games that are
more memory-dependent
can see noticeable performance increases
when running more ranks of memory,
and even some productivity applications,
like file compression
programs, can also benefit.
And there's more good news.
You don't necessarily have to go out
and buy special dual-rank modules
to take advantage of the speed boost.
Many of you probably have
four DIMM slots on your motherboard
that can operate in dual-channel mode.
If you fill all four slots
with single-rank DIMMs,
this is roughly equivalent
to a dual-rank setup in two slots.
If you're only rocking two sticks of RAM,
this is a situation in which you'll want
to have those dual-rank modules.
But how, exactly, do
you know how many ranks
are on your RAM modules?
Ask them?
Although a common explanation
is that single-rank memory
has chips on only one side
while a dual-rank module has
memory chips on both sides,
it can be hard to tell how
many ranks a stick of RAM has
just by glancing at it,
even if it doesn't have a head spreader.
Sometimes modules that appear
to have memory chips on both sides
actually act as one big rank,
and then you have the fact
that trying to figure this out
in your BIOS or in a system utility
doesn't always give you
an accurate answer either.
Some RAM modules will have a 1R or 2R
to indicate whether they're single-
or dual-rank, respectively.
And quad-rank memory is quite
unusual on regular desktops,
so odds are you won't be
seeing too much of it.
If that isn't too helpful,
you might try looking at a spec sheet
or checking out online resources
where users have compiled lists
of single- and dual-rank memory kits.
Now, it also turns out that because
modern memory integrated circuits, or ICs,
tend to be 8-gigabit capacity
as opposed to the older standard of 4,
newer 8-gigabyte sticks
tend to be single rank
while newer 16-gig sticks
tend to be dual-rank.
And if you're confused
about the math there,
one rank is made up of eight ICs.
This means that even if
you won't use all of it,
32 gigabytes of memory
may be the sweet spot for speed today,
though this will keep changing
as IC capacity increases.
But remember that whether
you see real benefits
really depends on your workload,
and more ranks can
paradoxically add more latency,
depending on what you're doing.
So do your research and
see if that trade-off
is worth it for whatever it
is you get up to on your PC.
I don't know what it is,
and frankly, I don't wanna know.
But I think you do wanna
know about our sponsor,
Private Internet Access,
the VPN that masks your IP address
and encrypts your internet traffic.
PIA has reliable service
with over 24,000 servers in 77 countries
and no bandwidth caps.
It has configurable encryption
and an internet kill switch
to keep you in control of
your connection and privacy.
And when you combine it
with private browsing,
you can make websites think
you're in a different country.
Connect up to 10 devices at once
with their clients for Windows,
Mac, Android, iOS and Linux,
and stay protected with MACE,
the built-in malware and tracking blocker.
To try it risk-free for 30 days,
just head to
privateinternetaccess.com/TechQuickie.
Wow! What do you know, that was a video.
Thanks for watching, guys.
Like the video if you liked it.
Dislike it if you disliked it.
Check out our other videos. We got lots.
Comment below with video suggestions.
And don't forget to subscribe and follow.
I don't wanna talk to
you about this again.""".split('\n')

In [55]:
def predict_and_print(model):
    """Print the demo transcript with the lines predicted as sponsor content highlighted.

    Each line of the module-level ``transcript`` is wrapped in a dict with the
    keys ``'text'`` and ``'is_sponsor'``. The list is passed to
    ``model.predict``, and every caption index inside a returned
    ``(start, end)`` range (both inclusive) is flagged. Flagged lines are
    printed in yellow, all others without colour.

    Args:
        model: object whose ``predict(captions)`` returns an iterable of
            ``(start, end)`` index ranges.
    """
    from termcolor import colored

    captions = [{'text': line, 'is_sponsor': False} for line in transcript]
    for start_idx, end_idx in model.predict(captions):
        # compute_results() skips predicted ranges with a missing endpoint;
        # do the same here instead of failing in range(None, ...).
        if start_idx is None or end_idx is None:
            continue
        for i in range(start_idx, end_idx + 1):
            captions[i]['is_sponsor'] = True

    for caption in captions:
        color = 'yellow' if caption['is_sponsor'] else None
        print(colored(caption['text'], color))

# Demo of Sequence Labelling
Demonstration of the best-performing approach: sequence labelling.

In [56]:
# Run the sequence-labelling model on the demo transcript and print it with
# the predicted sponsor lines highlighted.
predict_and_print(SponsorTokenClassification('seq_labelling.model'))

You know, buying RAM for[0m
your computer isn't so easy.[0m
There's a lot to take into account:[0m
how much to get, what speed you want,[0m
and whether it's worth[0m
paying a few bucks more[0m
to make the inside of your computer[0m
look like it's been slathered[0m
in rainbow sherbet. (slurps)[0m
(smacks lips) Ah! Delicious.[0m
But one specification people[0m
don't talk too much about[0m
is how many ranks your RAM modules have,[0m
and I'm not referring to[0m
how many Steam achievements[0m
you've unlocked with the same RAM kit.[0m
If only.[0m
A memory rank is actually a[0m
single group of memory blocks.[0m
You see, each rank has a 64-bit bus[0m
that connects your RAM[0m
to your motherboard,[0m
and one stick of RAM can have one, two,[0m
or even four ranks on that one stick.[0m
Generally speaking, more ranks are better[0m
because your system's memory controller[0m
can access rank separately[0m
from the others.[0m
While the CPU can't[0m
access every rank at once