# Evaluation Data Exploration

First, load `fho_main.json`.

In [None]:
import json

# Load the FHO annotation file. JSON text is UTF-8, so decode it explicitly
# instead of relying on the platform's locale-dependent default encoding.
with open("../../../ego4d/v2/annotations/fho_main.json", encoding="utf-8") as f:
    fho_main = json.load(f)


How many structured verbs are there?

In [None]:
from collections import Counter

from eilev.data.ego4d import filter_action

# Tallies filled in by a single pass over every narrated action.
struct_verb_counter = Counter()
freeform_verb_counter = Counter()
no_critical_frames_frames = []
critical_frames_no_frames = []
non_other_struct_and_freeform_verb = 0

# Lazily flatten videos -> annotated intervals -> narrated actions.
narrated_actions = (
    narrated
    for vid in fho_main["videos"]
    for span in vid["annotated_intervals"]
    for narrated in span["narrated_actions"]
)
for action in narrated_actions:
    # Record actions where exactly one of critical_frames / frames is missing.
    has_critical = action["critical_frames"] is not None
    has_frames = action["frames"] is not None
    if has_frames and not has_critical:
        no_critical_frames_frames.append(action)
    elif has_critical and not has_frames:
        critical_frames_no_frames.append(action)

    # Only actions accepted by filter_action contribute to the verb tallies.
    if not filter_action(action):
        continue
    structured = action["structured_verb"]
    if structured == "[other]":
        # "[other]" actions are additionally tallied by normalized freeform verb.
        freeform_verb_counter[action["freeform_verb"].strip().lower()] += 1
    elif action["freeform_verb"] is not None:
        non_other_struct_and_freeform_verb += 1
    struct_verb_counter[structured] += 1

print(f"len(no_critical_frames_frames) = {len(no_critical_frames_frames)}")
print(f"len(critical_frames_no_frames) = {len(critical_frames_no_frames)}")
print(f"len(struct_verb_counter) = {len(struct_verb_counter)}")
print(f"len(freeform_verb_counter) = {len(freeform_verb_counter)}")
print(f"non_other_struct_and_freeform_verb = {non_other_struct_and_freeform_verb}")

print("=====structured verbs=========")
for verb in struct_verb_counter:
    count = struct_verb_counter[verb]
    print(f"{verb}: {count}")

print("=====freeform verbs=========")
for verb in freeform_verb_counter:
    count = freeform_verb_counter[verb]
    print(f"{verb}: {count}")


How many structured nouns are there?

In [None]:
# Noun tallies over the "object_of_change" boxes of each kept action's pnr_frame.
struct_noun_counter = Counter()
freeform_noun_counter = Counter()
none_struct_and_freeform_noun = 0

# Lazily flatten the annotation tree, keeping only actions filter_action accepts.
kept_actions = (
    narrated
    for vid in fho_main["videos"]
    for span in vid["annotated_intervals"]
    for narrated in span["narrated_actions"]
    if filter_action(narrated)
)
for action in kept_actions:
    if action["frames"] is None:
        continue
    # some actions don't have contact frames so use pnr_frame
    pnr_frames = (
        candidate
        for candidate in action["frames"]
        if candidate["frame_type"] == "pnr_frame"
    )
    for frame in pnr_frames:
        for box in frame["boxes"]:
            if box["object_type"] != "object_of_change":
                continue
            structured = box["structured_noun"]
            if structured is None:
                freeform = box["freeform_noun"]
                if freeform is None:
                    none_struct_and_freeform_noun += 1
                else:
                    freeform_noun_counter[freeform.strip().lower()] += 1
            # Counted even when None, so the None key records unlabeled boxes.
            struct_noun_counter[structured] += 1

print(f"len(struct_noun_counter) = {len(struct_noun_counter)}")
print(f"len(freeform_noun_counter) = {len(freeform_noun_counter)}")
print(f"none_struct_and_freeform_noun = {none_struct_and_freeform_noun}")

print("=====structured nouns=========")
for noun in struct_noun_counter:
    count = struct_noun_counter[noun]
    print(f"{noun}: {count}")

print("=====freeform nouns=========")
for noun in freeform_noun_counter:
    count = freeform_noun_counter[noun]
    print(f"{noun}: {count}")


How many (structured_verb, structured_noun) pairs are there?

In [None]:
# Count (structured_verb, structured_noun) pairs, one per kept action. The noun
# is the structured noun of the last "object_of_change" box found on the
# action's pnr_frame(s).
struct_verb_noun_counter = Counter()
for video in fho_main["videos"]:
    for interval in video["annotated_intervals"]:
        for action in interval["narrated_actions"]:
            if not filter_action(action):
                continue
            verb = action["structured_verb"]
            if verb is None:
                continue
            if action["frames"] is None:
                continue
            # Reset per action: otherwise an action with no qualifying box would
            # be counted with the previous action's noun (or raise NameError if
            # it is the very first action).
            noun = None
            for frame in action["frames"]:
                if frame["frame_type"] != "pnr_frame":
                    # some actions don't have contact frames so use pnr_frame
                    continue
                for box in frame["boxes"]:
                    if box["object_type"] != "object_of_change":
                        continue
                    if box["structured_noun"] is None:
                        # TODO: Maybe don't filter this out?
                        continue
                    noun = box["structured_noun"]
            if noun is None:
                # No structured noun was found for this action; nothing to pair.
                continue
            struct_verb_noun_counter[(verb, noun)] += 1

print(f"len(struct_verb_noun_counter) = {len(struct_verb_noun_counter)}")
print("=====structured (verb, noun)s=========")
for (verb, noun), count in struct_verb_noun_counter.items():
    print(f"({verb}, {noun}): {count}")


How about in `fho_lta_taxonomy.json`?

In [None]:
# Load the LTA taxonomy. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's locale-dependent default encoding.
with open(
    "../../../ego4d/v2/annotations/fho_lta_taxonomy.json", encoding="utf-8"
) as f:
    fho_lta_taxonomy = json.load(f)

taxonomy_verbs = set(fho_lta_taxonomy["verbs"])
taxonomy_nouns = set(fho_lta_taxonomy["nouns"])

print(f"len(taxonomy_verbs): {len(taxonomy_verbs)}")
print(f"len(taxonomy_nouns): {len(taxonomy_nouns)}")

# Set differences in both directions show which labels appear on only one side
# (taxonomy vs. labels observed in fho_main).
print(
    "taxonomy_verbs - struct_verb_counter.keys(): "
    f"{taxonomy_verbs - struct_verb_counter.keys()}"
)
print(
    "struct_verb_counter.keys() - taxonomy_verbs: "
    f"{struct_verb_counter.keys() - taxonomy_verbs}"
)

print(
    "taxonomy_nouns - struct_noun_counter.keys(): "
    f"{taxonomy_nouns - struct_noun_counter.keys()}"
)
print(
    "struct_noun_counter.keys() - taxonomy_nouns: "
    f"{struct_noun_counter.keys() - taxonomy_nouns}"
)


Draw a bar chart for structured verbs.

In [None]:
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

# Define a function to truncate labels


def truncate_label(label, max_length=10):
    """Shorten ``label`` for display on a crowded axis.

    Labels of at most ``max_length`` characters are returned unchanged; longer
    ones are cut to their first ``max_length`` characters and suffixed with
    ``"..."``.
    """
    if len(label) > max_length:
        return label[:max_length] + "..."
    return label


# Structured verbs sorted by descending count, skipping the None key.
labels, values = zip(
    *[
        (verb, count)
        for verb, count in struct_verb_counter.most_common()
        if verb is not None
    ]
)
truncated_labels = [truncate_label(label) for label in labels]

# Place bars at integer positions rather than by label text: matplotlib maps
# equal strings to the same categorical slot, so two labels that truncate to
# the same text would be drawn on top of each other and shift every tick.
positions = range(len(labels))

# Calculate the cumulative sum and find the index where it crosses 80%
cumulative_sum = np.cumsum(values)
eighty_percent_index = np.where(cumulative_sum >= 0.8 * cumulative_sum[-1])[0][0]

# Print the verbs that make up 80% and the rest
print("Verbs making up 80% of samples:", labels[: eighty_percent_index + 1])
print("Remaining verbs:", labels[eighty_percent_index + 1 :])

plt.figure(figsize=(20, 7))
bars = plt.bar(positions, values)

# Mark the 80% threshold on the bar chart
plt.axvline(
    x=eighty_percent_index + 0.5, color="red", linestyle="--", label="80% threshold"
)  # +0.5 to place line between bars
plt.legend()

plt.xlabel("Verbs")
plt.ylabel("Count")
plt.title("Verb Count")

# Sparse labeling: Show every nth label
n = 3  # adjust this based on your data and preferences
sparse_labels = [
    "" if i % n != 0 else label for i, label in enumerate(truncated_labels)
]
plt.xticks(positions, sparse_labels, rotation=45, ha="right", fontsize=10)

# Adjust x-axis limits to remove margins
plt.xlim(-0.5, len(labels) - 0.5)

# Display the count on top of each labeled bar
for i, bar in enumerate(bars):
    if i % n == 0:
        yval = bar.get_height()
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            yval + 0.5,
            str(yval),
            ha="center",
            va="bottom",
            fontsize=9,
        )

# Display the plot
plt.tight_layout()  # Adjust layout for better visibility
plt.show()


Draw a bar chart for structured nouns.

In [None]:
# Structured nouns sorted by descending count, skipping the None key.
labels, values = zip(
    *[
        (noun, count)
        for noun, count in struct_noun_counter.most_common()
        if noun is not None
    ]
)
truncated_labels = [truncate_label(label) for label in labels]

# Place bars at integer positions rather than by label text: matplotlib maps
# equal strings to the same categorical slot, so two labels that truncate to
# the same text would be drawn on top of each other and shift every tick.
positions = range(len(labels))

# Calculate the cumulative sum and find the index where it crosses 80%
cumulative_sum = np.cumsum(values)
eighty_percent_index = np.where(cumulative_sum >= 0.8 * cumulative_sum[-1])[0][0]

# Print the nouns that make up 80% and the rest
print("Nouns making up 80% of samples:", labels[: eighty_percent_index + 1])
print("Remaining nouns:", labels[eighty_percent_index + 1 :])

plt.figure(figsize=(20, 7))
bars = plt.bar(positions, values)

# Mark the 80% threshold on the bar chart
plt.axvline(
    x=eighty_percent_index + 0.5, color="red", linestyle="--", label="80% threshold"
)  # +0.5 to place line between bars
plt.legend()

plt.xlabel("Nouns")
plt.ylabel("Count")
plt.title("Noun Count")

# Sparse labeling: Show every nth label
n = 5  # adjust this based on your data and preferences
sparse_labels = [
    "" if i % n != 0 else label for i, label in enumerate(truncated_labels)
]
plt.xticks(positions, sparse_labels, rotation=45, ha="right", fontsize=10)

# Adjust x-axis limits to remove margins
plt.xlim(-0.5, len(labels) - 0.5)

# Display the count on top of each labeled bar
for i, bar in enumerate(bars):
    if i % n == 0:
        yval = bar.get_height()
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            yval + 0.5,
            str(yval),
            ha="center",
            va="bottom",
            fontsize=9,
        )

# Display the plot
plt.tight_layout()  # Adjust layout for better visibility
plt.show()


Draw a bar chart for (structured_verb, structured_noun).

In [None]:
# (verb, noun) pairs sorted by descending count.
labels, values = zip(
    *[(pair, count) for pair, count in struct_verb_noun_counter.most_common()]
)
# Format each pair as "(verb, noun)". Interpolating the tuple itself would
# render its repr, e.g. "(('take', 'apple'))".
truncated_labels = [
    f"({truncate_label(verb)}, {truncate_label(noun)})" for verb, noun in labels
]

# Place bars at integer positions rather than by label text: matplotlib maps
# equal strings to the same categorical slot, so two pairs that truncate to
# the same text would be drawn on top of each other and shift every tick.
positions = range(len(labels))

# Calculate the cumulative sum and find the index where it crosses 80%
cumulative_sum = np.cumsum(values)
eighty_percent_index = np.where(cumulative_sum >= 0.8 * cumulative_sum[-1])[0][0]

# Print the (verb, noun) pairs that make up 80% and the rest
print("(verb, noun)'s making up 80% of samples:", labels[: eighty_percent_index + 1])
print("Remaining (verb, noun)'s:", labels[eighty_percent_index + 1 :])

plt.figure(figsize=(20, 7))
bars = plt.bar(positions, values)

# Mark the 80% threshold on the bar chart
plt.axvline(
    x=eighty_percent_index + 0.5, color="red", linestyle="--", label="80% threshold"
)  # +0.5 to place line between bars
plt.legend()

plt.xlabel("(verb, noun)")
plt.ylabel("Count")
plt.title("(verb, noun) Count")

# Sparse labeling: Show every nth label
n = 200  # adjust this based on your data and preferences
sparse_labels = [
    "" if i % n != 0 else label for i, label in enumerate(truncated_labels)
]
plt.xticks(positions, sparse_labels, rotation=45, ha="right", fontsize=10)

# Adjust x-axis limits to remove margins
plt.xlim(-0.5, len(labels) - 0.5)

# Display the count on top of each labeled bar
for i, bar in enumerate(bars):
    if i % n == 0:
        yval = bar.get_height()
        plt.text(
            bar.get_x() + bar.get_width() / 2,
            yval + 0.5,
            str(yval),
            ha="center",
            va="bottom",
            fontsize=9,
        )

# Display the plot
plt.tight_layout()  # Adjust layout for better visibility
plt.show()


Let's flatten structured verbs and nouns.

In [None]:
import re

# Matches labels shaped like "head_(alt1,_alt2)": group 1 is the head, group 2
# is the text inside the trailing parentheses.
pattern = re.compile(r"^(.+)_\((.+)\)$")


def extract_words(s):
    """Split a structured label into its individual words.

    A label of the form ``head_(a,_b)`` yields the head followed by each
    ``",_"``-separated alternative; any other label is used as a single piece.
    Every piece is further split on ``"/"``, and ``"-"`` / ``"_"`` are replaced
    with spaces. Returns the resulting list in order of appearance.
    """
    matched = pattern.match(s)
    if matched is not None:
        head, alternatives = matched.groups()
        chunks = [head, *alternatives.split(",_")]
    else:
        chunks = [s]
    return [
        piece.replace("-", " ").replace("_", " ")
        for chunk in chunks
        for piece in chunk.split("/")
    ]


# Flatten every taxonomy verb label into its individual words.
flat_verbs = set()
for verb in taxonomy_verbs:
    flat_verbs.update(extract_words(verb))
print(f"len(flat_verbs) = {len(flat_verbs)}")
print("Flat verbs:")
# Set iteration order changes between kernel restarts; sort so the printed
# list is stable across runs and easy to scan.
for verb in sorted(flat_verbs):
    print(verb)

# Same flattening for the taxonomy noun labels.
flat_nouns = set()
for noun in taxonomy_nouns:
    flat_nouns.update(extract_words(noun))
print(f"len(flat_nouns) = {len(flat_nouns)}")
print("Flat nouns:")
for noun in sorted(flat_nouns):
    print(noun)


Let's get the verbs and nouns from EPIC-KITCHENS 100.

In [None]:
import csv


def format_verb(verb: str) -> str:
    """Return ``verb`` with every hyphen turned into a space."""
    return " ".join(verb.split("-"))


def format_noun(noun: str) -> str:
    """Return the part of ``noun`` after its last ``":"``.

    A noun without any ``":"`` is returned unchanged.
    """
    # rpartition yields ("", "", noun) when ":" is absent, so the tail is
    # the whole string in that case.
    _, _, tail = noun.rpartition(":")
    return tail


# Collect the distinct formatted verbs and nouns from the validation CSV.
ek_verbs = set()
ek_nouns = set()
# The csv module expects files opened with newline="" so that newlines embedded
# in quoted fields are interpreted correctly.
with open(
    "../../../EPIC-KITCHENS/annotations/EPIC_100_validation.csv", newline=""
) as f:
    for row in csv.DictReader(f):
        ek_verbs.add(format_verb(row["verb"]))
        ek_nouns.add(format_noun(row["noun"]))

print(f"len(ek_verbs) = {len(ek_verbs)}")
print("EPIC-KITCHENS verbs:")
# Set iteration order changes between kernel restarts; sort so the printed
# list is stable across runs and easy to scan.
for verb in sorted(ek_verbs):
    print(verb)

print(f"len(ek_nouns) = {len(ek_nouns)}")
print("EPIC-KITCHENS nouns:")
for noun in sorted(ek_nouns):
    print(noun)


Let's compare verbs and nouns from the two datasets.

In [None]:
verb_intersection = flat_verbs.intersection(ek_verbs)
noun_intersection = flat_nouns.intersection(ek_nouns)

# Each section is a heading plus the set of words to list beneath it.
comparison_sections = [
    ("verb intersection", verb_intersection),
    ("flat_verbs - ek_verbs", flat_verbs - ek_verbs),
    ("ek_verbs - flat_verbs", ek_verbs - flat_verbs),
    ("noun intersection", noun_intersection),
    ("flat_nouns - ek_nouns", flat_nouns - ek_nouns),
    ("ek_nouns - flat_nouns", ek_nouns - flat_nouns),
]

for section_index, (heading, words) in enumerate(comparison_sections):
    if section_index > 0:
        # Blank line between sections (none before the first heading).
        print()
    print(heading)
    # Set iteration order changes between kernel restarts; sort so the printed
    # list is stable across runs and easy to scan.
    for word in sorted(words):
        print(word)
