## Initialize the data-generator

In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import tensorflow as tf
import pandas as pd
from google.protobuf import text_format
from modeling import trainer
from protos import model_pb2
from protos import pipeline_pb2
from readers import reader

# Directory that holds the trained model's checkpoints and its `pipeline.pbtxt`.
model_dir = "logs/single_class_aware/"
# EPIC-KITCHENS-100 annotation CSVs.
# NOTE(review): `video_lengths_csv` is not referenced in the visible cells;
# `_read_video_length` reads its path from `reader_options` instead - confirm
# whether this constant is still needed.
video_lengths_csv = "epic-kitchens-100-annotations/EPIC_100_video_info.csv"
verb_classes_csv = "epic-kitchens-100-annotations/EPIC_100_verb_classes.csv"
noun_classes_csv = "epic-kitchens-100-annotations/EPIC_100_noun_classes.csv"


# Let TensorFlow grow GPU memory on demand instead of reserving it all upfront.
for gpu in tf.config.experimental.list_physical_devices('GPU'):
  tf.config.experimental.set_memory_growth(gpu, True)

# Parse the text-format pipeline config stored next to the checkpoints.
with tf.io.gfile.GFile(os.path.join(model_dir, 'pipeline.pbtxt'), 'r') as fp:
  pipeline_proto = text_format.Merge(fp.read(), pipeline_pb2.Pipeline())

# Here we visualize TRAIN set predictions (the train reader config is used).
reader_options = pipeline_proto.train_reader
# One example per batch: the cells below index the batch dimension with [0].
reader_options.ek100_st_reader.batch_size = 1
model_options = pipeline_proto.model.Extensions[model_pb2.SingleTimeClassAwareDet.ext]
n_verb_classes = model_options.n_verb_classes
n_noun_classes = model_options.n_noun_classes

# The train reader is built with is_training=False.
# NOTE(review): presumably this disables shuffling/augmentation - confirm in `readers.reader`.
input_fn = reader.get_input_fn(reader_options, is_training=False)
model_fn = trainer.create_model_fn(pipeline_proto)

# Build the graph once: a one-shot iterator feeds the model in PREDICT mode.
# NOTE(review): the trailing `None` is presumably the estimator `params`/`config` argument - confirm.
features, labels = input_fn().make_one_shot_iterator().get_next()
predictions = model_fn(features, labels, tf.estimator.ModeKeys.PREDICT, None).predictions

def data_generator():
  """Yields evaluated `[predictions, labels]` pairs from the restored model.

  The latest checkpoint listed in `model_dir` is restored into a fresh session,
  which stays open for as long as the generator is being consumed. Each `next()`
  call runs the graph once, i.e. advances the input iterator by one batch.

  Yields:
    A two-element list `[predictions, labels]` with the evaluated values.

  Raises:
    ValueError: if no checkpoint can be found under `model_dir`.
  """
  saver = tf.train.Saver()
  # `get_checkpoint_state` returns None when the directory has no valid
  # checkpoint file; fail early with a readable message instead of an
  # AttributeError on `None.model_checkpoint_path`.
  ckpt = tf.train.get_checkpoint_state(model_dir)
  if ckpt is None or not ckpt.model_checkpoint_path:
    raise ValueError('No checkpoint found in %r.' % model_dir)
  with tf.Session() as sess:
    saver.restore(sess, ckpt.model_checkpoint_path)
    while True:
      try:
        yield sess.run([predictions, labels])
      except tf.errors.OutOfRangeError:
        # The one-shot input iterator is exhausted: end the generator cleanly.
        return

dg = data_generator()

W0906 23:14:47.847424 140252623681344 __init__.py:308] Limited tf.compat.v2.summary API due to missing TensorBoard installation.


ModuleNotFoundError: No module named 'joblib'

## Get an example

In [None]:
def load_id_to_name(file_name):
  """Reads a class CSV and maps each shifted class id to its name.

  Args:
    file_name: path (or file-like object) of a CSV with `id` and `key` columns.

  Returns:
    A dict mapping `id + 1` to the matching `key` value.
  """
  table = pd.read_csv(file_name)
  shifted_ids = (table['id'] + 1).tolist()
  names = table['key'].tolist()
  return dict(zip(shifted_ids, names))

# Id -> name lookup tables for verbs and nouns (ids are shifted by one).
classid2verb = load_id_to_name(verb_classes_csv)
classid2noun = load_id_to_name(noun_classes_csv)

# Every (verb, noun) pair gets the flat id `verb_id * (1 + n_noun_classes) + noun_id`.
classid2action = {
    vid * (1 + n_noun_classes) + nid: verb + ' ' + noun
    for vid, verb in classid2verb.items()
    for nid, noun in classid2noun.items()
}

# Fetch one example: the evaluated predictions and the matching labels.
elem = next(dg)
y_pred, y_true = elem

## Visualization Utils

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import random

from matplotlib import colors
from scipy.special import softmax
from scipy.special import expit as sigmoid

# Palette of named colors used to paint class ids in the color bars.
STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite',
    'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange',
    'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite',
    'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki',
    'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey',
    'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime',
    'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen',
    'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed',
    'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown', 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat',
    'WhiteSmoke', 'Yellow', 'YellowGreen'
]
# Fixed seed so the shuffled palette (and hence every figure) is reproducible.
# Note that this also reseeds the global `random` module state.
random.seed(286)
random.shuffle(STANDARD_COLORS)
# Index 0 is always white.
# NOTE(review): presumably reserved for the background / unlabeled class (id 0) - confirm.
STANDARD_COLORS = ['White'] + STANDARD_COLORS

def show_color_bar(data, classid2colorid, classid2name, title, show_ticks=True):
  """Draws a 1-D sequence of class ids as a horizontal bar of colored cells.

  Args:
    data: 1-D array of class ids, one per frame.
    classid2colorid: dict mapping every class id in `data` to a color index;
      the index is taken modulo `len(STANDARD_COLORS)`.
    classid2name: dict mapping class ids to display names; ids without an
      entry get an empty label.
    title: title of the figure.
    show_ticks: if True, labels the first frame of each run of identical ids
      with the class name and hides the y axis ticks.

  Raises:
    KeyError: if an id in `data` is missing from `classid2colorid`.
  """
  observation_window = data.shape[0]

  # RGB image two pixels tall, initialized to white; uint8 is the integer
  # format matplotlib expects for 0-255 RGB data.
  image = np.full((2, observation_window, 3), 255, dtype=np.uint8)
  for i in range(observation_window):
    color_id = classid2colorid[data[i]]
    color_name = STANDARD_COLORS[color_id % len(STANDARD_COLORS)].lower()
    image[:, i, :] = [int(channel * 255) for channel in colors.to_rgb(color_name)]

  plt.figure(figsize=(20, 2))
  ax = plt.subplot(111)
  ax.tick_params(direction='out', length=0, width=0, colors='black', grid_color='black', grid_alpha=0, labelsize=12)
  ax.matshow(image)
  if show_ticks:
    # Label only the first frame of every run of identical ids. The name is
    # used as-is (no format-then-split round trip), so names that contain
    # '(' are no longer truncated.
    tick_labels = [
        classid2name.get(class_id, '')
        if i == 0 or class_id != data[i - 1] else ''
        for i, class_id in enumerate(data)
    ]
    ax.set_xticks(np.arange(observation_window))
    ax.set_xticklabels(tick_labels)
    ax.set_yticks([])
    ax.set_yticklabels([])
  plt.title(title)
  plt.xticks(rotation=75)

In [None]:
def _read_video_length(video_id):
  """Looks up the length of `video_id` in the video-lengths CSV.

  Args:
    video_id: value to match against the `video_id` column.

  Returns:
    The value stored in the second column of the first matching row.
  """
  lengths = pd.read_csv(reader_options.ek100_st_reader.path_to_video_lengths)
  matching_rows = lengths.loc[lengths.video_id == video_id]
  first_match = matching_rows.to_numpy()[0]
  return first_match[1]

def _timestamp_to_seconds(timestamp):
  _MINUTES_TO_SECONDS = 60
  _HOURS_TO_SECONDS = 60 * 60
  hours, minutes, seconds = map(float, timestamp.split(":"))
  total_seconds = hours * _HOURS_TO_SECONDS + minutes * _MINUTES_TO_SECONDS + seconds
  return total_seconds

def _read_annotations(video_id):
  """Loads the annotated segments of a single video.

  Args:
    video_id: value to match against the `video_id` column.

  Returns:
    A DataFrame with the columns `video_id`, `t_start`, `t_end` (both in
    seconds), `verb_class` and `noun_class`, restricted to rows of `video_id`.
  """
  # NOTE(review): `read_pickle` can execute arbitrary code while loading; only
  # point `path_to_annotations` at trusted files.
  df = pd.read_pickle(reader_options.ek100_st_reader.path_to_annotations)
  # Filter first so that timestamps are parsed only for the requested video
  # rather than for every annotation in the file.
  df = df[df.video_id == video_id]
  return pd.DataFrame({
    'video_id': df['video_id'],
    't_start': df['start_timestamp'].apply(_timestamp_to_seconds),
    't_end': df['stop_timestamp'].apply(_timestamp_to_seconds),
    'verb_class': df['verb_class'],
    'noun_class': df['noun_class'],
  })

# Index 0 selects the first (only) example of the fetched batch.
video_id = y_pred['video_id'][0].decode('ascii')
video_length = _read_video_length(video_id)

# Frame-level predictions.
verb_seq_logits = y_pred['verb_seq_classification'][0]
noun_seq_logits = y_pred['noun_seq_classification'][0]
verb_seq_predictions = verb_seq_logits.argmax(-1)
noun_seq_predictions = noun_seq_logits.argmax(-1)
observation_window = verb_seq_logits.shape[0]

# Frame-level labels: start from -1 everywhere so that, after the +1 shift
# below, frames without any annotation end up with class id 0.
annotations = _read_annotations(video_id)
verb_seq_labels = np.full((observation_window,), -1, dtype=np.int32)
noun_seq_labels = np.full((observation_window,), -1, dtype=np.int32)
for _, row in annotations.iterrows():
  # Map the segment's time span onto frame indices of the observation window.
  i_start = int(observation_window * row['t_start'] / video_length)
  i_end = int(observation_window * row['t_end'] / video_length)
  # Inclusive end; slice assignment clips at the end of the window by itself.
  verb_seq_labels[i_start:i_end + 1] = row['verb_class']
  noun_seq_labels[i_start:i_end + 1] = row['noun_class']
verb_seq_labels += 1
noun_seq_labels += 1
print('video_id = %s, video_length = %.2lf, observation_window = %i' 
      % (video_id, video_length, observation_window))

# Flat action id = verb_id * (1 + n_noun_classes) + noun_id, as in `classid2action`.
action_seq_labels = verb_seq_labels * (1 + n_noun_classes) + noun_seq_labels
action_seq_predictions = verb_seq_predictions * (1 + n_noun_classes) + noun_seq_predictions
# Color index 0 is white, so it must always belong to the unlabeled id 0.
# Including 0 explicitly keeps a real action from being drawn in white when
# no unlabeled frame is present in the sequences.
actionid2colorid = {
    c: i for i, c in enumerate(sorted(
        {0} | set(action_seq_labels.tolist()) | set(action_seq_predictions.tolist())))
}
show_color_bar(action_seq_labels[:100], actionid2colorid, classid2action, "action GT")
show_color_bar(action_seq_predictions[:100], actionid2colorid, classid2action, "top-1 action prediction")

# Optional: uncomment to visualize verbs and nouns separately.
# verbid2colorid = {c: i for i, c in enumerate(sorted(set(verb_seq_labels.tolist() + verb_seq_predictions.tolist())))}
# show_color_bar(verb_seq_labels[:100], verbid2colorid, classid2verb, "verb-true")
# show_color_bar(verb_seq_predictions[:100], verbid2colorid, classid2verb, "verb-pred")

# nounid2colorid = {c: i for i, c in enumerate(sorted(set(noun_seq_labels.tolist() + noun_seq_predictions.tolist())))}
# show_color_bar(noun_seq_labels[:100], nounid2colorid, classid2noun, "noun-true")
# show_color_bar(noun_seq_predictions[:100], nounid2colorid, classid2noun, "noun-pred")

## Post-processing

In [None]:
show_color_bar(action_seq_labels[:100], actionid2colorid, classid2action, "action GT")

from models.post_process import py_post_process

# Number of leading frames that are post-processed and drawn.
n_frames_shown = 100

# Per-frame action scores: outer product of the verb and noun probabilities,
# with column 0 of each softmax output dropped.
verb_seq_scores = softmax(verb_seq_logits, -1)[:, 1:]
noun_seq_scores = softmax(noun_seq_logits, -1)[:, 1:]
verb_seq_scores = np.expand_dims(verb_seq_scores, 2)
noun_seq_scores = np.expand_dims(noun_seq_scores, 1)
action_seq_scores = (verb_seq_scores * noun_seq_scores).reshape([-1, n_verb_classes * n_noun_classes])

(i_starts, i_ends, action_ids, action_scores
 ) = py_post_process(action_seq_scores[:n_frames_shown, :], 
                     max_n_detection=20, 
                     thresholds=[x ** 2 for x in [0.1, 0.2, 0.4]])


# Detections may contain actions that appear neither in the ground truth nor
# in the top-1 predictions and therefore have no color yet. Work on a local
# copy of the mapping and give each of them the next free color index
# instead of failing with a KeyError.
detection_colorid = dict(actionid2colorid)

images = []
for i_start, i_end, action_id in zip(i_starts, i_ends, action_ids):
  # Detected ids index the (verb, noun) grid without the dropped column 0;
  # shift both by one to get back to the ids used by the lookup tables.
  verb_id = action_id // n_noun_classes + 1
  noun_id = action_id % n_noun_classes + 1
  print('%s %s' % (classid2verb[verb_id], classid2noun[noun_id]))

  action_id = verb_id * (1 + n_noun_classes) + noun_id
  color_id = detection_colorid.setdefault(action_id, len(detection_colorid))

  # One white strip per detection, painted over the detected frame span.
  # NOTE(review): `i_end` is treated as inclusive, as in the original loop -
  # confirm against `py_post_process`.
  image = np.full((2, n_frames_shown, 3), 255, dtype=np.uint8)
  r, g, b = colors.to_rgb(STANDARD_COLORS[color_id % len(STANDARD_COLORS)].lower())
  image[:, i_start:i_end + 1] = [int(r * 255), int(g * 255), int(b * 255)]
  images.append(image)

if images:
  # Stack the strips vertically, one detection per pair of pixel rows.
  image = np.concatenate(images, 0)
  plt.figure(figsize=(20, 75))
  ax = plt.subplot(111)
  ax.get_xaxis().set_visible(False)
  ax.get_yaxis().set_visible(False)
  ax.matshow(image)
else:
  # `np.concatenate` raises on an empty list; report the empty result instead.
  print('No detections to display.')