In [38]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
import glob
tf.logging.set_verbosity(tf.logging.ERROR)
import numpy as np

In [31]:
# from youtube-8m utils.py
def Dequantize(feat_vector, max_quantized_value=2, min_quantized_value=-2):
  """Dequantize the feature from the byte format to the float format.

  Args:
    feat_vector: the input 1-d vector.
    max_quantized_value: the maximum of the quantized value.
    min_quantized_value: the minimum of the quantized value.

  Returns:
    A float vector which has the same shape as feat_vector.
  """
  assert max_quantized_value > min_quantized_value
  quantized_range = max_quantized_value - min_quantized_value
  scalar = quantized_range / 255.0
  bias = (quantized_range / 512.0) + min_quantized_value
  return feat_vector * scalar + bias

def decode(feat_vector, feature_size):
    return tf.reshape(tf.cast(tf.decode_raw(feat_vector, 
                                            tf.uint8), 
                              tf.float32),
                      [-1, feature_size])

In [34]:
# filepath is path to tfrecord
# datatype is audio or video
# output_features and output_labels are empty lists or existing lists
def load_data(filepath, data_type, output_labels, output_features):
    if data_type == 'audio':
        context = {
            'labels': tf.VarLenFeature(dtype=tf.int64)
        }

        feature_list = {
            'audio_embedding': tf.FixedLenSequenceFeature([], dtype=tf.string)
        }
        feature_name = 'audio_embedding'
        feature_len = 128

    elif data_type == 'video':
        context = {
            'id': tf.FixedLenFeature([], dtype=tf.string),
            'labels': tf.VarLenFeature(dtype=tf.int64)
        }

        feature_list = {
            'rgb': tf.FixedLenSequenceFeature([], dtype=tf.string),
        }
        feature_name = 'rgb'
        feature_len = 128
        
        
    tf.reset_default_graph()    
    sess = tf.InteractiveSession()

    # Read TFRecord file
    reader = tf.TFRecordReader()
    filename_queue = tf.train.string_input_producer([filepath])


    # Extract features from serialized data

    _, serialized_example = reader.read(filename_queue)
    context, features = tf.io.parse_single_sequence_example(serialized_example,
                                                    context_features=context,
                                                    sequence_features=feature_list,
                                                    example_name=None,
                                                    name=None
    )

    # Many tf.train functions use tf.train.QueueRunner,
    # so we need to start it before we read
    tf.train.start_queue_runners(sess)
    
    
    num_in_file = sum(1 for _ in tf.python_io.tf_record_iterator(filepath))

    for i in range(num_in_file):
        labels = context['labels'].eval()
        label = labels.values[0]
        data = Dequantize(decode(features[feature_name], feature_len)).eval()
        output_labels.append(label)
        output_features.append(data)

    tf.InteractiveSession().close()
    
    return output_labels, output_features

In [66]:
audio_output_labels = []
audio_output_features = []
audio_path = "audio_1556745450.370243.tfrecord"
audio_output_labels, audio_output_features = load_data(audio_path,
                                           'audio', audio_output_labels, audio_output_features)

video_output_labels = []
video_output_features = []
video_path = "video_1556749084.971638.tfrecord"
video_output_labels, video_output_features = load_data(video_path,
                                           'video', video_output_labels, video_output_features)

In [67]:
audio_x = np.array(audio_output_features)
audio_y = np.array(audio_output_labels)
video_x = np.array(video_output_features)
video_y = np.array(video_output_labels)

In [81]:
for i in range(len(video_x)):
    print(len(video_x[i]))

9
10
9
9
9
11
10
10
10
10
10
10
10
10
10
9
10
10
9
8
10
9
10
10
10
9
10
10
10
9
9
11
9
8
9
10
10
10
11
10
10
9
10
10
9
10
10
10
8
10
10
10
10
10
9
11
10
11
10
9
11
10
10
9
9
10
9
10
9
10
9
9
10
10
11
10
10
10
11
9
11
8
9
10
10
9
9
11
10
10
10
9
9
8
8
10
9
10
9
10
9
10
10
10
10
10
9
9
8
10
10
10
10
9
10
10
10
10
10
9
10
11
8
9
10
10
11
10
10
8
9
10
10
10
10
10
9
10
11
9
10
11
10
10
10
11
10
9
10
10
11
11
10
10
10
9
10
10
9
10
9
11
11
10
11
10
9
9
11
10
10
10
10
11
10
10
10
10
9
9
11
10
10
10
9
11
10
8
9
9
10
8
9
10
10
9
10
10
10
9
9
9
9
11
10
10
11
11
10
11
10
10
9
8
9
10
11
9
10
8
10
9
10
10
10
9
10
9
9
9
11
10
10
10
10
10
10
10
10
10
9
10
10
9
8
10
9
10
10
10
9
10
10
10
9
9
11
9
8
9
10
10
10
11
10
10
9
10
10
9
10
10
10
8
10
10
10
10
10
9
11
10
11
10
9
11
10
10
9
9
10
9
10
9
10
9
9
10
10
11
10
10
10
11
9
11
8
9
10
10
9
9
11
10
10
10
9
9
8
8
10
9
10
9
10
9
10
10
10
10
10
9
9
8
10
10
10
10
9
10
10
10
10
10
9
10
11
8
9
10
10
11
10
10
8
9
10
10
10
10
10
9
10
11
9
10
11
10
10
10
11
10
9
10
