**Objective:** Read the AudioSet embedding data and convert it into a usable JSON file.


In [1]:
import numpy as np
import json
import tensorflow as tf
import os
import pandas as pd

### Importing dataset

In [None]:
directory = "audioset_v1_embeddings/eval"

# Collect the path of every TFRecord file found directly inside `directory`.
# os.listdir returns names in arbitrary order, so the names are sorted to keep
# the record order (and therefore the generated JSON) reproducible across runs.
dataset = [
    os.path.join(directory, file_name)
    for file_name in sorted(os.listdir(directory))
    if file_name.endswith(".tfrecord")  # ignore anything that is not a TFRecord file
]

### Reading data stored in the TFRecord format

In [184]:
# Build a single dataset over all TFRecord paths collected in `dataset`.
raw_dataset = tf.data.TFRecordDataset(filenames=dataset)

### Reading all labels from the `class_labels_indices.csv` file

In [185]:
# Load the label table and keep every display name as a plain list.
class_labels = pd.read_csv('class_labels_indices.csv')
labels = class_labels['display_name'].tolist()

# Keep only the rows whose display name mentions "music" (case-insensitive),
# then collect the numeric label indices of those rows.
is_music = class_labels['display_name'].str.contains('Music', case=False)
music_class = class_labels.loc[is_music]
music_labels = music_class['index'].tolist()

In [215]:
audios = []
counter = 0
NUM_SECONDS = 10     # number of leading embedding frames (one per second) kept per record

# Build the music-label lookup once instead of rebuilding it for every record.
music_label_set = set(music_labels)

for raw_record in raw_dataset:
    example = tf.train.SequenceExample()
    example.ParseFromString(raw_record.numpy())

    # Audio metadata. The labels are copied into a plain list so the stored
    # record holds ordinary Python values rather than a protobuf container.
    audio_labels = list(example.context.feature['labels'].int64_list.value)
    start_time = example.context.feature['start_time_seconds'].float_list.value
    end_time = example.context.feature['end_time_seconds'].float_list.value
    video_id = example.context.feature['video_id'].bytes_list.value

    # Skip records that carry no music-related label.
    if music_label_set.isdisjoint(audio_labels):
        continue

    # Skip records with fewer than NUM_SECONDS frames; checking this first
    # avoids converting embeddings that would be thrown away.
    feature_list = example.feature_lists.feature_list['audio_embedding'].feature
    if len(feature_list) < NUM_SECONDS:
        continue

    # Flatten the first NUM_SECONDS frames into one list of byte values (0-255).
    audio_embedding = [
        byte_value
        for feature in feature_list[:NUM_SECONDS]
        for byte_value in feature.bytes_list.value[0]
    ]

    # JSON-ready record: the video id is decoded from bytes to text.
    audio = {
        'label': audio_labels,
        'video_id': video_id[0].decode('utf-8'),
        'start_time': start_time[0],
        'end_time': end_time[0],
        'data': audio_embedding
    }

    # Store the record; `counter` counts kept records, not records read.
    audios.append(audio)
    counter += 1
    if counter % 100 == 0:
        print(f"Processing {counter}th file ...")

Processing 100th file ...
Processing 200th file ...
Processing 300th file ...
Processing 400th file ...
Processing 500th file ...
Processing 600th file ...
Processing 700th file ...
Processing 800th file ...
Processing 900th file ...
Processing 1000th file ...
Processing 1100th file ...
Processing 1200th file ...
Processing 1300th file ...
Processing 1400th file ...
Processing 1500th file ...
Processing 1600th file ...
Processing 1700th file ...
Processing 1800th file ...
Processing 1900th file ...
Processing 2000th file ...


### Dumping into JSON file

In [216]:
def _json_default(obj):
    """Convert values that json cannot encode natively.

    bytes are decoded as UTF-8 text; any other iterable (for example a
    protobuf repeated field) is turned into a list. Anything else raises
    TypeError, which is what json expects from a `default` hook.
    """
    if isinstance(obj, bytes):
        return obj.decode('utf-8')
    try:
        return list(obj)
    except TypeError:
        raise TypeError(
            f"Object of type {type(obj).__name__} is not JSON serializable"
        ) from None


# Write the records as a real JSON array of objects. Dumping repr(audios)
# would store a single JSON string holding Python syntax, which JSON
# consumers cannot read back as records.
with open('music_set.json', 'w') as file:
    json.dump(audios, file, default=_json_default)

### Preview: first 10 embedding values of the first 4 records (each record covers 10 sec)

In [222]:
# Show the first 10 embedding values of each of the first 4 stored records.
[record['data'][:10] for record in audios[:4]]

[[0, 255, 0, 255, 147, 255, 12, 255, 0, 0],
 [166, 73, 135, 117, 139, 31, 187, 200, 190, 99],
 [71, 24, 175, 143, 68, 126, 84, 118, 78, 157],
 [208, 255, 255, 68, 8, 145, 134, 220, 50, 205]]