In [None]:
import essentia
print(essentia.__version__)
print(essentia.__file__)
import essentia.standard , essentia.streaming
from essentia.standard import MonoLoader, TensorflowPredictFSDSINet

# let's have a look at what is in there
#print(dir(essentia.standard))

import utils.util as util

import json
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Unpack the four values returned by the project helper (presumably file names
# and labels for the mp4 and aac variants of the test set -- confirm in utils.util).
mp4_fn, mp4_labels, aac_fn, aac_labels = util.load_xdv_test()

# Decode one aac entry (index 17) to a mono signal resampled to 22050 Hz.
loader = MonoLoader(filename=aac_fn[17], sampleRate=22050)
audio = loader()

In [None]:
'''
model files
    https://essentia.upf.edu/models/audio-event-recognition/fsd-sinet/    
    !wget -q https://essentia.upf.edu/models/audio-event-recognition/fsd-sinet/fsd-sinet-vgg41-tlpf-1.pb
    !wget -q https://essentia.upf.edu/models/audio-event-recognition/fsd-sinet/fsd-sinet-vgg41-tlpf-1.json


https://mtg.github.io/essentia-labs/news/tensorflow/2023/02/08/fsdsinet-models/
    tlpf : Trainable Low-Pass Filters
    aps : Adaptive Polyphase Sampling

    fsd-sinet-vgg42-tlpf_aps-1 - best
    fsd-sinet-vgg41-tlpf-1 - lighter
'''

# Path of the frozen TensorFlow graph (.pb) handed to the predictor below.
graph_filename = "/fsd-sinet-essentia/models/fsd-sinet-vgg42-aps-1.pb"

# Instantiate the Essentia predictor once, then run it over the decoded signal.
model = TensorflowPredictFSDSINet(
    graphFilename=graph_filename,
)
predictions = model(audio)

In [None]:
def top_from_average(data, top_n=10, axis=1):
    """Rank the entries of a 2-D array by their mean along one axis.

    Args:
        data: 2-D array-like of scores.
        top_n: Number of leading indices to return.
        axis: Axis that is averaged away. The default (1) averages every
            row, so the ranked entries are rows. Pass 0 to rank columns
            instead, e.g. when ``data`` is laid out as (frames, classes).

    Returns:
        A ``(indices, averages)`` tuple. ``indices`` holds the positions of
        the ``top_n`` largest means, largest first. ``averages`` is a list of
        *all* means in descending order (it is not truncated to ``top_n``),
        so its first ``top_n`` items line up with ``indices``.
    """
    av = np.mean(data, axis=axis)
    # argsort is ascending; reverse it to get the largest mean first.
    sorting = np.argsort(av)[::-1]
    return sorting[:top_n], [av[i] for i in sorting]

# Read the metadata
metadata_file = "/fsd-sinet-essentia/models/fsd-sinet-vgg42-aps-1.json"
# Use a context manager so the file handle is closed as soon as parsing is
# done, and decode explicitly as UTF-8 instead of the locale default.
with open(metadata_file, "r", encoding="utf-8") as metadata_fh:
    metadata = json.load(metadata_fh)
labels = metadata["classes"]

# One score per class: average the predictions over axis 0 and pair each
# mean with the class name at the same position.
for label, probability in zip(labels, predictions.mean(axis=0)):
    print(f'{label}: {100 * probability:.1f}%')


In [None]:

# Compute the top-n labels and predictions.
# The per-class printout earlier averages `predictions` over axis 0, i.e. rows
# are time frames and columns are classes. top_from_average() averages along
# axis 1 and the heat map needs one row per class, so work on the transposed
# (classes, frames) matrix; otherwise the "top" indices would be frame indices
# that get wrongly used to look up class labels.
class_activations = np.asarray(predictions).T
top_n, averages = top_from_average(class_activations, top_n=15)
top_labels = [labels[i] for i in top_n]
top_labels_with_av = [
    f"{label} ({av:.3f})" for label, av in zip(top_labels, averages)
]

# One row per selected class, one column per time frame.
top_predictions = class_activations[top_n]

# Generate plots and improve formatting
matfig = plt.figure(figsize=(8, 3))
plt.matshow(top_predictions, fignum=matfig.number, aspect="auto")

plt.yticks(np.arange(len(top_labels_with_av)), top_labels_with_av)
locs, _ = plt.xticks()
# Convert frame positions to seconds; the // 2 assumes two prediction frames
# per second -- TODO confirm against the model's patch hop size.
ticks = np.array(locs // 2).astype("int")
# Drop the first and last automatic ticks, which fall outside the data range.
plt.xticks(locs[1: -1], ticks[1: -1])
plt.tick_params(
    bottom=True, top=False, labelbottom=True, labeltop=False
)
plt.xlabel("(s)")

#plt.savefig("predictions.png", bbox_inches='tight')
