## Install Dependencies

In [None]:
!pip install --upgrade -qr /kaggle/input/spectrogrand-public-release/kaggle-public-release/REQUIREMENTS.txt
!pip install -q essentia-tensorflow

## Load models

In [None]:
!wget "https://essentia.upf.edu/models/classification-heads/danceability/danceability-discogs-effnet-1.pb"
!wget "https://essentia.upf.edu/models/feature-extractors/discogs-effnet/discogs-effnet-bs64-1.pb"

In [7]:
from diffusers import AudioLDM2Pipeline
from typing import Optional, List
import scipy
import numpy as np
import librosa
import pickle
from essentia.standard import MonoLoader, TensorflowPredictMusiCNN, TensorflowPredict2D
from essentia.standard import MonoLoader, TensorflowPredictVGGish, TensorflowPredict2D
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D

import torch
torch.random.manual_seed(42)
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

from transformers import ClapModel, ClapProcessor


# Load the CLAP pipeline
clap_model = ClapModel.from_pretrained("laion/clap-htsat-fused")
clap_model.to(DEVICE)
clap_processor = ClapProcessor.from_pretrained("laion/clap-htsat-fused")

## Migrate code to compute audio scores

In [8]:
"""
    @method resample_audio_data
        Resample audio data to a target sampling rate
    @param origin_data: Numpy array containing the audio data (@note Intended inputs stem from the `load_wav_file` method)
    @param origin_sampling_rate: Sampling rate of the input audio (@note Intended inputs stem from the `load_wav_file` method)
    @param new_sampling_rate: Desired sampling rate (default: 48000)
"""
def resample_audio_data(origin_data:np.ndarray, origin_sampling_rate:int, new_sampling_rate:int=48000) -> Optional[np.ndarray]:
    try:
        origin_type = origin_data.dtype
        resampled_data = librosa.resample(origin_data.T.astype('float'), orig_sr = origin_sampling_rate, target_sr = new_sampling_rate) 
        resampled_data = librosa.to_mono(resampled_data)        
        resampled_data = resampled_data.T.astype(origin_type)
        data_np = np.array(resampled_data)
        return data_np
    except Exception as e:
        print(f"Error while resampling audio data: {e}")
        return None

"""
    @method load_wav_file
        Load a wav file from a given path
    @param input_file_path: Path containing the input audio file
"""
def load_wav_file(input_file_path:str):
    try:
        sr, data = scipy.io.wavfile.read(input_file_path)
        return sr, data
    except Exception as e:
        print(f"Error while reading {input_file_path}: {e}")
        return None, None
    

"""
    @method compute_clap_embeddings
        Compute CLAP embeddings for an input audio file
    @input input_file_path: Path to the input audio file
"""
def compute_clap_embeddings(input_file_path:str) -> Optional[torch.Tensor]:
    try:
        global clap_processor, clap_model, DEVICE
        # Load audio and resample to 48000 Hz
        sr, origin_data = load_wav_file(input_file_path=input_file_path)
        origin_data_resampled = resample_audio_data(origin_data=origin_data, origin_sampling_rate=sr, new_sampling_rate=48000)
        # Get CLAP outputs
        clap_inputs = clap_processor(audios=origin_data_resampled, sampling_rate=48000, return_tensors="pt").to(DEVICE)
        clap_outputs = clap_model.get_audio_features(**clap_inputs)
        audio_embeds = clap_outputs[0].detach().cpu()
        return audio_embeds
    except Exception as e:
        print(f"Error while computing CLAP embeddings for {input_file_path}: {e}")
        return None
    
"""
    @method compute_clap_similarity
        Compute CLAP similarity for an input audio file with respect to a saved ground truth mapping of embeddings
        @input input_file_path: Path to the input audio file
        @input ground_truth_dict_path: Path to the mapping .pkl file 
        @note The ground truth mapping should be a .pkl file with the following schema:
            {
                "genre_name" : [list_of_clap_embeddings],
                ...
            }
        @input filter_genre: Genre name to compute from. If values are to be aggregated across the entire search space, this value should be left as `None`. (default: None)
"""
def compute_clap_similarity(input_file_path:str, ground_truth_dict_path:str, filter_genre:Optional[str]=None) -> Optional[float]:
    try:
        # Load the embeddings from the ground truth mapping and set the search space
        with open(ground_truth_dict_path, "rb") as f:
            data = pickle.load(f)
        input_search_space_embeds = []
        if filter_genre is not None:
            # Convert `filter_genre` into underscore format if required
            if "_" not in filter_genre: # eg: 'bass house'
                filter_genre = filter_genre.replace(" ","_")
            input_search_space_embeds = data[filter_genre]
        else:
            for _k in data:
                input_search_space_embeds.extend(data[_k])
        assert len(input_search_space_embeds) >= 1

        # Compute CLAP embeddings for the input file
        source_embed = compute_clap_embeddings(input_file_path=input_file_path)

        # Keep track of running dot product scores
        running_score = 0.0
        for target_embed in input_search_space_embeds:
            z = source_embed@target_embed.T
            running_score += float(z.detach().cpu())

        # Return the average dot product score
        return (running_score)/float(len(input_search_space_embeds))
    except Exception as e:
        print(f"Error while computing CLAP similarity score for {input_file_path}: {e}")
        return None
    
"""
    @method get_danceability_score
        Use Essentia to score an audio track on its danceability
    @input input_file_path: Path to the input audio file
    @input embedding_model_path: Path to the essentia encoder model (@note To ensure compatibility, this should be a `.pb` file)
    @input danceability_model_path: Path to the essentia danceability computation model (@note To ensure compatibility, this should be a `.pb` file)
"""
def get_danceability_score(input_file_path:str, embedding_model_path:str, danceability_model_path:str) -> Optional[float]:
    try:
        # Load audio and get embeddings
        audio = MonoLoader(filename=input_file_path, sampleRate=16000, resampleQuality=4)()
        embedding_model = TensorflowPredictEffnetDiscogs(graphFilename=embedding_model_path, output="PartitionedCall:1")
        embeddings = embedding_model(audio)

        # Load model and get predictions
        model = TensorflowPredict2D(graphFilename=danceability_model_path, output="model/Softmax")
        predictions = model(embeddings)
        mean_danceability_score = np.mean(predictions[:,0])
        return mean_danceability_score
    except Exception as e:
        print(f"Error while computing danceability score for {input_file_path}: {e}")
        return None


In [9]:
GT_EMBEDDINGS_PATH = "/kaggle/input/spectrogrand-public-release/kaggle-public-release/housex_ground_truth_embeddings.pkl"
def calculate_novelty_score(input_file_path:str) -> float:
    try:
        sim_score = compute_clap_similarity(input_file_path=input_file_path, ground_truth_dict_path=GT_EMBEDDINGS_PATH, filter_genre=None)
        novelty_score = 1.0 - sim_score
        return novelty_score
    except Exception as e:
        print(f"Error while computing novelty score: {e}")
        return -1.0
    
ESSENTIA_EMBEDDINGS_MODEL_PATH = "/kaggle/working/discogs-effnet-bs64-1.pb"
ESSENTIA_DANCEABILITY_MODEL_PATH = "/kaggle/working/danceability-discogs-effnet-1.pb"
def calculate_danceability_score(input_file_path:str) -> float:
    try:
        essentia_score = get_danceability_score(input_file_path=input_file_path, embedding_model_path=ESSENTIA_EMBEDDINGS_MODEL_PATH, danceability_model_path=ESSENTIA_DANCEABILITY_MODEL_PATH)
        return essentia_score
    except Exception as e:
        print(f"Error while computing danceability score: {e}")
        return -1.0
    
def get_music_score(input_file_path:str) -> float:
    try:
        value_score = calculate_danceability_score(input_file_path=input_file_path)
        novelty_score = calculate_novelty_score(input_file_path=input_file_path)
        if value_score == -1.0 or novelty_score == -1.0:
            return -1.0
        # Update the weights if your use case values value more than novelty or vice-versa
        return 0.50*value_score + 0.50*novelty_score
    except Exception as e:
        print(f"Error while computing audio score: {e}")
        return -1.0

## Compute scores for all files in the survey base

In [11]:
from glob import glob
all_file_names = []
all_scores = []

pool_1_files = glob("/kaggle/input/spectrogrand-pmqd-survey/selected_audio_files_survey/pool-1/*.wav")

for _f in pool_1_files:
    score = get_music_score(_f)
    all_file_names.append(_f)
    all_scores.append(score)
    
print('='*90)

pool_2_files = glob("/kaggle/input/spectrogrand-pmqd-survey/selected_audio_files_survey/pool-2/*.wav")

for _f in pool_2_files:
    score = get_music_score(_f)
    all_file_names.append(_f)
    all_scores.append(score)

[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-ef



[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/discogs-effnet-bs64-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-effnet-1.pb`
[   INFO   ] TensorflowPredict: Successfully loaded graph file: `/kaggle/working/danceability-discogs-ef

## Save scores

In [13]:
import pandas as pd

df = pd.DataFrame({
    "filename": all_file_names,
    "audio_score" : all_scores
})

In [14]:
df

Unnamed: 0,filename,audio_score
0,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.537624
1,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.524891
2,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.536908
3,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.342022
4,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.542865
5,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.531359
6,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.538075
7,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.534503
8,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.533203
9,/kaggle/input/spectrogrand-pmqd-survey/selecte...,0.438718


In [15]:
df.to_csv("spectrogrand_audio_survey.csv", index = False)