Before running the steps below, place all the raw audio files in one folder and provide a `mapping.csv` file (the file loaded in the next cell) that maps each MSD `MSD_id` to its LFM-1b `tracks_id:token`.

In [6]:
import pandas as pd

# Load the MSD -> LFM-1b id table and turn it into a lookup keyed by MSD id.
mapping = pd.read_csv('mapping.csv')
mapping_dict = {
    msd_id: track_id
    for msd_id, track_id in zip(mapping['MSD_id'], mapping['tracks_id:token'])
}

## MuQ Features

In [None]:
import torch, librosa
from muq import MuQMuLan

# Use the GPU when one is available; otherwise fall back to the CPU so the
# notebook does not crash on machines without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the pretrained MuQ-MuLan checkpoint, move it to the chosen device and
# switch it to evaluation mode for inference.
mulan = MuQMuLan.from_pretrained("OpenMuQ/MuQ-MuLan-large")
mulan = mulan.to(device).eval()

import os
import torch
import librosa
import numpy as np
from tqdm import tqdm
import pickle

# Extract features and save as id:embedding in pkl format
def traverse_and_extract_features(folder_path, output_path, mapping_dict):
    """Extract MuQ-MuLan embeddings for every audio file under a folder.

    Walks ``folder_path`` recursively, embeds each ``.mp3``/``.wav`` file with
    the notebook-level ``mulan`` model on ``device`` and writes the result to
    ``<output_path>/<file stem>.npy``.  Files whose ``.npy`` already exists in
    ``output_path`` are skipped, so an interrupted run can be resumed.
    Finally ``<output_path>/features.pkl`` is (re)written with a dict mapping
    ``mapping_dict[file stem]`` to the embedding.  Embeddings produced by
    earlier runs are reloaded from their ``.npy`` files, so a resumed run does
    not drop them from the pickle.

    Args:
        folder_path: Root directory containing the raw audio files.
        output_path: Existing directory that receives the ``.npy`` files and
            ``features.pkl``.
        mapping_dict: Maps an audio file stem to the id used as key in
            ``features.pkl``.  Stems missing from it are still saved as
            ``.npy`` but are left out of the pickle.
    """
    # Only finished ``.npy`` outputs count as "done".  A set gives O(1)
    # membership tests, and ignoring other files means ``features.pkl`` can
    # no longer mask an audio file that happens to be called ``features``.
    names_ready = set()
    for entry in os.listdir(output_path):
        stem, ext = os.path.splitext(entry)
        if ext == '.npy':
            names_ready.add(stem)

    # Seed the result with the embeddings of previous runs; otherwise the
    # pickle written below would only contain the newly processed files.
    features_dict = {}
    for stem in sorted(names_ready):
        if stem in mapping_dict:
            saved_file = os.path.join(output_path, stem + '.npy')
            features_dict[mapping_dict[stem]] = np.load(saved_file)

    for root, _, files in os.walk(folder_path):
        for file in tqdm(files):
            if not file.endswith(('.mp3', '.wav')):
                continue
            file_prefix = os.path.splitext(file)[0]
            if file_prefix in names_ready:
                continue

            file_path = os.path.join(root, file)
            # librosa resamples to 24 kHz on load (presumably the rate the
            # model expects -- confirm against the MuQ documentation).
            wav, _ = librosa.load(file_path, sr=24000)
            # unsqueeze(0) adds a leading dimension of size 1 (batch of one).
            wavs = torch.tensor(wav).unsqueeze(0).to(device)
            with torch.no_grad():
                audio_embeds = mulan(wavs=wavs)
            audio_embeds = audio_embeds.cpu().numpy()

            if file_prefix in mapping_dict:
                features_dict[mapping_dict[file_prefix]] = audio_embeds
            output_file = os.path.join(output_path, file_prefix + '.npy')
            np.save(output_file, audio_embeds)

    # Save features_dict to pkl
    pkl_file_path = os.path.join(output_path, 'features.pkl')
    with open(pkl_file_path, 'wb') as f:
        pickle.dump(features_dict, f)

# Placeholders: point these at the directory holding the raw audio and at the
# directory that should receive the MuQ embeddings.
folder_path = 'WAV_FILE_FOLDER_PATH'
output_path = 'OUTPUT_PATH'

# The extractor lists output_path before processing, so it must exist first.
os.makedirs(output_path, exist_ok=True)

traverse_and_extract_features(
    folder_path=folder_path,
    output_path=output_path,
    mapping_dict=mapping_dict,
)


## CLAP Features

In [None]:
from msclap import CLAP

# Instantiate the CLAP model; `version` may be '2022' or '2023'.
# When `model_fp` is not given, the model weights are downloaded automatically.
clap_model = CLAP(version='2023', use_cuda=True)

import os
import torch
import librosa
import numpy as np
from tqdm import tqdm


def traverse_and_extract_features(folder_path, output_path, mapping_dict=None):
    """Extract CLAP audio embeddings for every audio file under a folder.

    Walks ``folder_path`` recursively, embeds each ``.mp3``/``.wav`` file with
    the notebook-level ``clap_model`` and writes the result to
    ``<output_path>/<file stem>.npy``.  Files whose ``.npy`` already exists in
    ``output_path`` are skipped, so an interrupted run can be resumed.
    Finally ``<output_path>/features.pkl`` is (re)written with a dict mapping
    ``mapping_dict[file stem]`` to the embedding.  Embeddings produced by
    earlier runs are reloaded from their ``.npy`` files, so a resumed run does
    not drop them from the pickle.

    Args:
        folder_path: Root directory containing the raw audio files.
        output_path: Existing directory that receives the ``.npy`` files and
            ``features.pkl``.
        mapping_dict: Optional dict mapping an audio file stem to the id used
            as key in ``features.pkl``.  When omitted, the notebook-level
            ``mapping_dict`` is used, as before.  Stems missing from it are
            still saved as ``.npy`` but are left out of the pickle.

    Raises:
        NameError: If ``mapping_dict`` is omitted and no notebook-level
            ``mapping_dict`` exists.
    """
    # This cell does not import pickle itself; import it here so the cell
    # also works when it is run without the MuQ cell.
    import pickle

    if mapping_dict is None:
        # Backward-compatible default: the two-argument call keeps reading
        # the module-level mapping.
        try:
            mapping_dict = globals()['mapping_dict']
        except KeyError:
            raise NameError("name 'mapping_dict' is not defined") from None

    # Only finished ``.npy`` outputs count as "done".  A set gives O(1)
    # membership tests, and ignoring other files means ``features.pkl`` can
    # no longer mask an audio file that happens to be called ``features``.
    names_ready = set()
    for entry in os.listdir(output_path):
        stem, ext = os.path.splitext(entry)
        if ext == '.npy':
            names_ready.add(stem)

    # Seed the result with the embeddings of previous runs; otherwise the
    # pickle written below would only contain the newly processed files.
    features_dict = {}
    for stem in sorted(names_ready):
        if stem in mapping_dict:
            saved_file = os.path.join(output_path, stem + '.npy')
            features_dict[mapping_dict[stem]] = np.load(saved_file)

    for root, _, files in os.walk(folder_path):
        for file in tqdm(files):
            # Accept the same extensions as the MuQ extractor so both
            # feature sets cover the same tracks.
            if not file.endswith(('.mp3', '.wav')):
                continue
            file_prefix = os.path.splitext(file)[0]
            if file_prefix in names_ready:
                continue

            # The CLAP API takes a list of paths; embed one file per call.
            file_paths = [os.path.join(root, file)]
            audio_embeds = clap_model.get_audio_embeddings(file_paths)
            audio_embeds = audio_embeds.cpu().numpy()

            if file_prefix in mapping_dict:
                features_dict[mapping_dict[file_prefix]] = audio_embeds
            output_file = os.path.join(output_path, file_prefix + '.npy')
            np.save(output_file, audio_embeds)

    # Save features_dict to pkl
    pkl_file_path = os.path.join(output_path, 'features.pkl')
    with open(pkl_file_path, 'wb') as f:
        pickle.dump(features_dict, f)

# Placeholders: point these at the directory holding the raw audio and at the
# directory that should receive the CLAP embeddings.
# NOTE: use a different output directory than the MuQ run -- any existing
# file stem in output_path is treated as already processed and skipped.
folder_path = 'WAV_FILE_FOLDER_PATH'
output_path = 'OUTPUT_PATH'

# The extractor lists output_path before processing, so it must exist first.
os.makedirs(output_path, exist_ok=True)

traverse_and_extract_features(
    folder_path=folder_path,
    output_path=output_path,
)
