In [1]:
# IMPORT THE LIBRARIES
import pandas as pd
import numpy as np
import librosa
import torch
import os
from datasets import load_dataset, Audio
from glob import glob
from qdrant_client import QdrantClient
from qdrant_client.http import models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# IMPORT THE DATA
data_path = os.path.join(".", "dataset", "music-clips-50")
data = load_dataset(
    "audiofolder", data_dir=os.path.join(data_path), split="train", drop_labels=True
)

In [3]:
# GENERATE THE IDS
id = [
    (
        data[i]
        ['audio']
        ['path']
        .split("/")
        [-1]
        .replace(".wav", '')
    )
    for i in range(len(data))
]
index = [num for num in range(len(data))]
data = data.add_column("index", index)
data = data.add_column("id", id)
print(data)

Dataset({
    features: ['audio', 'index', 'id'],
    num_rows: 50
})


In [4]:
# CREATE A PANDAS DATAFRAME
dataframe  = pd.DataFrame(data)
# WHAT WE SEND TO THE DATABASE IS CALLED PAYLOAD
payload = dataframe.drop(['index', 'id'], axis=1).to_dict(orient="records")

In [5]:
# CREATE THE AUDIO EMBEDDINGS
# SELECT A RANDOM SONG
song = os.path.join(data_path, "48.wav")
audio, sampling_rate = librosa.core.load(song, sr=44100, mono=True)
audio.shape
audio_reshaped = audio[None, :]
audio_reshaped.shape
resampled_audio = librosa.resample(y=audio_reshaped, orig_sr=sampling_rate, target_sr=16_000)

In [6]:
# IMPORT AND INITIALISE THE MODEL
from transformers import AutoFeatureExtractor, AutoModel
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained('facebook/wav2vec2-base').to(device)
feature_extractor = AutoFeatureExtractor.from_pretrained('facebook/wav2vec2-base')

2025-05-15 02:46:41.984836: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747257402.039571    8988 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747257402.056477    8988 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1747257402.182078    8988 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747257402.182105    8988 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1747257402.182108    8988 computation_placer.cc:177] computation placer alr

In [7]:
# PREPARE INPUTS TO FEED TO THE EMBEDDING MODEL
inputs = feature_extractor(
    resampled_audio[0], 
    sampling_rate=feature_extractor.sampling_rate,
    return_tensors="pt",
    padding=True,
    return_attention_mask=True,
    truncation=True,
    max_length=16_000
).to(device)

inputs['input_values'].shape

torch.Size([1, 16000])

In [8]:
# CREATE SAMPLE EMBEDDINGS FOR ONE SONG
with torch.no_grad():
    embeddings = model(**inputs).last_hidden_state.mean(dim=1)

In [9]:
embeddings.shape

torch.Size([1, 768])

In [10]:
# FUNCTION TO CREATE EMBEDDINGS FOR ALL SONGS
def generate_embeddings(batch):
    songs = [x["array"] for x in batch["audio"]]


    inputs = feature_extractor(
        songs, sampling_rate=16_000, return_tensors="pt", padding=True,
        return_attention_mask=True, max_length=16_000, truncation=True
    ).to(device)


    with torch.no_grad():
        pooled_embeddings = model(**inputs).last_hidden_state.mean(dim=1)


    return {"Embeddings": pooled_embeddings.cpu().numpy()}


data = data.cast_column("audio", Audio(sampling_rate=16_000))
data = data.map(generate_embeddings, batched=True, batch_size=4)
data

Map: 100%|████████████████████████████████████████████| 50/50 [00:02<00:00, 18.68 examples/s]


Dataset({
    features: ['audio', 'index', 'id', 'Embeddings'],
    num_rows: 50
})

In [16]:
data.features['Embeddings']
len(data['Embeddings'][0])

# WE HAVE CREATED THE AUDIO EMBEDDINGS OF THE DATA

768

In [45]:
qdrant_client = QdrantClient(
    url="Hello", 
    api_key="Hello",
)

print(qdrant_client.get_collections())

collections=[]


In [59]:
collection_name = "music_collection"
qdrant_client.recreate_collection(
    collection_name=collection_name,
    vectors_config=models.VectorParams(size=768, distance=models.Distance.COSINE)
)

  qdrant_client.recreate_collection(


True

In [16]:
# check the type of the payload, It must be a List
type(payload[0]["audio"]["array"])
print(payload)

[{'audio': {'path': '/home/udayan/Projects/RAG1/dataset/music-clips-50/01.wav', 'array': array([-0.47167969, -0.52468872, -0.51089478, ...,  0.2678833 ,
        0.16299438,  0.11865234]), 'sampling_rate': 16000}}, {'audio': {'path': '/home/udayan/Projects/RAG1/dataset/music-clips-50/02.wav', 'array': array([-0.24053955, -0.27453613, -0.3175354 , ...,  0.35379028,
        0.27987671,  0.22906494]), 'sampling_rate': 16000}}, {'audio': {'path': '/home/udayan/Projects/RAG1/dataset/music-clips-50/03.wav', 'array': array([-0.00765991, -0.01452637, -0.12826538, ..., -0.27139282,
       -0.08724976,  0.07022095]), 'sampling_rate': 16000}}, {'audio': {'path': '/home/udayan/Projects/RAG1/dataset/music-clips-50/04.wav', 'array': array([0.01699829, 0.0020752 , 0.03121948, ..., 0.32128906, 0.39666748,
       0.41821289]), 'sampling_rate': 16000}}, {'audio': {'path': '/home/udayan/Projects/RAG1/dataset/music-clips-50/05.wav', 'array': array([ 0.05169678,  0.03759766,  0.05908203, ...,  0.04110718,
 

In [48]:
# Convert it to a List
for item in payload:
    item["audio"]["array"] = item["audio"]["array"].tolist()

In [61]:
# UPSERT THE VECTORS
batch_size = 1  # Adjust this value based on your needs

for i in range(0, len(data), batch_size):
    batch_data = data[i:i+batch_size]
    batch_payload = payload[i:i+batch_size]
    qdrant_client.upsert(
        collection_name=collection_name,
        points=models.Batch(
            ids=batch_data['index'],
            vectors=batch_data['Embeddings'],
            payloads=batch_payload
        )
    )

ResponseHandlingException: The read operation timed out