In [1]:
import os

import dask.dataframe as dd
import numpy as np
import pandas as pd
import requests
from sentence_transformers import SentenceTransformer, util

<!-- ##### NOTE
# The files that are not included yet are:
# jazz.00002.wav
# jazz.00009.wav
# jazz.00013.wav
# jazz.00016.wav
# jazz.00022.wav
# jazz.00031.wav
# jazz.00032.wav
# jazz.00033.wav
# jazz.00055.wav
# jazz.00078.wav
# jazz.00086.wav
# jazz.00090.wav
# jazz.00098.wav -->

In [2]:
# Lazily describe every captioned CSV; nothing is read until .compute().
df = dd.read_csv('DATASET/Captioned/*.csv')

# Materialise the two needed columns exactly once, so the CSV files are read
# and parsed a single time and `captions` and `ADDRESS` are guaranteed to be
# derived from the same rows in the same order.
captioned_rows = df[["captioning", "file_name"]].compute()
captions = captioned_rows["captioning"].tolist()

# key: caption, value: file_name
# (when a caption occurs more than once, the last row wins)
ADDRESS = dict(zip(captions, captioned_rows["file_name"].tolist()))

In [3]:
# Free-text request; the cells below embed it and rank the captions against it.
QUERY = "I want a slow and mellow song for sleep"

In [4]:
# Sentence encoder shared by the query and the caption corpus.
VDB_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

# Embed the caption corpus and the query with the same encoder
# (the two calls are independent of each other).
passage_embedding = VDB_model.encode(captions)
query_embedding = VDB_model.encode(QUERY)

# Dot-product score of the query against every caption.
similarity = util.dot_score(a=query_embedding, b=passage_embedding)

# Sorting along the last dimension yields a (values, indices) pair with the
# highest score first; later cells read the indices at position [1].
sorted_similarity = similarity.sort(dim=-1, descending=True)

In [5]:
# Position [1] of the sort result holds the caption indices, best match first;
# row 0 is used because a single query was scored.
ranked_indices = sorted_similarity[1]
top_index = int(ranked_indices[0][0])
Query_match_caption = captions[top_index]
print("Caption that matched your query:\n" + Query_match_caption)

Caption that matched your query:
[0:00-10:00]
This is a lullaby music piece. There is a female vocalist singing melodically. The melody is being played by the piano while there is a synth pad playing in the background. The atmosphere is dreamy. This piece could be used in the soundtrack of a drama TV series during the scenes where the characters are reminiscing the past. 
 
[10:00-20:00]
A female vocalist sings this soft love song in a foreign language. The tempo is slow with a melancholic piano accompaniment and a violin harmony. The song is soft, mellow, poignant, emotional, sentimental, melancholic and pensive. This song is a Romantic Pop. 
 
[20:00-30:00]
This slow pop song features a female voice singing the main melody. This is accompanied by an acoustic guitar playing chords in the background. The bass plays the root notes of the chords. There is no percussion in this song. The mood of this song is romantic. This song can be played in a romantic movie. 
 



In [6]:
# Map the best-matching caption back to the audio file it describes.
best_caption = captions[int(sorted_similarity[1][0][0])]
music_clip_name = ADDRESS[best_caption]
print("Music file name:", music_clip_name)

Music file name: pop.00011.wav


In [14]:
# Hugging Face Inference API endpoint for the Zephyr-7B-beta model
API_URL_Z = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
# The access token is read from the HF_TOKEN environment variable so that no
# credential is stored in the notebook. When the variable is unset the header
# value is "Bearer " followed by an empty token.
headers_Z = {"Authorization": "Bearer " + os.environ.get("HF_TOKEN", "")}

    
def Zephyr_overall_opinion(Clips_descriptions, timeout=60):
    """Ask Zephyr-7B-beta for an overall description of a piece of music.

    Args:
        Clips_descriptions: Text describing the individual clips of one song;
            it is inserted verbatim into the user turn of the prompt.
        timeout: Seconds to wait for the Inference API before giving up.

    Returns:
        The text generated by the model, without the echoed prompt.

    Raises:
        requests.HTTPError: The API answered with a 4xx/5xx status code.
        RuntimeError: The API answered with an error payload, or with a body
            that does not have the expected ``[{"generated_text": ...}]`` shape.
    """
    Z_Query_Final = f"""<|system|>
        Given the user's descriptions about each ten-second clips of the music, provide an overall description of the music</s>
        <|user|>
        {Clips_descriptions}</s>
        <|assistant|>"""

    # A timeout keeps a stalled request from hanging the kernel indefinitely.
    response = requests.post(
        API_URL_Z,
        headers=headers_Z,
        json={"inputs": Z_Query_Final},
        timeout=timeout,
    )
    response.raise_for_status()
    payload = response.json()

    # The API can answer with a JSON object carrying an "error" key instead of
    # a list of generations; report that message instead of failing on [0].
    if isinstance(payload, dict) and "error" in payload:
        raise RuntimeError(f"Inference API error: {payload['error']}")
    if (
        not isinstance(payload, list)
        or not payload
        or not isinstance(payload[0], dict)
        or "generated_text" not in payload[0]
    ):
        raise RuntimeError(f"Unexpected Inference API response: {payload!r}")

    generated_text = payload[0]["generated_text"]
    # Strip the prompt only when the model really echoed it back; otherwise
    # slicing would cut off the beginning of the answer.
    if generated_text.startswith(Z_Query_Final):
        return generated_text[len(Z_Query_Final):]
    return generated_text

In [15]:
# Summarise the per-clip captions of the best match into one description.
ANSWER = Zephyr_overall_opinion(Clips_descriptions=Query_match_caption)
print(ANSWER)


Overall, these three ten-second clips showcase a collection of soothing and romantic music pieces. The first clip features a dreamy lullaby with a female vocalist singing over a piano melody and a synth pad background. The second clip presents a slow love song in a foreign language, with a melancholic piano accompaniment and a violin harmony, creating a soft and emotional atmosphere. The third clip features a slow pop song with a female vocalist and an acoustic guitar accompaniment, creating a romantic mood without any percussion. These pieces would be suitable for use in dramas, romantic movies, or as background music for intimate moments.


## Packaging

In [12]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import requests
import dask.dataframe as dd
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer, util
import requests
import dask.dataframe as dd

In [2]:
# Lazily describe every captioned CSV file.
df = dd.read_csv('DATASET/Captioned/*.csv')

# key: caption, value: file_name
ADDRESS = {row['captioning']: row['file_name'] for _, row in df.iterrows()}

In [23]:
class sangeetha_swarangal_similar_music:
    """Caption-based song retrieval with an LLM-generated justification.

    The captions (keys of ``database``) are embedded once at construction
    time; each query is embedded on demand and scored against them with a
    dot product.
    """

    # Hugging Face Inference API endpoint used to explain each match
    API_URL_Z = "https://api-inference.huggingface.co/models/HuggingFaceH4/zephyr-7b-beta"
    # Seconds to wait for the Inference API before giving up
    REQUEST_TIMEOUT = 60

    def __init__(self, database):
        """Embed every caption of ``database``.

        Args:
            database: Mapping whose keys are the captions to search over.
        """
        self.captions = list(database.keys())
        self.VDB_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
        self.All_songs_embedding = self.VDB_model.encode(self.captions)

    def get_k_songs_based_on_query(self, query, k):
        """Return the ``k`` captions that score highest against ``query``.

        Args:
            query: Free-text request.
            k: Number of matches wanted. Values larger than the number of
                captions are clamped; zero or negative values give no matches.

        Returns:
            A tuple ``(captions, reasons)`` of two equally long lists, best
            match first; ``reasons[i]`` is the model's explanation for
            ``captions[i]``.
        """
        # Never ask for more matches than there are captions.
        k = max(0, min(k, len(self.captions)))
        if k == 0:
            # Nothing to rank: skip the encoder and the API entirely.
            return ([], [])

        query_embedding = self.VDB_model.encode(query)
        similarity = util.dot_score(query_embedding, self.All_songs_embedding)
        # sort() yields (values, indices); row 0 belongs to the single query.
        ranked_indices = similarity.sort(descending=True)[1][0][:k]
        K_query_match_caption = [self.captions[int(index)] for index in ranked_indices]
        why_these_clips = [
            self.Zephyr_overall_opinion(caption, query)
            for caption in K_query_match_caption
        ]
        return (K_query_match_caption, why_these_clips)

    @staticmethod
    def _strip_prompt(generated_text, prompt):
        """Remove ``prompt`` from the start of ``generated_text`` if it is there."""
        if generated_text.startswith(prompt):
            return generated_text[len(prompt):]
        return generated_text

    def Zephyr_overall_opinion(self, Clips_descriptions, query):
        """Ask Zephyr-7B-beta why a song description matches a request.

        Args:
            Clips_descriptions: Caption of the matched song.
            query: The request the song was matched against.

        Returns:
            The text generated by the model, without the echoed prompt.

        Raises:
            requests.HTTPError: The API answered with a 4xx/5xx status code.
            RuntimeError: The API answered with an error payload, or with a
                body that is not ``[{"generated_text": ...}]``.
        """
        import os  # local import: keeps the class usable without touching the import cell

        # The token is read from the HF_TOKEN environment variable so that no
        # credential is stored in the notebook; unset means an empty token.
        headers_Z = {"Authorization": "Bearer " + os.environ.get("HF_TOKEN", "")}
        # Prompt tuning
        Z_Query_Final = f"""<|system|>
            ROLE: song suggestion reasoner. You matched the my REQUEST for song with another song having the following DESCRIPTION. Tell me why you matched these?</s>
            <|user|>
            DESCRIPTION: {Clips_descriptions}
            REQUEST: {query}</s>
            <|assistant|>"""
        # Generate response; the timeout keeps a stalled request from hanging.
        response = requests.post(
            self.API_URL_Z,
            headers=headers_Z,
            json={"inputs": Z_Query_Final},
            timeout=self.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        payload = response.json()

        # The API can answer with a JSON object carrying an "error" key instead
        # of a list of generations; report that message instead of failing on [0].
        if isinstance(payload, dict) and "error" in payload:
            raise RuntimeError(f"Inference API error: {payload['error']}")
        if (
            not isinstance(payload, list)
            or not payload
            or not isinstance(payload[0], dict)
            or "generated_text" not in payload[0]
        ):
            raise RuntimeError(f"Unexpected Inference API response: {payload!r}")

        return self._strip_prompt(payload[0]["generated_text"], Z_Query_Final)

In [24]:
QUERY = "I want a slow and mellow song"
K = 5
SSAI1 = sangeetha_swarangal_similar_music(ADDRESS)

# Retrieve the K best captions together with the model's reasoning, then map
# every caption back to the audio file it belongs to.
k_captions, reasoning = SSAI1.get_k_songs_based_on_query(QUERY, K)
music_clips_name = [ADDRESS[caption] for caption in k_captions]

for clip_name, caption, reason in zip(music_clips_name, k_captions, reasoning):
    print("Music file name:", clip_name)
    print(f"captions: {caption}")
    print(f"Reason: {reason}")
    print("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx")

Music file name: pop.00000.wav
captions: [0:00-10:00]
A female vocalist sings this melodic Soft Rock. The tempo is medium with a strong bass line, keyboard accompaniment, steady drumming and various percussion hits. The song is mellow, soft, melodic, soothing, devotional, sentimental, melancholic and pensive. This song is a devotional Christian Song. 
 
[10:00-20:00]
This is a teen pop music piece. There is a female vocalist singing melodically. The main tune is being played by the acoustic guitar and the electric guitar while the bass guitar is playing in the background. The rhythm is provided by a simple acoustic drum beat. The atmosphere is mellow. This piece could be used in the soundtrack of a teenage drama TV series. 
 
[20:00-30:00]
A female vocalist sings this melodic Soft Rock. The tempo is medium with a strong bass line, keyboard accompaniment, steady drumming and various percussion hits. The song is emotional and devotional. The audio quality is poor. 
 

Reason: 
Based on y