In [5]:
import faiss
import sqlite3
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import torch

# Load model

In [6]:
device = "mps" if torch.backends.mps.is_available() else "cpu"
model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

In [4]:
df = pd.read_csv('songs_lyrics_adjusted.csv')
df["lyrics"] = df["lyrics"].fillna("")

In [8]:
test = df[:1000]

In [9]:
embeddings = model.encode(test['lyrics'].tolist(), batch_size=64, convert_to_numpy=True, device=device)

In [10]:
index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 Distance Index
index.add(embeddings)
faiss.write_index(index, "song_lyrics.index")


In [11]:
db_conn = sqlite3.connect("songs.db")
df.to_sql("songs", db_conn, if_exists="replace", index=True)
db_conn.close()

In [45]:
db_conn = sqlite3.connect("songs.db")
cursor = db_conn.cursor()
cursor.execute("SELECT lyrics FROM songs WHERE rowid = ?", (451+1,))
row = cursor.fetchall()
print(row)


[('intro hayley williams\ncan we pretend that airplanes\nin the night sky are like shooting stars\ni could really use a wish right now\nwish right now wish right now\ncan we pretend that airplanes\nin the night sky are like shooting stars\ni could really use a wish right now\nwish right now wish right now\n\nverse 1 bob\nyeah i could use a dream or a genie or a wish\nto go back to a place much simpler than this\ncause after all the partyin and smashin and crashin\nand all the glitz and the glam and the fashion\nand all the pandemonium and all the madness\nthere comes a time where you fade to the blackness\nand when you starin at that phone in your lap\nand you hopin but them people never call you back\nbut thats just how the story unfolds\nyou get another hand soon after you fold\nand when your plans unravel in the sand\nwhat would you wish for if you had one chance\nso airplane airplane sorry im late\nim on my way so dont close that gate\nif i dont make that then ill switch my flight\

In [56]:
def search_songs(tag, top_k=5):
    tag_embedding = model.encode([tag], convert_to_numpy=True, device=device)
    index = faiss.read_index("song_lyrics.index")
    distances, indices = index.search(tag_embedding, top_k)
    # Retrieve song metadata
    db_conn = sqlite3.connect("songs.db")
    cursor = db_conn.cursor()
    
    results = []
    for idx, distance in zip(indices[0], distances[0]):
        cursor.execute("SELECT title, lyrics FROM songs WHERE rowid = ?", (int(idx)+1,))
        row = cursor.fetchone()
        print(row)
        if row:
            title, lyrics = row
            results.append((title, lyrics, distance))
    
    db_conn.close()
    return results

In [57]:
tag = "airplane"
results = search_songs(tag)
if results:
    print("Most similar song:")
    title, lyrics, similarity = results[0]
    print(f"Title: {title}\nLyrics: {lyrics[:200]}...\nSimilarity Score: {similarity:.4f}\n")
else:
    print("No matching songs found.")

[[451 754 872 537  85]]
[[1.221153  1.4340206 1.5551693 1.5673573 1.5746797]]
<class 'numpy.int64'>
('Airplanes', 'intro hayley williams\ncan we pretend that airplanes\nin the night sky are like shooting stars\ni could really use a wish right now\nwish right now wish right now\ncan we pretend that airplanes\nin the night sky are like shooting stars\ni could really use a wish right now\nwish right now wish right now\n\nverse 1 bob\nyeah i could use a dream or a genie or a wish\nto go back to a place much simpler than this\ncause after all the partyin and smashin and crashin\nand all the glitz and the glam and the fashion\nand all the pandemonium and all the madness\nthere comes a time where you fade to the blackness\nand when you starin at that phone in your lap\nand you hopin but them people never call you back\nbut thats just how the story unfolds\nyou get another hand soon after you fold\nand when your plans unravel in the sand\nwhat would you wish for if you had one chance\nso airpl