This program is a machine-learning-based music recommender system. It uses cosine similarity to perform content-based filtering on a Spotify dataset. The program computes a similarity score between songs based on their acoustic features, such as acousticness, danceability, energy, and tempo, among others. Using these scores, the program compares songs and retrieves the top 5 that are most similar to a given song.

In [None]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Columns of the track DataFrame that make up each song's feature vector
# for the similarity computation.
features = [
    'acousticness',
    'danceability',
    'duration_ms',
    'energy',
    'instrumentalness',
    'key',
    'liveness',
    'loudness',
    'mode',
    'speechiness',
    'tempo',
    'time_signature',
    'valence',
]

def CosineSimilarity(df):
    """Return the pairwise cosine-similarity matrix of the rows of *df*.

    Each row is reduced to the columns listed in ``features``, scaled to
    unit Euclidean length, and compared against every other row.

    Args:
        df: DataFrame containing every column named in ``features``.
            Its index labels are irrelevant; rows are handled purely by
            position.

    Returns:
        A square array with one row and one column per row of *df*, in
        positional order; entry ``[i, j]`` is the cosine similarity
        between row ``i`` and row ``j``.
    """
    # Work on a plain array so the division below is positional. Dividing
    # the DataFrame by a freshly built Series would align on index labels,
    # which yields NaN whenever df's index does not start at 0 (e.g. any
    # chunk after the first when the CSV is read with ``chunksize``).
    feature_matrix = df[features].to_numpy(dtype=float)
    norm = np.sqrt(np.sum(np.square(feature_matrix), axis=1))
    # An all-zero row has norm 0; dividing by it would produce NaN. Leave
    # such rows as zero vectors instead.
    norm[norm == 0] = 1.0
    normalized_features = feature_matrix / norm[:, np.newaxis]
    CSFeatures = cosine_similarity(normalized_features)
    return CSFeatures

similar_songs = []
# The dataset is streamed in chunks of 1000 rows. The user is prompted once
# per chunk, and recommendations are drawn only from that chunk.
for chunk in pd.read_csv("/content/spotifytracksdataset.csv", chunksize=1000):
    # dropna(): a missing artist is a float NaN, which str.join rejects.
    artist_names = chunk['artists'].dropna().unique().tolist()
    print("Distinct artist names in current chunk:")
    print("\n".join(artist_names))
    artist_name = input("Enter the name of the artist: ")
    by_artist = chunk['artists'] == artist_name
    artist_songs = chunk[by_artist]

    if artist_songs.empty:
        print("No songs found for the artist {}".format(artist_name))
        continue

    print("Songs by {}: ".format(artist_name))
    print(artist_songs['track_name'].values)

    selected_song = input("Select a song from the list: ")

    # Locate the chosen track by *position* within the chunk. The similarity
    # matrix is positional, whereas the chunk's index labels keep counting
    # across chunks, so labels must not be used to index into it.
    is_selected = (by_artist & (chunk['track_name'] == selected_song)).to_numpy()
    matches = np.flatnonzero(is_selected)
    if matches.size == 0:
        print("Song {} not found for the artist {}".format(selected_song, artist_name))
        continue
    song_idx = int(matches[0])

    # Read the genre from the chosen artist's own row, not from whichever
    # track in the chunk happens to share the title.
    selected_song_genre = chunk.iloc[song_idx]['track_genre']

    CSFeatures = CosineSimilarity(chunk)

    # (row position, score) pairs, most similar first.
    similarity_scores = sorted(
        enumerate(CSFeatures[song_idx]), key=lambda x: x[1], reverse=True
    )

    top_songs = []
    # The artist to exclude is the one the user picked; it is known up front
    # and must not depend on the order in which rows are visited.
    selected_artist = artist_name
    for idx, _score in similarity_scores:
        row = chunk.iloc[idx]
        song_name = row['track_name']
        candidate_artist = row['artists']
        song_genre = row['track_genre']

        # Skip the selected track itself (and any track sharing its title).
        if song_name == selected_song:
            continue
        # Recommend only unseen titles by other artists in the same genre.
        if (
            song_name not in top_songs
            and candidate_artist != selected_artist
            and song_genre == selected_song_genre
        ):
            top_songs.append(song_name)
            print("{} - {}".format(candidate_artist, song_name))
        if len(top_songs) == 5:
            break
