In [21]:
%matplotlib inline
import csv
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.spatial.distance
from scipy.sparse import lil_matrix
from scipy.spatial.distance import cosine


In [15]:
# Three empty lookup dicts.
# NOTE(review): none of them is filled or read by the cells visible here —
# confirm they are still needed.
artist_name_map, artist_genre_map, artist_title_map = {}, {}, {}

# Raw song table; the path is resolved against the kernel's working directory.
df = pd.read_csv('Spotify-2000.csv')


In [25]:
# Feature columns that make up each song's trait vector.
numeric_columns = [
    'Beats Per Minute (BPM)',
    'Energy',
    'Danceability',
    'Loudness (dB)',
    'Liveness',
    'Valence',
    'Length (Duration)',
    'Acousticness',
    'Speechiness',
    'Popularity',
]

# Coerce every feature column to a numeric dtype in one column-wise pass;
# values that cannot be parsed become NaN instead of raising.
# NOTE(review): a value stored as text with a thousands separator (e.g. "1,412")
# would also become NaN here and its row would be dropped below — confirm
# against the CSV that no real data is lost this way.
df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Keep only the songs that have a usable value in every feature column.
df = df.dropna(subset=numeric_columns)

# Group the songs by artist: each artist maps to a list holding one feature
# vector per song, with values in the order given by `numeric_columns`.
artist_traits = {}

for _, song in df.iterrows():
    song_features = [song[name] for name in numeric_columns]
    # setdefault creates the artist's list the first time the artist is seen.
    artist_traits.setdefault(song['Artist'], []).append(song_features)

def mean_pairwise_cosine_similarity(traits_by_artist):
    """Average song-to-song cosine similarity for every ordered pair of artists.

    Parameters
    ----------
    traits_by_artist : dict
        Maps an artist to a non-empty list of equal-length numeric feature
        vectors, one vector per song.

    Returns
    -------
    dict
        ``{(artist_a, artist_b): similarity}`` for every ordered pair with
        ``artist_a != artist_b``, in the iteration order of
        ``traits_by_artist``. Each value is the mean cosine similarity over
        all (song of artist_a, song of artist_b) combinations, as a float.

    Notes
    -----
    The cosine similarity of two vectors is the dot product of their
    unit-length versions, and the mean of all pairwise dot products between
    two sets of vectors equals the dot product of the two set means::

        mean_{i,j}(u_i . v_j) == mean_i(u_i) . mean_j(v_j)

    So each song is normalised once, the unit vectors are averaged per
    artist, and a single matrix product yields every artist-to-artist value.
    This replaces one ``cosine`` call per song pair (quadratic in the number
    of songs, with each artist pair visited twice) and agrees with that
    per-pair loop up to floating-point rounding.
    """
    artists = list(traits_by_artist)

    unit_centroids = []
    for name in artists:
        songs = np.asarray(traits_by_artist[name], dtype=float)  # (n_songs, n_features)
        unit_songs = songs / np.linalg.norm(songs, axis=1, keepdims=True)
        unit_centroids.append(unit_songs.mean(axis=0))

    centroid_matrix = np.array(unit_centroids)
    # Entry [i, j] is the mean pairwise similarity between artists i and j.
    similarity = centroid_matrix @ centroid_matrix.T

    return {
        (artist_a, artist_b): float(similarity[i, j])
        for i, artist_a in enumerate(artists)
        for j, artist_b in enumerate(artists)
        if artist_a != artist_b
    }


# NOTE(review): the features are compared on their raw scales, so columns with
# large magnitudes (presumably 'Length (Duration)') likely dominate the cosine
# and push most scores close to 1 — consider standardising the columns first.
cosine_sim = mean_pairwise_cosine_similarity(artist_traits)

# Artists whose nearest neighbours get printed.
artists_of_interest = ["Beyoncé", "Taylor Swift", "Backstreet Boys"]

for query_artist in artists_of_interest:
    print(f"\nTop 10 most similar artists to {query_artist}:")

    # Every (other artist, score) entry whose pair starts with the query artist.
    candidates = []
    for pair, score in cosine_sim.items():
        if pair[0] == query_artist:
            candidates.append((pair[1], score))

    # Highest score first; the sort is stable, so ties keep dictionary order.
    candidates.sort(key=lambda entry: entry[1], reverse=True)
    similar_artists = candidates[:10]

    for other_artist, score in similar_artists:
        print(f"{other_artist}: {score}")



Top 10 most similar artists to Beyoncé:
First Aid Kit: 0.9817711365169072
OneRepublic: 0.9816519079895775
Journey: 0.9805372165925234
David Guetta: 0.9796908414724502
Lady Antebellum: 0.9796425940456925
The Chainsmokers: 0.9795746870714249
Cher: 0.9794744032662097
George Harrison: 0.9790194555044555
The Fray: 0.9789536040632573
The Cult: 0.9787689272611512

Top 10 most similar artists to Taylor Swift:
Iron Butterfly: 0.9965645706639197
Pharrell Williams: 0.9948758777427686
Patrick Hernandez: 0.9945018592418253
Barry White: 0.9944493418943747
Luis Fonsi: 0.9935181546908086
The Trammps: 0.9934971905778708
The Shadows: 0.9925010998281452
Mud: 0.9921205814439888
Traveling Wilburys: 0.9919629378813106
Gigi D'Agostino: 0.9917956139230645

Top 10 most similar artists to Backstreet Boys:
Jamiroquai: 0.9945499047474489
First Aid Kit: 0.9940147516952118
Boney M.: 0.9936712441463216
Survivor: 0.9931686181098808
OneRepublic: 0.993141972798605
Natasha Bedingfield: 0.9931344502445689
Traveling Wilb