In [32]:
import pandas as pd
import numpy as np
import faiss
from sklearn.preprocessing import StandardScaler

In [33]:
df = pd.read_csv('data/spotify_dataset.csv')

In [3]:
df.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [34]:
feature_columns = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo', 'time_signature'
]

df[feature_columns].head()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature
0,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4
1,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4
2,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4
3,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3
4,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4


In [4]:
# Select only numerical audio features
feature_columns = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo', 'time_signature'
]

# Extract features and normalize them
features = df[feature_columns].values
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features).astype(np.float32)

# Build Faiss index
dimension = len(feature_columns)
index = faiss.IndexFlatL2(dimension)
index.add(features_normalized)

# Create a mapping to retrieve track info after search
index_to_track_id = df['track_id'].to_dict()

# Example: search for similar tracks to the first track
query_vector = features_normalized[0:1]  # Taking the first track as an example
k = 5  # Number of results to return
distances, indices = index.search(query_vector, k)

# Get the recommended tracks
for i, idx in enumerate(indices[0]):
    print(f"Recommendation {i+1}: {df.loc[idx, 'track_name']} by {df.loc[idx, 'artists']}")
    print(f"  Distance: {distances[0][i]}")

Recommendation 1: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 2: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 3: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 4: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 5: JAMAICA by Feid;Sech
  Distance: 1.0294830799102783


In [7]:
# Create a mapping from Faiss index position to track_id
faiss_idx_to_track_id = {i: tid for i, tid in enumerate(df['track_id'])}

# When retrieving search results:
distances, indices = index.search(query_vector, k)

# Use the mapping to get track_ids
for i, idx in enumerate(indices[0]):
    track_id = faiss_idx_to_track_id[idx]
    track_info = df[df['track_id'] == track_id].iloc[0]
    print(f"Recommendation {i+1}: {track_info['track_name']} by {track_info['artists']}")
    print(f"  Distance: {distances[0][i]}")

Recommendation 1: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 2: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 3: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 4: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 5: JAMAICA by Feid;Sech
  Distance: 1.0294830799102783


In [11]:
features_normalized[0]

array([ 0.62924427, -0.71714795, -1.2104424 ,  0.30082834, -1.326281  ,
        0.5518475 , -0.8502015 , -0.5041086 ,  0.7587433 ,  0.92930585,
       -1.1418628 ,  0.22182319], dtype=float32)

In [14]:
features[0:2]

array([[ 6.7600e-01,  4.6100e-01,  1.0000e+00, -6.7460e+00,  0.0000e+00,
         1.4300e-01,  3.2200e-02,  1.0100e-06,  3.5800e-01,  7.1500e-01,
         8.7917e+01,  4.0000e+00],
       [ 4.2000e-01,  1.6600e-01,  1.0000e+00, -1.7235e+01,  1.0000e+00,
         7.6300e-02,  9.2400e-01,  5.5600e-06,  1.0100e-01,  2.6700e-01,
         7.7489e+01,  4.0000e+00]])

In [16]:
indices[0]

array([     0,  62102,  99152, 102151,  81529])

In [28]:
df.loc[0]

track_id            5SuOikwiRyPMVoIQDJUgSV
artists                        Gen Hoshino
album_name                          Comedy
track_name                          Comedy
popularity                              73
duration_ms                         230666
explicit                             False
danceability                         0.676
energy                               0.461
key                                      1
loudness                            -6.746
mode                                     0
speechiness                          0.143
acousticness                        0.0322
instrumentalness                  0.000001
liveness                             0.358
valence                              0.715
tempo                               87.917
time_signature                           4
track_genre                       acoustic
Name: 0, dtype: object

In [33]:
df.iloc[99152]

track_id            5SuOikwiRyPMVoIQDJUgSV
artists                        Gen Hoshino
album_name                          Comedy
track_name                          Comedy
popularity                              73
duration_ms                         230666
explicit                             False
danceability                         0.676
energy                               0.461
key                                      1
loudness                            -6.746
mode                                     0
speechiness                          0.143
acousticness                        0.0322
instrumentalness                  0.000001
liveness                             0.358
valence                              0.715
tempo                               87.917
time_signature                           4
track_genre              singer-songwriter
Name: 99152, dtype: object

In [36]:
import requests

In [39]:
response.text

'<!DOCTYPE html>\n<html ng-app="accounts" ng-csp>\n  <head>\n    <meta charset="utf-8">\n    <title>Error - Spotify</title>\n    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">\n    <base href="/">\n    <link rel="icon" href="https://accounts.scdn.co/oauth2/images/favicon.ace4d8543bbb017893402a1e9d1ac1fa.ico">\n    <link href="" media="screen" rel="stylesheet">\n  </head>\n  <body>\n  <div class="head">\n    <a class="spotify-logo" href="/" tabindex="-1" title="Spotify"></a>\n  </div>\n\n    <div class="container-fluid error">\n      <div class="content">\n        <h1 class="h1">Error</h1>\n        <p>\n          Oops! Something went wrong, please try again or check out our <a href="https://www.spotify.com/help">help area</a>.\n        </p>\n      </div>\n    </div>\n    <script async defer src="{2}" sp-error=\'{3}\'></script>\n  </body>\n</html>\n'

In [1]:
## Step 3: Use the API to Get Artist Data

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Set up credentials
client_id = 'ff8910e7b0984afc894c0f193652f85d'
client_secret = '4eed35c58bcb4aa9a54da6ff6a3723df'

# Initialize Spotify client
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [28]:
# Get data for a specific artist by name
def get_artist_data(artist_name):
    try:
        # Search for the artist
        results = sp.search(q='artist:' + artist_name, type='artist')
        items = results['artists']['items']
        
        if len(items) > 0:
            artist = items[0]
            print(f"Artist: {artist['name']}")
            print(f"Popularity: {artist['popularity']}")
            print(f"Followers: {artist['followers']['total']}")
            print(f"Genres: {', '.join(artist['genres'])}")
            print(f"Spotify URL: {artist['external_urls']['spotify']}")
            
            # Get the artist's top tracks with error handling
            try:
                top_tracks = sp.artist_top_tracks(artist['id'])
                print("\nTop Tracks:")
                for i, track in enumerate(top_tracks['tracks'][:5], 1):
                    print(f"{i}. {track['name']}")
            except Exception as e:
                print(f"\nCouldn't retrieve top tracks: {str(e)}")
        
        return artist
    except:
        print(f"No artist found with the name {artist_name}")
        return None

# Example usage
artist_data = get_artist_data("NMIXX")

Artist: NMIXX
Popularity: 67
Followers: 3566649
Genres: k-pop
Spotify URL: https://open.spotify.com/artist/28ot3wh4oNmoFOdVajibBl

Top Tracks:
1. Love Me Like This
2. DASH
3. High Horse
4. DICE
5. O.O


In [26]:
artist_data = get_artist_data("NMIXX")

Artist: NMIXX
Popularity: 67
Followers: 3566649
Genres: k-pop
Spotify URL: https://open.spotify.com/artist/28ot3wh4oNmoFOdVajibBl

Top Tracks:
1. Love Me Like This
2. DASH
3. High Horse
4. DICE
5. O.O
No artist found with the name NMIXX


In [6]:
results = sp.search(q='artist:Radiohead' , type='artist')

In [10]:
results['artists']['items'][0]

{'external_urls': {'spotify': 'https://open.spotify.com/artist/4Z8W4fKeB5YxbusRsdQVPb'},
 'followers': {'href': None, 'total': 11692479},
 'genres': ['art rock', 'alternative rock'],
 'href': 'https://api.spotify.com/v1/artists/4Z8W4fKeB5YxbusRsdQVPb',
 'id': '4Z8W4fKeB5YxbusRsdQVPb',
 'images': [{'url': 'https://i.scdn.co/image/ab6761610000e5eba03696716c9ee605006047fd',
   'height': 640,
   'width': 640},
  {'url': 'https://i.scdn.co/image/ab67616100005174a03696716c9ee605006047fd',
   'height': 320,
   'width': 320},
  {'url': 'https://i.scdn.co/image/ab6761610000f178a03696716c9ee605006047fd',
   'height': 160,
   'width': 160}],
 'name': 'Radiohead',
 'popularity': 84,
 'type': 'artist',
 'uri': 'spotify:artist:4Z8W4fKeB5YxbusRsdQVPb'}

In [29]:
# Getting multiple artists
def get_artists_by_genre(genre, limit=10):
    results = sp.search(q=f'genre:{genre}', type='artist', limit=limit)
    artists = results['artists']['items']
    
    print(f"\nTop {limit} {genre} Artists:")
    for i, artist in enumerate(artists, 1):
        print(f"{i}. {artist['name']} (Popularity: {artist['popularity']})")
    
    return artists

# Example usage
rock_artists = get_artists_by_genre("rock")


Top 10 rock Artists:
1. Genki Rockets (Popularity: 21)
2. Gene Rockwell (Popularity: 24)
3. Imagine Dragons (Popularity: 87)
4. ONE OK ROCK (Popularity: 73)
5. OneRepublic (Popularity: 84)
6. Linkin Park (Popularity: 89)
7. 芒果醬 Mango Jump (Popularity: 51)
8. Mrs. GREEN APPLE (Popularity: 82)
9. TRASH (Popularity: 50)
10. Aimer (Popularity: 65)


In [31]:
get_artists_by_genre("indie")


Top 10 indie Artists:
1. 告五人 (Popularity: 61)
2. 理想混蛋 (Popularity: 58)
3. Amazing Show (Popularity: 55)
4. EggPlantEgg (Popularity: 55)
5. Crowd Lu (Popularity: 55)
6. Mixer (Popularity: 52)
7. Anderson .Paak (Popularity: 77)
8. No Party For Cao Dong (Popularity: 53)
9. TRASH (Popularity: 50)
10. Chappell Roan (Popularity: 87)


[{'external_urls': {'spotify': 'https://open.spotify.com/artist/6xErgeZYatiaQ36SB5bvi8'},
  'followers': {'href': None, 'total': 1080937},
  'genres': ['taiwanese indie',
   'chinese indie',
   'mandopop',
   'taiwanese pop',
   'c-pop',
   'chinese rock'],
  'href': 'https://api.spotify.com/v1/artists/6xErgeZYatiaQ36SB5bvi8',
  'id': '6xErgeZYatiaQ36SB5bvi8',
  'images': [{'url': 'https://i.scdn.co/image/ab6761610000e5ebee1d003e321b2e02a9abf983',
    'height': 640,
    'width': 640},
   {'url': 'https://i.scdn.co/image/ab67616100005174ee1d003e321b2e02a9abf983',
    'height': 320,
    'width': 320},
   {'url': 'https://i.scdn.co/image/ab6761610000f178ee1d003e321b2e02a9abf983',
    'height': 160,
    'width': 160}],
  'name': '告五人',
  'popularity': 61,
  'type': 'artist',
  'uri': 'spotify:artist:6xErgeZYatiaQ36SB5bvi8'},
 {'external_urls': {'spotify': 'https://open.spotify.com/artist/0Awqm7GXGiBp8fJNGvywra'},
  'followers': {'href': None, 'total': 207222},
  'genres': ['mandopop',
   '

In [36]:
def get_track_id(song_name, artist_name=None, sp=None, market='US', return_info=False):
    """
    Get Spotify track ID by song name and optionally artist name.
    
    Parameters:
    ----------
    song_name : str
        Name of the song to search for
    artist_name : str, optional
        Name of the artist (for more accurate results)
    sp : spotipy.Spotify, optional
        Authenticated Spotify client. Will create one if not provided.
    market : str, optional
        Market code to use for the search (default: 'US')
    return_info : bool, optional
        If True, returns a tuple of (track_id, track_name, artist_name, album_name)
        If False, returns just the track_id (default)
        
    Returns:
    -------
    str or tuple
        Track ID or tuple with track info if return_info=True
        Returns None if no matching track is found
    """
    # Initialize Spotify client if not provided
    if sp is None:
        # You need to set these environment variables or pass them directly
        client_id = "YOUR_CLIENT_ID"
        client_secret = "YOUR_CLIENT_SECRET"
        
        client_credentials_manager = SpotifyClientCredentials(
            client_id=client_id,
            client_secret=client_secret
        )
        sp = spotipy.Spotify(
            client_credentials_manager=client_credentials_manager,
            retries=3,
            status_retries=3,
            backoff_factor=0.3
        )
    
    # Build the search query
    query = f"track:{song_name}"
    if artist_name:
        query += f" artist:{artist_name}"
    
    try:
        # Execute the search
        results = sp.search(q=query, type='track', limit=10, market=market)
        
        # Check if we got any results
        if not results['tracks']['items']:
            # If no results with track: and artist: syntax, try a more general search
            if artist_name:
                fallback_query = f"{song_name} {artist_name}"
                results = sp.search(q=fallback_query, type='track', limit=10, market=market)
                
                # Still no results
                if not results['tracks']['items']:
                    print(f"No tracks found for '{song_name}' by '{artist_name}'")
                    return None
            else:
                print(f"No tracks found for '{song_name}'")
                return None
        
        # Get the first (best) match
        track = results['tracks']['items'][0]
        track_id = track['id']
        
        if return_info:
            track_name = track['name']
            artist_name = track['artists'][0]['name']
            album_name = track['album']['name']
            return (track_id, track_name, artist_name, album_name)
        else:
            return track_id
            
    except Exception as e:
        print(f"Error looking up track: {str(e)}")
        return None

In [39]:
get_track_id("POWER", sp=sp)

'2uwnP6tZVVmTovzX5ELooy'

In [40]:
def get_audio_features(track_id, sp=None):
    """
    Extract audio features for a single Spotify track.
    
    Parameters:
    ----------
    track_id : str
        Spotify track ID
    sp : spotipy.Spotify, optional
        Authenticated Spotify client. Will create one if not provided.
        
    Returns:
    -------
    dict
        Dictionary containing the audio features for the track
        Returns None if features couldn't be retrieved
    """
    # Initialize Spotify client if not provided
    if sp is None:
        client_id = "YOUR_CLIENT_ID"
        client_secret = "YOUR_CLIENT_SECRET"
        
        client_credentials_manager = SpotifyClientCredentials(
            client_id=client_id,
            client_secret=client_secret
        )
        sp = spotipy.Spotify(
            client_credentials_manager=client_credentials_manager,
            retries=3
        )
    
    # Try to get audio features with retries
    max_retries = 3
    for attempt in range(max_retries):
        try:
            # Call the Spotify API
            features = sp.audio_features(track_id)
            
            # Check if we got valid results
            if features and features[0]:
                # Extract just the features we need
                feature_data = features[0]
                return {
                    'danceability': feature_data['danceability'],
                    'energy': feature_data['energy'],
                    'key': feature_data['key'],
                    'loudness': feature_data['loudness'],
                    'mode': feature_data['mode'],
                    'speechiness': feature_data['speechiness'],
                    'acousticness': feature_data['acousticness'],
                    'instrumentalness': feature_data['instrumentalness'],
                    'liveness': feature_data['liveness'],
                    'valence': feature_data['valence'],
                    'tempo': feature_data['tempo'],
                    'time_signature': feature_data['time_signature']
                }
            else:
                print(f"No features found for track {track_id}")
                return None
                
        except Exception as e:
            print(f"Error on attempt {attempt+1}/{max_retries}: {str(e)}")
            
            if attempt < max_retries - 1:
                # Wait before trying again
                time.sleep(1)
            else:
                print(f"Could not get features for track {track_id}")
                return None

In [42]:
import time
get_audio_features(get_track_id("POWER", sp=sp), sp=sp)

HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy with Params: {} returned 403 due to None


Error on attempt 1/3: http status: 403, code: -1 - https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy:
 None, reason: None


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy with Params: {} returned 403 due to None


Error on attempt 2/3: http status: 403, code: -1 - https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy:
 None, reason: None


HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy with Params: {} returned 403 due to None


Error on attempt 3/3: http status: 403, code: -1 - https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy:
 None, reason: None
Could not get features for track 2uwnP6tZVVmTovzX5ELooy


In [43]:
sp.audio_features('2uwnP6tZVVmTovzX5ELooy')

HTTP Error for GET to https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy with Params: {} returned 403 due to None


SpotifyException: http status: 403, code: -1 - https://api.spotify.com/v1/audio-features/?ids=2uwnP6tZVVmTovzX5ELooy:
 None, reason: None