In [1]:
import pandas as pd
import numpy as np
import faiss
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('data/spotify_dataset.csv')

In [3]:
df.head()

Unnamed: 0,track_id,artists,album_name,track_name,popularity,duration_ms,explicit,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,time_signature,track_genre
0,5SuOikwiRyPMVoIQDJUgSV,Gen Hoshino,Comedy,Comedy,73,230666,False,0.676,0.461,1,-6.746,0,0.143,0.0322,1e-06,0.358,0.715,87.917,4,acoustic
1,4qPNDBW1i3p13qLCt0Ki3A,Ben Woodward,Ghost (Acoustic),Ghost - Acoustic,55,149610,False,0.42,0.166,1,-17.235,1,0.0763,0.924,6e-06,0.101,0.267,77.489,4,acoustic
2,1iJBSr7s7jYXzM8EGcbK5b,Ingrid Michaelson;ZAYN,To Begin Again,To Begin Again,57,210826,False,0.438,0.359,0,-9.734,1,0.0557,0.21,0.0,0.117,0.12,76.332,4,acoustic
3,6lfxq3CG4xtTiEg7opyCyx,Kina Grannis,Crazy Rich Asians (Original Motion Picture Sou...,Can't Help Falling In Love,71,201933,False,0.266,0.0596,0,-18.515,1,0.0363,0.905,7.1e-05,0.132,0.143,181.74,3,acoustic
4,5vjLSffimiIP26QG5WcN2K,Chord Overstreet,Hold On,Hold On,82,198853,False,0.618,0.443,2,-9.681,1,0.0526,0.469,0.0,0.0829,0.167,119.949,4,acoustic


In [4]:
# Select only numerical audio features
feature_columns = [
    'danceability', 'energy', 'key', 'loudness', 'mode', 
    'speechiness', 'acousticness', 'instrumentalness', 
    'liveness', 'valence', 'tempo', 'time_signature'
]

# Extract features and normalize them
features = df[feature_columns].values
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features).astype(np.float32)

# Build Faiss index
dimension = len(feature_columns)
index = faiss.IndexFlatL2(dimension)
index.add(features_normalized)

# Create a mapping to retrieve track info after search
index_to_track_id = df['track_id'].to_dict()

# Example: search for similar tracks to the first track
query_vector = features_normalized[0:1]  # Taking the first track as an example
k = 5  # Number of results to return
distances, indices = index.search(query_vector, k)

# Get the recommended tracks
for i, idx in enumerate(indices[0]):
    print(f"Recommendation {i+1}: {df.loc[idx, 'track_name']} by {df.loc[idx, 'artists']}")
    print(f"  Distance: {distances[0][i]}")

Recommendation 1: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 2: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 3: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 4: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 5: JAMAICA by Feid;Sech
  Distance: 1.0294830799102783


In [7]:
# Create a mapping from Faiss index position to track_id
faiss_idx_to_track_id = {i: tid for i, tid in enumerate(df['track_id'])}

# When retrieving search results:
distances, indices = index.search(query_vector, k)

# Use the mapping to get track_ids
for i, idx in enumerate(indices[0]):
    track_id = faiss_idx_to_track_id[idx]
    track_info = df[df['track_id'] == track_id].iloc[0]
    print(f"Recommendation {i+1}: {track_info['track_name']} by {track_info['artists']}")
    print(f"  Distance: {distances[0][i]}")

Recommendation 1: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 2: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 3: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 4: Comedy by Gen Hoshino
  Distance: 0.0
Recommendation 5: JAMAICA by Feid;Sech
  Distance: 1.0294830799102783


In [11]:
features_normalized[0]

array([ 0.62924427, -0.71714795, -1.2104424 ,  0.30082834, -1.326281  ,
        0.5518475 , -0.8502015 , -0.5041086 ,  0.7587433 ,  0.92930585,
       -1.1418628 ,  0.22182319], dtype=float32)

In [14]:
features[0:2]

array([[ 6.7600e-01,  4.6100e-01,  1.0000e+00, -6.7460e+00,  0.0000e+00,
         1.4300e-01,  3.2200e-02,  1.0100e-06,  3.5800e-01,  7.1500e-01,
         8.7917e+01,  4.0000e+00],
       [ 4.2000e-01,  1.6600e-01,  1.0000e+00, -1.7235e+01,  1.0000e+00,
         7.6300e-02,  9.2400e-01,  5.5600e-06,  1.0100e-01,  2.6700e-01,
         7.7489e+01,  4.0000e+00]])

In [16]:
indices[0]

array([     0,  62102,  99152, 102151,  81529])

In [28]:
df.loc[0]

track_id            5SuOikwiRyPMVoIQDJUgSV
artists                        Gen Hoshino
album_name                          Comedy
track_name                          Comedy
popularity                              73
duration_ms                         230666
explicit                             False
danceability                         0.676
energy                               0.461
key                                      1
loudness                            -6.746
mode                                     0
speechiness                          0.143
acousticness                        0.0322
instrumentalness                  0.000001
liveness                             0.358
valence                              0.715
tempo                               87.917
time_signature                           4
track_genre                       acoustic
Name: 0, dtype: object

In [33]:
df.iloc[99152]

track_id            5SuOikwiRyPMVoIQDJUgSV
artists                        Gen Hoshino
album_name                          Comedy
track_name                          Comedy
popularity                              73
duration_ms                         230666
explicit                             False
danceability                         0.676
energy                               0.461
key                                      1
loudness                            -6.746
mode                                     0
speechiness                          0.143
acousticness                        0.0322
instrumentalness                  0.000001
liveness                             0.358
valence                              0.715
tempo                               87.917
time_signature                           4
track_genre              singer-songwriter
Name: 99152, dtype: object

In [36]:
import requests

In [38]:
response = requests.post(
    "https://accounts.spotify.com/api/token",
    data = {
        "Content-Type: application/x-www-form-urlencoded",
        "grant_type=client_credentials&client_id=bc31e0ab9ee1423dac853f1305583d47&client_secret=7e604447be9148f18b731e0a7991181d"
    }
)

In [39]:
response.text

'<!DOCTYPE html>\n<html ng-app="accounts" ng-csp>\n  <head>\n    <meta charset="utf-8">\n    <title>Error - Spotify</title>\n    <meta name="viewport" content="width=device-width, initial-scale=1.0, maximum-scale=1.0, user-scalable=no">\n    <base href="/">\n    <link rel="icon" href="https://accounts.scdn.co/oauth2/images/favicon.ace4d8543bbb017893402a1e9d1ac1fa.ico">\n    <link href="" media="screen" rel="stylesheet">\n  </head>\n  <body>\n  <div class="head">\n    <a class="spotify-logo" href="/" tabindex="-1" title="Spotify"></a>\n  </div>\n\n    <div class="container-fluid error">\n      <div class="content">\n        <h1 class="h1">Error</h1>\n        <p>\n          Oops! Something went wrong, please try again or check out our <a href="https://www.spotify.com/help">help area</a>.\n        </p>\n      </div>\n    </div>\n    <script async defer src="{2}" sp-error=\'{3}\'></script>\n  </body>\n</html>\n'

In [5]:
## Step 3: Use the API to Get Artist Data

import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Set up credentials
client_id = 'bc31e0ab9ee1423dac853f1305583d47'
client_secret = '7e604447be9148f18b731e0a7991181d'

# Initialize Spotify client
client_credentials_manager = SpotifyClientCredentials(client_id=client_id, client_secret=client_secret)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [8]:
# Get data for a specific artist by name
def get_artist_data(artist_name):
    try:
        # Search for the artist
        results = sp.search(q='artist:' + artist_name, type='artist')
        items = results['artists']['items']
        
        if len(items) > 0:
            artist = items[0]
            print(f"Artist: {artist['name']}")
            print(f"Popularity: {artist['popularity']}")
            print(f"Followers: {artist['followers']['total']}")
            print(f"Genres: {', '.join(artist['genres'])}")
            print(f"Spotify URL: {artist['external_urls']['spotify']}")
            
            # Get the artist's top tracks with error handling
            try:
                top_tracks = sp.artist_top_tracks(artist['id'])
                print("\nTop Tracks:")
                for i, track in enumerate(top_tracks['tracks'][:5], 1):
                    print(f"{i}. {track['name']}")
            except Exception as e:
                print(f"\nCouldn't retrieve top tracks: {str(e)}")
                
            # Add a small delay between API calls to avoid rate limiting
            time.sleep(0.5)
        
        # Get related artists with error handling
        try:
            related = sp.artist_related_artists(artist['id'])
            print("\nRelated Artists:")
            for i, related_artist in enumerate(related['artists'][:5], 1):
                print(f"{i}. {related_artist['name']}")
        except Exception as e:
            print(f"\nCouldn't retrieve related artists: {str(e)}")
        
        return artist
    except:
        print(f"No artist found with the name {artist_name}")
        return None

# Example usage
artist_data = get_artist_data("Radiohead")

Artist: Radiohead
Popularity: 84
Followers: 11678151
Genres: art rock, alternative rock
Spotify URL: https://open.spotify.com/artist/4Z8W4fKeB5YxbusRsdQVPb

Top Tracks:
1. Creep
2. No Surprises
3. Karma Police
4. All I Need
5. Jigsaw Falling Into Place
No artist found with the name Radiohead


In [11]:
results = sp.search(q='artist: Radiohead' , type='artist')

In [15]:
results['artists']['items'][0]

{'external_urls': {'spotify': 'https://open.spotify.com/artist/3zGGhKGLorCRgcXc22gYDX'},
 'followers': {'href': None, 'total': 22309},
 'genres': ['taiwanese indie',
  'chinese indie',
  'chinese rock',
  'taiwanese pop'],
 'href': 'https://api.spotify.com/v1/artists/3zGGhKGLorCRgcXc22gYDX',
 'id': '3zGGhKGLorCRgcXc22gYDX',
 'images': [{'url': 'https://i.scdn.co/image/ab6761610000e5eb3bbca1d080c9362e623b97db',
   'height': 640,
   'width': 640},
  {'url': 'https://i.scdn.co/image/ab676161000051743bbca1d080c9362e623b97db',
   'height': 320,
   'width': 320},
  {'url': 'https://i.scdn.co/image/ab6761610000f1783bbca1d080c9362e623b97db',
   'height': 160,
   'width': 160}],
 'name': 'FUMON',
 'popularity': 42,
 'type': 'artist',
 'uri': 'spotify:artist:3zGGhKGLorCRgcXc22gYDX'}

In [None]:
# Getting multiple artists
def get_artists_by_genre(genre, limit=10):
    results = sp.search(q=f'genre:{genre}', type='artist', limit=limit)
    artists = results['artists']['items']
    
    print(f"\nTop {limit} {genre} Artists:")
    for i, artist in enumerate(artists, 1):
        print(f"{i}. {artist['name']} (Popularity: {artist['popularity']})")
    
    return artists

# Example usage
rock_artists = get_artists_by_genre("rock")

In [None]:
## Step 4: Get More Detailed Artist Information
# Get an artist's albums
def get_artist_albums(artist_name, album_limit=10):
    # Search for the artist first
    results = sp.search(q='artist:' + artist_name, type='artist')
    items = results['artists']['items']
    
    if len(items) > 0:
        artist = items[0]
        artist_id = artist['id']
        
        # Get albums
        albums = sp.artist_albums(artist_id, album_type='album', limit=album_limit)
        
        print(f"\nAlbums by {artist['name']}:")
        for i, album in enumerate(albums['items'], 1):
            print(f"{i}. {album['name']} ({album['release_date'][:4]})")
            
            # Get tracks for each album
            tracks = sp.album_tracks(album['id'])
            for j, track in enumerate(tracks['items'], 1):
                print(f"   {j}. {track['name']} - {track['duration_ms'] // 60000}:{(track['duration_ms'] % 60000) // 1000:02d}")
        
        return albums['items']
    else:
        print(f"No artist found with the name {artist_name}")
        return None

# Example usage
radiohead_albums = get_artist_albums("Radiohead", album_limit=5)
```

## Notes on Authentication
- The above examples use Client Credentials Flow, which is suitable for accessing public data
- If you need to access user-specific data, you'll need to use Authorization Code Flow
- For user-specific data, modify your authentication like this:

```python
from spotipy.oauth2 import SpotifyOAuth

scope = "user-library-read"  # Add more scopes as needed
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(
    client_id=client_id,
    client_secret=client_secret,
    redirect_uri="http://localhost:8888/callback",  # Must match your app settings
    scope=scope
))

# Now you can access user data
results = sp.current_user_saved_tracks()
for item in results['items']:
    track = item['track']
    print(f"{track['artists'][0]['name']} - {track['name']}")
```