# MIllion Playlists Dataset Data Extraction
This Notebook is used to extract the required Data from the Million Playlist Dataset created by Spotify.

---

### Importing the Libraries needed for the data extraction

In [20]:
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd
import spotipy
import config
import math
import json
import os

### Spotify's Authentication
We must first parse API credentials to Spotipy's client manager, in order to extract the required data from Spotify's API

In [21]:
client_credentials_manager = SpotifyClientCredentials(client_id = config.client_ID, client_secret = config.client_secret)
sp = spotipy.Spotify(client_credentials_manager = client_credentials_manager)

### Importing the Data from the Dataset

In [22]:
def get_sliced_data(path, slices_limit = 10):
    # Get a list of all the files in the directory
    file_list = os.listdir(path)
    
    #Create an empty list with all JSON files
    data_list = []
    
    # Loop through the files and open each one
    for counter, filename in enumerate(file_list):
        # Construct the full file path
        file_path = os.path.join(path, filename)

        # Open the file and load the data into a variable
        with open(file_path, 'r') as f:
            data = json.load(f)
            data = pd.json_normalize(data)['playlists'][0]
            data_list.append(data)
        
        # Exit the Loop if the max number of slices (files) has been reached
        if counter == slices_limit:
            break

    # Return the whole Dataset as a list of JSONs
    return data_list
    


In [23]:
def make_data_df(data_list):
    # Create an empty list to store the flattened dictionaries
    flat_list = []

    # Flatten the dictionaries and add them to the list
    for sub_list in data_list:
        for sub_dict in sub_list:
            flat_list.append(sub_dict)
            
    # Convert the flattened list of dictionaries to a pandas DataFrame
    df = pd.DataFrame(flat_list)

    return df

In [24]:
path = r'C:\Users\yakin\Downloads\spotify_million_playlist_dataset\data'
data = get_sliced_data(path, slices_limit = 0)

In [25]:
df = make_data_df(data)
df

Unnamed: 0,name,collaborative,pid,modified_at,num_tracks,num_albums,num_followers,tracks,num_edits,duration_ms,num_artists,description
0,Throwbacks,false,0,1493424000,52,47,1,"[{'pos': 0, 'artist_name': 'Missy Elliott', 't...",6,11532414,37,
1,Awesome Playlist,false,1,1506556800,39,23,1,"[{'pos': 0, 'artist_name': 'Survivor', 'track_...",5,11656470,21,
2,korean,false,2,1505692800,64,51,1,"[{'pos': 0, 'artist_name': 'Hoody', 'track_uri...",18,14039958,31,
3,mat,false,3,1501027200,126,107,1,"[{'pos': 0, 'artist_name': 'Camille Saint-Saën...",4,28926058,86,
4,90s,false,4,1401667200,17,16,2,"[{'pos': 0, 'artist_name': 'The Smashing Pumpk...",7,4335282,16,
...,...,...,...,...,...,...,...,...,...,...,...,...
995,old,false,995,1507852800,41,40,1,"[{'pos': 0, 'artist_name': 'Katrina', 'track_u...",8,9917901,36,
996,Daze,false,996,1479254400,17,17,1,"[{'pos': 0, 'artist_name': 'PARTYNEXTDOOR', 't...",13,3699248,15,
997,rap,false,997,1410307200,119,98,1,"[{'pos': 0, 'artist_name': 'LoveRance', 'track...",63,27538723,82,
998,Country,false,998,1507939200,108,75,1,"[{'pos': 0, 'artist_name': 'Hunter Hayes', 'tr...",37,24950143,40,


### Extracting the information from every track in every playlist

In [26]:
def extract_tracks_data(df):
    # Normalize the JSON data into a DataFrame for the entire 'tracks' column
    track_data = pd.json_normalize(df['tracks'].explode())

    # Drop unnecessary columns
    final_df = track_data.drop(['pos', 'duration_ms'], axis=1)

    # Set 'track_uri' as the index for efficient duplicate removal
    final_df.set_index('track_uri', inplace=True)

    # Drop any repeated rows based on the index (i.e., 'track_uri')
    final_df = final_df[~final_df.index.duplicated()].reset_index()

    return final_df

In [27]:
track_df = extract_tracks_data(df)
track_df

Unnamed: 0,track_uri,artist_name,artist_uri,track_name,album_uri,album_name
0,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,Missy Elliott,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,The Cookbook
1,spotify:track:6I9VzXrHxO9rA9A5euc8Ak,Britney Spears,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,In The Zone
2,spotify:track:0WqIKmW4BTrj3eJFmnCKMv,Beyoncé,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,Dangerously In Love (Alben für die Ewigkeit)
3,spotify:track:1AWQoqb9bSvzTjaLralEkT,Justin Timberlake,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,Justified
4,spotify:track:1lzr43nnXAijIGYnCT8M8H,Shaggy,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,Hot Shot
...,...,...,...,...,...,...
34438,spotify:track:3uCHI1gfOUL5j5swEh0TcH,Jon D,spotify:artist:5HCypjplgh5uQezvBpOfXN,I Don't Know,spotify:album:2KEQtuVl1cYsTYtVRUrNVi,Roots
34439,spotify:track:0P1oO2gREMYUCoOkzYAyFu,Big Words,spotify:artist:0sHN89qak07mnug3LVVjzP,The Answer,spotify:album:5jrsRHRAmetu5e7RRBoxj7,"Hollywood, a Beautiful Coincidence"
34440,spotify:track:2oM4BuruDnEvk59IvIXCwn,Allan Rayman,spotify:artist:6Yv6OBXD6ZQakEljaGaDAk,25.22,spotify:album:3CbNgBzI7r9o0F6VjH9sTY,Roadhouse 01
34441,spotify:track:4Ri5TTUgjM96tbQZd5Ua7V,Jon Jason,spotify:artist:77bNdkKYBBmc30CisCA6tE,Good Feeling,spotify:album:2dZ7oVNQBeLlpoUYfbEsJP,Good Feeling


In [29]:
#additional_info_df = get_additional_info(track_df)
#additional_info_df

### Requesting to Spotify's API the features of every Track in the dataset

In [30]:
def extract_track_features(df, batch_size = 50, verbose = True):
  # Set 'track_uri' as the index for efficient merging
  df = df.set_index('track_uri')
  
  # Get a list of all the track URIs in the DataFrame
  track_uris = df.index.to_list()
  
  # Calculate the number of batches needed based on batch size
  num_batches = math.ceil(len(track_uris) / batch_size)
  
  # Initialize an empty list to store the results
  audio_features = []
  
  # Make requests to the Spotify API in batches of size 'batch_size'
  for i in range(num_batches):
    # Calculate the start and end indices of the current batch
    start_idx = i * batch_size
    end_idx = min((i + 1) * batch_size, len(track_uris))
    
    # Extract the track URIs for the current batch
    track_batch_uris = track_uris[start_idx:end_idx]
    
    # Request audio features and track's data for the current batch of track URIs from the Spotify API
    track_batch_features = sp.audio_features(track_batch_uris)
    
    # Extend the list of audio features with the results of the current batch
    audio_features.extend(track_batch_features)
    
    #If Verbose is Enabled, the function will print current API requests status in terminal.
    if verbose:
      print(f'----------------------------------------------------------------------------------')
      print(f'Actual Batch: {i}. Number of Tracks retrieved: {i * batch_size}. Tracks Remaining: {len(df) - i * batch_size}')

  # Create a list of dictionaries with only the desired keys
  desired_keys = ['danceability', 'energy', 'key', 'loudness', 'mode', 'speechiness', 'acousticness', 'instrumentalness', 'liveness', 'valence', 'tempo', 'duration_ms', 'time_signature', 'uri']
  audio_features = [{k: track.get(k) for k in desired_keys} for track in audio_features]
  
  # Convert audio_features list to DataFrame
  features_df = pd.DataFrame(audio_features)
  
  # Merge audio features DataFrame with the final_df based on 'track_uri' index
  final_df = df.merge(features_df, left_index = True, right_on= 'uri')
  final_df = final_df.rename(columns = {'uri' : 'track_uri'})
  
  return final_df

In [31]:
track_features_df = extract_track_features(track_df)
track_features_df

----------------------------------------------------------------------------------
Actual Batch: 0. Number of Tracks retrieved: 0. Tracks Remaining: 34443
----------------------------------------------------------------------------------
Actual Batch: 1. Number of Tracks retrieved: 50. Tracks Remaining: 34393
----------------------------------------------------------------------------------
Actual Batch: 2. Number of Tracks retrieved: 100. Tracks Remaining: 34343
----------------------------------------------------------------------------------
Actual Batch: 3. Number of Tracks retrieved: 150. Tracks Remaining: 34293
----------------------------------------------------------------------------------
Actual Batch: 4. Number of Tracks retrieved: 200. Tracks Remaining: 34243
----------------------------------------------------------------------------------
Actual Batch: 5. Number of Tracks retrieved: 250. Tracks Remaining: 34193
-------------------------------------------------------------

Unnamed: 0,artist_name,artist_uri,track_name,album_uri,album_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,track_uri
0,Missy Elliott,spotify:artist:2wIVse2owClT7go1WT98tk,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,The Cookbook,0.904,0.813,4,-7.105,0,0.1210,0.03110,0.006970,0.0471,0.810,125.461,226864,4,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI
1,Britney Spears,spotify:artist:26dSoYclwsYLMAKD3tpOr4,Toxic,spotify:album:0z7pVBGOD7HCIB7S8eLkLI,In The Zone,0.774,0.838,5,-3.914,0,0.1140,0.02490,0.025000,0.2420,0.924,143.040,198800,4,spotify:track:6I9VzXrHxO9rA9A5euc8Ak
2,Beyoncé,spotify:artist:6vWDO969PvNqNYHIOW5v0m,Crazy In Love,spotify:album:25hVFAxTlDvXbx2X2QkUkE,Dangerously In Love (Alben für die Ewigkeit),0.664,0.758,2,-6.583,0,0.2100,0.00238,0.000000,0.0598,0.701,99.259,235933,4,spotify:track:0WqIKmW4BTrj3eJFmnCKMv
3,Justin Timberlake,spotify:artist:31TPClRtHm23RisEBtV3X7,Rock Your Body,spotify:album:6QPkyl04rXwTGlGlcYaRoW,Justified,0.892,0.714,4,-6.055,0,0.1410,0.20100,0.000234,0.0521,0.817,100.972,267267,4,spotify:track:1AWQoqb9bSvzTjaLralEkT
4,Shaggy,spotify:artist:5EvFsr3kj42KNv97ZEnqij,It Wasn't Me,spotify:album:6NmFmPX56pcLBOFMhIiKvF,Hot Shot,0.853,0.606,0,-4.596,1,0.0713,0.05610,0.000000,0.3130,0.654,94.759,227600,4,spotify:track:1lzr43nnXAijIGYnCT8M8H
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34438,Jon D,spotify:artist:5HCypjplgh5uQezvBpOfXN,I Don't Know,spotify:album:2KEQtuVl1cYsTYtVRUrNVi,Roots,0.669,0.228,2,-12.119,1,0.0690,0.79200,0.065000,0.0944,0.402,83.024,189184,4,spotify:track:3uCHI1gfOUL5j5swEh0TcH
34439,Big Words,spotify:artist:0sHN89qak07mnug3LVVjzP,The Answer,spotify:album:5jrsRHRAmetu5e7RRBoxj7,"Hollywood, a Beautiful Coincidence",0.493,0.727,1,-5.031,1,0.2170,0.08730,0.000000,0.1290,0.289,73.259,263680,4,spotify:track:0P1oO2gREMYUCoOkzYAyFu
34440,Allan Rayman,spotify:artist:6Yv6OBXD6ZQakEljaGaDAk,25.22,spotify:album:3CbNgBzI7r9o0F6VjH9sTY,Roadhouse 01,0.702,0.524,7,-10.710,1,0.0793,0.33200,0.055300,0.2980,0.265,140.089,189213,4,spotify:track:2oM4BuruDnEvk59IvIXCwn
34441,Jon Jason,spotify:artist:77bNdkKYBBmc30CisCA6tE,Good Feeling,spotify:album:2dZ7oVNQBeLlpoUYfbEsJP,Good Feeling,0.509,0.286,8,-14.722,1,0.1230,0.40200,0.000012,0.1310,0.259,121.633,194720,4,spotify:track:4Ri5TTUgjM96tbQZd5Ua7V


### Requesting to Spotify's API the features of every Artist in the dataset

In [32]:
def extract_artist_features(df, batch_size = 50, verbose = True):
    
    # Set 'artist_uri' as the index for efficient merging
    df = df.set_index('artist_uri')

    #Get all the Artist's URIs from dataframe
    artist_uris = df.index.to_list()

    # Calculate the number of batches needed based on batch size
    num_batches = math.ceil(len(artist_uris) / batch_size)

    # Initialize an empty list to store the results
    artists_features = []
    
    # Make requests to the Spotify API in batches of size 'batch_size'
    for i in range(num_batches):
        print(f'Started Gathering Data')
        # Calculate the start and end indices of the current batch
        start_idx = i * batch_size
        end_idx = min((i + 1) * batch_size, len(artist_uris))
        
        # Extract the artists URIs for the current batch
        artist_batch_uris = artist_uris[start_idx:end_idx]
        
        print(f'Making API Request...')
        # Request audio features for the current batch of artist URIs from the Spotify API
        artist_batch_features = sp.artists(artist_batch_uris)
        print(f'Succesful!')
        
        # Extend the list of artists features with the results of the current batch
        artists_features.extend(artist_batch_features['artists'])
        
        #If Verbose is Enabled, the function will print current API requests status in terminal.
        if verbose:
            print(f'----------------------------------------------------------------------------------')
            print(f'Actual Batch: {i}. Number of Tracks retrieved: {i * batch_size}. Tracks Remaining: {len(df) - i * batch_size}')
            
    # Convert audio_features list to DataFrame
    features_df = pd.DataFrame(artists_features)
    
    # Remove duplicates based on the 'uri' column
    features_df = features_df.drop_duplicates(subset='uri')
    
    #Drop unnecesary columns
    drop_columns = ['external_urls', 'followers', 'href', 'id', 'images', 'name', 'type']
    features_df = features_df.drop(drop_columns, axis = 1)
    
    # Merge audio features DataFrame with the final_df based on 'artist_uri' index
    final_df = df.merge(features_df, left_index = True, right_on= 'uri')
    final_df = final_df.rename(columns = {'uri' : 'artist_uri', 'popularity': 'artist_popularity'})
    
    return final_df

In [33]:
artist_df = extract_artist_features(track_features_df)
artist_df 

Started Gathering Data
Making API Request...
Succesful!
----------------------------------------------------------------------------------
Actual Batch: 0. Number of Tracks retrieved: 0. Tracks Remaining: 34443
Started Gathering Data
Making API Request...
Succesful!
----------------------------------------------------------------------------------
Actual Batch: 1. Number of Tracks retrieved: 50. Tracks Remaining: 34393
Started Gathering Data
Making API Request...
Succesful!
----------------------------------------------------------------------------------
Actual Batch: 2. Number of Tracks retrieved: 100. Tracks Remaining: 34343
Started Gathering Data
Making API Request...
Succesful!
----------------------------------------------------------------------------------
Actual Batch: 3. Number of Tracks retrieved: 150. Tracks Remaining: 34293
Started Gathering Data
Making API Request...
Succesful!
----------------------------------------------------------------------------------
Actual Batch

Unnamed: 0,artist_name,track_name,album_uri,album_name,danceability,energy,key,loudness,mode,speechiness,...,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,track_uri,genres,artist_popularity,artist_uri
0,Missy Elliott,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,The Cookbook,0.904,0.813,4,-7.105,0,0.1210,...,0.006970,0.0471,0.810,125.461,226864,4,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk
0,Missy Elliott,Work It,spotify:album:6DeU398qrJ1bLuryetSmup,Under Construction,0.884,0.677,1,-5.603,1,0.2830,...,0.000000,0.0732,0.584,101.868,263227,4,spotify:track:3jagJCUbdqhDSPuxP8cAqF,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk
0,Missy Elliott,Get Ur Freak On,spotify:album:6epR3D622KWsnuHye7ApOl,Respect M.E.,0.794,0.805,0,-6.554,1,0.2300,...,0.122000,0.0952,0.658,177.799,236933,4,spotify:track:3XplJgPz8VjbDzbGwGgZdq,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk
0,Missy Elliott,One Minute Man (feat. Ludacris),spotify:album:20t54K6C80QQH7vbcpfJcP,Miss E...So Addictive,0.622,0.669,9,-8.419,1,0.3290,...,0.000003,0.1520,0.570,93.839,252987,4,spotify:track:0jG92AlXau21qgCQRxGLic,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk
0,Missy Elliott,Get Ur Freak On,spotify:album:20t54K6C80QQH7vbcpfJcP,Miss E...So Addictive,0.797,0.750,0,-9.369,1,0.2470,...,0.108000,0.0950,0.740,177.870,211120,4,spotify:track:6zsk6uF3MxfIeHPlubKBvR,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34435,Layla,Oh My Love,spotify:album:4eTl12dc7uQXvgDhtMgW5p,Yellow Circles EP,0.434,0.279,8,-11.947,1,0.0465,...,0.042400,0.1330,0.157,145.264,203676,3,spotify:track:0KMrYUEfexgam36li6d9F0,[indie anthem-folk],23,spotify:artist:04BsVprJtIhl2C4fgPEz4W
34437,Aayushi,Diamond Child,spotify:album:5bWtDTwS9llWcnhmgRkav3,Diamond Child,0.416,0.394,11,-9.269,1,0.0641,...,0.001550,0.0988,0.131,81.988,237008,4,spotify:track:1msfqzqHggvi1mlCT4Z7O5,[],22,spotify:artist:1r2kTJ27zuaEoXasQT5NDd
34438,Jon D,I Don't Know,spotify:album:2KEQtuVl1cYsTYtVRUrNVi,Roots,0.669,0.228,2,-12.119,1,0.0690,...,0.065000,0.0944,0.402,83.024,189184,4,spotify:track:3uCHI1gfOUL5j5swEh0TcH,[channel pop],41,spotify:artist:5HCypjplgh5uQezvBpOfXN
34439,Big Words,The Answer,spotify:album:5jrsRHRAmetu5e7RRBoxj7,"Hollywood, a Beautiful Coincidence",0.493,0.727,1,-5.031,1,0.2170,...,0.000000,0.1290,0.289,73.259,263680,4,spotify:track:0P1oO2gREMYUCoOkzYAyFu,[australian r&b],41,spotify:artist:0sHN89qak07mnug3LVVjzP


### Requesting to Spotify's API the data of every Artist in the dataset

In [34]:
def extract_track_data(df, batch_size = 50, verbose = True):
  # Set 'track_uri' as the index for efficient merging
  df = df.set_index('track_uri')
  
  # Get a list of all the track URIs in the DataFrame
  track_uris = df.index.to_list()
  
  # Calculate the number of batches needed based on batch size
  num_batches = math.ceil(len(track_uris) / batch_size)
  
  # Initialize an empty list to store the results
  audio_features = []
  
  # Make requests to the Spotify API in batches of size 'batch_size'
  for i in range(num_batches):
    # Calculate the start and end indices of the current batch
    start_idx = i * batch_size
    end_idx = min((i + 1) * batch_size, len(track_uris))
    
    # Extract the track URIs for the current batch
    track_batch_uris = track_uris[start_idx:end_idx]
    
    # Request audio features and track's data for the current batch of track URIs from the Spotify API
    track_batch_data = sp.tracks(track_batch_uris)['tracks']
    
    # Extend the list of audio features with the results of the current batch
    audio_features.extend(track_batch_data)
    
    #If Verbose is Enabled, the function will print current API requests status in terminal.
    if verbose:
      print(f'----------------------------------------------------------------------------------')
      print(f'Actual Batch: {i}. Number of Tracks retrieved: {i * batch_size}. Tracks Remaining: {len(df) - i * batch_size}')

  # Convert audio_features list to DataFrame
  features_df = pd.DataFrame([af for af in audio_features if af is not None])
  
  # Remove duplicates based on the 'uri' column
  features_df = features_df.drop_duplicates(subset='uri')
  
  # Extract 'release_date' from the dictionaries within 'album' column
  features_df['release_date'] = features_df['album'].apply(lambda x: x['release_date'])
  
  # Drop unnecesary columns
  drop_columns = ['disc_number',	'explicit',	'external_ids',	'external_urls',	'href',	'is_local',	'name', 'artists', 'available_markets',	'id', 'preview_url',	'track_number',	'type', 'duration_ms', 'album']
  features_df = features_df.drop(drop_columns, axis = 1)
  
  # Merge audio features DataFrame with the final_df based on 'track_uri' index
  final_df = df.merge(features_df, left_index = True, right_on= 'uri')
  final_df = final_df.rename(columns = {'uri' : 'track_uri', 'popularity' : 'track_popularity'})
  
  return final_df

In [35]:
track_data_df = extract_track_data(artist_df)
track_data_df

----------------------------------------------------------------------------------
Actual Batch: 0. Number of Tracks retrieved: 0. Tracks Remaining: 34323
----------------------------------------------------------------------------------
Actual Batch: 1. Number of Tracks retrieved: 50. Tracks Remaining: 34273
----------------------------------------------------------------------------------
Actual Batch: 2. Number of Tracks retrieved: 100. Tracks Remaining: 34223
----------------------------------------------------------------------------------
Actual Batch: 3. Number of Tracks retrieved: 150. Tracks Remaining: 34173
----------------------------------------------------------------------------------
Actual Batch: 4. Number of Tracks retrieved: 200. Tracks Remaining: 34123
----------------------------------------------------------------------------------
Actual Batch: 5. Number of Tracks retrieved: 250. Tracks Remaining: 34073
-------------------------------------------------------------

Unnamed: 0,artist_name,track_name,album_uri,album_name,danceability,energy,key,loudness,mode,speechiness,...,valence,tempo,duration_ms,time_signature,genres,artist_popularity,artist_uri,track_popularity,track_uri,release_date
0,Missy Elliott,Lose Control (feat. Ciara & Fat Man Scoop),spotify:album:6vV5UrXcfyQD1wu4Qo2I9K,The Cookbook,0.904,0.813,4,-7.105,0,0.1210,...,0.810,125.461,226864,4,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk,69,spotify:track:0UaMYEvWZi0ZqiDOoHU3YI,2005-07-04
1,Missy Elliott,Work It,spotify:album:6DeU398qrJ1bLuryetSmup,Under Construction,0.884,0.677,1,-5.603,1,0.2830,...,0.584,101.868,263227,4,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk,71,spotify:track:3jagJCUbdqhDSPuxP8cAqF,2002-11-11
2,Missy Elliott,Get Ur Freak On,spotify:album:6epR3D622KWsnuHye7ApOl,Respect M.E.,0.794,0.805,0,-6.554,1,0.2300,...,0.658,177.799,236933,4,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk,45,spotify:track:3XplJgPz8VjbDzbGwGgZdq,2006-09-04
3,Missy Elliott,One Minute Man (feat. Ludacris),spotify:album:20t54K6C80QQH7vbcpfJcP,Miss E...So Addictive,0.622,0.669,9,-8.419,1,0.3290,...,0.570,93.839,252987,4,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk,58,spotify:track:0jG92AlXau21qgCQRxGLic,2001-05-14
4,Missy Elliott,Get Ur Freak On,spotify:album:20t54K6C80QQH7vbcpfJcP,Miss E...So Addictive,0.797,0.750,0,-9.369,1,0.2470,...,0.740,177.870,211120,4,"[dance pop, hip hop, hip pop, neo soul, pop ra...",72,spotify:artist:2wIVse2owClT7go1WT98tk,71,spotify:track:6zsk6uF3MxfIeHPlubKBvR,2001-05-14
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34316,Layla,Oh My Love,spotify:album:4eTl12dc7uQXvgDhtMgW5p,Yellow Circles EP,0.434,0.279,8,-11.947,1,0.0465,...,0.157,145.264,203676,3,[indie anthem-folk],23,spotify:artist:04BsVprJtIhl2C4fgPEz4W,30,spotify:track:0KMrYUEfexgam36li6d9F0,2013-12-02
34317,Aayushi,Diamond Child,spotify:album:5bWtDTwS9llWcnhmgRkav3,Diamond Child,0.416,0.394,11,-9.269,1,0.0641,...,0.131,81.988,237008,4,[],22,spotify:artist:1r2kTJ27zuaEoXasQT5NDd,0,spotify:track:1msfqzqHggvi1mlCT4Z7O5,2015-06-16
34318,Jon D,I Don't Know,spotify:album:2KEQtuVl1cYsTYtVRUrNVi,Roots,0.669,0.228,2,-12.119,1,0.0690,...,0.402,83.024,189184,4,[channel pop],41,spotify:artist:5HCypjplgh5uQezvBpOfXN,22,spotify:track:3uCHI1gfOUL5j5swEh0TcH,2015-03-28
34319,Big Words,The Answer,spotify:album:5jrsRHRAmetu5e7RRBoxj7,"Hollywood, a Beautiful Coincidence",0.493,0.727,1,-5.031,1,0.2170,...,0.289,73.259,263680,4,[australian r&b],41,spotify:artist:0sHN89qak07mnug3LVVjzP,37,spotify:track:0P1oO2gREMYUCoOkzYAyFu,2017-09-22


### Saving resulting data in a CSV file

In [36]:
# Save final_df to a CSV file
track_data_df.to_csv(r'Data/tracks_features.csv', index = False)  