# Extracting Audio Features from Playlist IDs

# Test

In [8]:
import json
import pandas as pd
import os

# Local JSON exports of each participant's Spotify library.
# All files sit in one folder, so the folder is written once and combined
# with every file stem to form the full local path.
library_dir = '/Users/khieuvon/Documents/10_Personal Stuff/01_Masterarbeit/Data for ML Model/Collected Spotify Data from Friends/All Extracted Library'
file_paths = [
    f'{library_dir}/{file_stem}.json'
    for file_stem in (
        '00_YourLibrary_wadthy',
        '01_YourLibrary_withy',
        '02_YourLibrary_yoojin',
        '03_YourLibrary_moni',
        '04_YourLibrary_nga',
        '05_YourLibrary_makra',
        '06_YourLibrary_soeren',
        '07_YourLibrary_simon',
        '09_YourLibrary_yeonju',
        '10_YourLibrary_han',
    )
]

# Dictionary to store DataFrames, keyed by the file name without its extension
dataframes = {}

# Column layout shared by every per-user DataFrame. Passing it explicitly keeps
# the columns present (and in this order) even when a library has no tracks.
song_columns = ['Title', 'Artist', 'Album', 'URI']

# Loop through each file, load the data, and create a DataFrame
for file_path in file_paths:
    # Decode explicitly as UTF-8 so titles with non-ASCII characters load the
    # same way on every platform instead of depending on the locale default.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Extract song information; a file without a 'tracks' key raises KeyError
    songs = data['tracks']

    # One record per song; fields missing from the export fall back to 'N/A'
    song_list = [
        {
            'Title': song.get('track', 'N/A'),
            'Artist': song.get('artist', 'N/A'),
            'Album': song.get('album', 'N/A'),
            'URI': song.get('uri', 'N/A'),
        }
        for song in songs
    ]

    # Create a DataFrame from the song list
    df = pd.DataFrame(song_list, columns=song_columns)

    # Use the file name (without extension) as the key for the DataFrame in the dictionary
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    dataframes[file_name] = df

# Walk through every loaded library: show a preview, report its size,
# and write it to "<name>_songs.csv" in the current working directory.
for name in dataframes:
    df = dataframes[name]

    # Header line followed by the first few rows of this library
    preview = df.head()
    print(f"DataFrame for {name}:")
    print(preview)

    # Example analysis: how many songs this library contains
    song_count = len(df)
    print(f"Number of songs in {name}: {song_count}")

    # Persist the library as its own CSV file (without the index column)
    df.to_csv(f'{name}_songs.csv', index=False)

# Bind every collected user's DataFrame to its own variable for further analysis.
# Each one is looked up by its dictionary key (e.g. "05_YourLibrary_makra");
# .get() yields None when a key is not present in `dataframes`.
(
    df_wadthy,  # 3157 entries
    df_withy,   # 601
    df_yoojin,  # 46
    df_moni,    # 13
    df_nga,     # 1152
    df_makra,   # 12
    df_soeren,  # 1086
    df_simon,   # 681
    # van's data
    df_yeonju,  # 260
    df_han,     # 106
    # trang's data
) = (
    dataframes.get(library_key)
    for library_key in (
        "00_YourLibrary_wadthy",
        "01_YourLibrary_withy",
        "02_YourLibrary_yoojin",
        "03_YourLibrary_moni",
        "04_YourLibrary_nga",
        "05_YourLibrary_makra",
        "06_YourLibrary_soeren",
        "07_YourLibrary_simon",
        "09_YourLibrary_yeonju",
        "10_YourLibrary_han",
    )
)


DataFrame for 00_YourLibrary_wadthy:
                     Title       Artist              Album  \
0  Smells Like Teen Spirit      Nirvana            Nirvana   
1               Sure Thing       Miguel  All I Want Is You   
2              Fancy Shoes  The Walters     Songs for Dads   
3                Tokyo Inn       HYUKOH                 23   
4             Konoha Peace         Kato       Naruto Vibes   

                                    URI  
0  spotify:track:4hy4fb5D1KL50b3sng9cjw  
1  spotify:track:0JXXNGljqupsJaZsgSbMZV  
2  spotify:track:1YVVAiBD5WhX2ZdHtlSOhz  
3  spotify:track:4myeBw35GUMw5FyDGZcOON  
4  spotify:track:0wIfYaveiZku0eL44UXtHk  
Number of songs in 00_YourLibrary_wadthy: 3157
DataFrame for 01_YourLibrary_withy:
                                       Title           Artist          Album  \
0  Ordinaryish People (feat. Blue Man Group)              AJR   OK ORCHESTRA   
1                                   Good Day       Jake Scott       Lavender   
2              

In [113]:
# Draw a reproducible random sample (200 rows by default) from each of the
# larger libraries to reduce the chance of hitting the Spotify API rate limit.
# yoojin (46 entries), moni (13) and makra (12) are small and are not sampled.

def _sample_library(df, n=200, random_state=42):
    """Return a reproducible random sample of at most ``n`` rows of ``df``.

    The sample size is capped at ``len(df)`` so that a library with fewer
    than ``n`` rows is returned in full (shuffled) instead of raising
    ``ValueError``. ``random_state`` is fixed so that re-running the cell
    yields the same rows.
    """
    return df.sample(n=min(n, len(df)), random_state=random_state)


df_wadthy_sampled = _sample_library(df_wadthy)
df_withy_sampled = _sample_library(df_withy)
df_nga_sampled = _sample_library(df_nga)
df_soeren_sampled = _sample_library(df_soeren)
df_simon_sampled = _sample_library(df_simon)
df_yeonju_sampled = _sample_library(df_yeonju)
df_han_sampled = _sample_library(df_han, n=100)  # smaller library: keep the original sample size of 100

In [104]:
# Preview up to the first 20 rows of the sampled yeonju library
df_yeonju_sampled.head(20)

Unnamed: 0,Title,Artist,Album,URI
30,superstars,Christian French,superstars,spotify:track:0TqaX16qLpX7YqFcAl9lDy
181,Dangerously,Charlie Puth,Nine Track Mind,spotify:track:3qonjOrhFCfTnaaMruHzxW
223,Love Is Gone,SLANDER,Love Is Gone,spotify:track:39glqzRVRAy4vq3PqeTGb8
185,Running Up That Hill (A Deal With God),Kate Bush,Hounds Of Love,spotify:track:1PtQJZVZIdWIYdARpZRDFO
211,I'm The Greatest,TAEYEON,Stay,spotify:track:4vAQgQxs2gY3TlXBALUsKM
258,Strawberries & Cigarettes,Troye Sivan,Strawberries & Cigarettes,spotify:track:3afkJSKX0EAMsJXTZnDXXJ
173,To the Dawn,MRCH,To the Dawn,spotify:track:1fDQ8cZ2CWJY06zfdgv3m3
142,learned from you,Munn,only human.,spotify:track:1LssaQkPdWYdTfna8B0198
9,Ross And Rachel,Jake Miller,Silver Lining II,spotify:track:4vCiNW18AyeaUAZosVnX61
177,Just Friends,Why Don't We,Just Friends,spotify:track:0tjYK2gtxo8fT2kdD4XGeF


In [122]:
# Keep only the last ':'-separated part of each Spotify URI
# ("spotify:track:<id>" -> "<id>"). The column is selected by name rather than
# by position so the extraction does not depend on the column order.
df_yoojin['track_id'] = df_yoojin['URI'].str.rsplit(':', n=1).str.get(-1)

In [132]:
# Preview up to the first 20 rows of the yoojin library
df_yoojin.head(20)

Unnamed: 0,Title,Artist,Album,URI,track_id
0,TOO BAD (prod. CarTa),Anandelight,TOO BAD,spotify:track:06vHdq1RN59tke4Ixq6usN,06vHdq1RN59tke4Ixq6usN
1,"Young, Wild & Free (feat. Bruno Mars)",Snoop Dogg,"Young, Wild & Free (feat. Bruno Mars)",spotify:track:5HQVUIKwCEXpe7JIHyY734,5HQVUIKwCEXpe7JIHyY734
2,Dancing in the Moonlight,Toploader,Onka's Big Moka,spotify:track:3Fzlg5r1IjhLk2qRw667od,3Fzlg5r1IjhLk2qRw667od
3,NDGGA,meenoi,NDGGA,spotify:track:4k4PQkx007qoo6XPbijp9z,4k4PQkx007qoo6XPbijp9z
4,Etude of Memory,Exhibition,Exhibition,spotify:track:3YpiZu52TdVbNKExZpZAWW,3YpiZu52TdVbNKExZpZAWW
5,Weekend,TAEYEON,INVU - The 3rd Album,spotify:track:0n1jo6k1GUzyrcXW2aS0iL,0n1jo6k1GUzyrcXW2aS0iL
6,Ah Koritsi Mou,Giannis Ploutarhos,Mikres Fotografies,spotify:track:6RM4uRehk0XvKUzYO3Y93L,6RM4uRehk0XvKUzYO3Y93L
7,Bumblebee,ABBA,Voyage,spotify:track:4dmSIn2DxtfX1rySnUDamj,4dmSIn2DxtfX1rySnUDamj
8,Den Thelo Allo,Giannis Ploutarhos,I Dinami Tou Erota,spotify:track:3Su4Xx8b8ko1kGrjYs2CIh,3Su4Xx8b8ko1kGrjYs2CIh
9,The Rubberband Man,The Spinners,The Rubberband Man / Now That We're Together,spotify:track:13Mzsb8VzRSZ5w3pM48cn6,13Mzsb8VzRSZ5w3pM48cn6


In [130]:
# Summarize the columns, non-null counts and dtypes of the yoojin library
df_yoojin.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 46 entries, 0 to 45
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Title     46 non-null     object
 1   Artist    46 non-null     object
 2   Album     46 non-null     object
 3   URI       46 non-null     object
 4   track_id  46 non-null     object
dtypes: object(5)
memory usage: 1.9+ KB


df_wadthy_sampled.head()

In [133]:
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials

# Read the Spotify app credentials from the environment so that secrets are
# never stored in (or shared with) the notebook. A missing variable raises
# KeyError here, before any API request is attempted.
# NOTE(review): the client id/secret that used to be hard-coded in this cell
# were exposed in the notebook and should be rotated in the Spotify dashboard.
client_credentials_manager = SpotifyClientCredentials(
    client_id=os.environ['SPOTIPY_CLIENT_ID'],
    client_secret=os.environ['SPOTIPY_CLIENT_SECRET'],
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

In [134]:
import time

def get_audio_features_batch(track_ids, batch_size=100, client=None, delay=1.0, drop_missing=True):
    """Fetch audio features for many tracks, one request per batch.

    Args:
        track_ids: Iterable of track identifiers; it is split into chunks of
            ``batch_size`` and each chunk is passed to ``client.audio_features``.
        batch_size: Number of identifiers sent per request (default 100).
            NOTE(review): Spotify documents a maximum of 100 IDs per request
            for this endpoint -- confirm before raising the default.
        client: Object exposing ``audio_features(list_of_ids)``. When ``None``
            the notebook-level ``sp`` client is used.
        delay: Seconds to wait between two consecutive requests, to stay clear
            of the API rate limit. No wait happens before the first request or
            after the last one.
        drop_missing: When true (default), ``None`` entries in a response are
            left out so the result can be passed straight to ``pd.DataFrame``.
            Pass ``False`` to keep one entry per input ID, in input order.

    Returns:
        A flat list with the feature entries of all batches, in request order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")
    if client is None:
        client = sp  # fall back to the Spotify client created earlier in the notebook

    # Materialize once so any iterable (list, Series, generator) can be sliced
    track_ids = list(track_ids)

    audio_features = []
    for start in range(0, len(track_ids), batch_size):
        # Pause only between requests, never before the first one
        if start and delay > 0:
            time.sleep(delay)

        batch = track_ids[start:start + batch_size]
        # Treat an empty/None response as "no features for this batch"
        features = client.audio_features(batch) or []
        if drop_missing:
            features = [entry for entry in features if entry is not None]
        audio_features.extend(features)
    return audio_features

In [135]:
# Collect the yoojin track IDs as a plain Python list and request their
# audio features in batches.
all_track_ids = df_yoojin['track_id'].to_list()
all_audio_features = get_audio_features_batch(track_ids=all_track_ids)

In [125]:
# Build one row of audio features per distinct track:
#  - None entries (the API may return them for tracks it has no features for --
#    TODO confirm) are skipped, since they cannot become DataFrame rows
#  - a track that occurs several times in the library is fetched several times,
#    so duplicate 'id' rows are dropped; otherwise the merge would multiply rows
audio_features_df = (
    pd.DataFrame([features for features in all_audio_features if features is not None])
    .drop_duplicates(subset='id')
)

# Left merge keeps every library row; validate= asserts that each track_id
# matches at most one feature row.
result_df = pd.merge(
    df_yoojin,
    audio_features_df,
    left_on='track_id',
    right_on='id',
    how='left',
    validate='many_to_one',
)

In [126]:
# Preview up to the first 20 rows of the library merged with its audio features
result_df.head(20)



Unnamed: 0,Title,Artist,Album,URI,track_id,danceability,energy,key,loudness,mode,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,TOO BAD (prod. CarTa),Anandelight,TOO BAD,spotify:track:06vHdq1RN59tke4Ixq6usN,06vHdq1RN59tke4Ixq6usN,0.579,0.791,0,-5.49,0,...,0.207,0.619,165.988,audio_features,06vHdq1RN59tke4Ixq6usN,spotify:track:06vHdq1RN59tke4Ixq6usN,https://api.spotify.com/v1/tracks/06vHdq1RN59t...,https://api.spotify.com/v1/audio-analysis/06vH...,213976,4
1,"Young, Wild & Free (feat. Bruno Mars)",Snoop Dogg,"Young, Wild & Free (feat. Bruno Mars)",spotify:track:5HQVUIKwCEXpe7JIHyY734,5HQVUIKwCEXpe7JIHyY734,0.715,0.655,0,-6.425,1,...,0.115,0.531,95.078,audio_features,5HQVUIKwCEXpe7JIHyY734,spotify:track:5HQVUIKwCEXpe7JIHyY734,https://api.spotify.com/v1/tracks/5HQVUIKwCEXp...,https://api.spotify.com/v1/audio-analysis/5HQV...,207333,4
2,Dancing in the Moonlight,Toploader,Onka's Big Moka,spotify:track:3Fzlg5r1IjhLk2qRw667od,3Fzlg5r1IjhLk2qRw667od,0.632,0.856,10,-3.463,1,...,0.347,0.867,119.397,audio_features,3Fzlg5r1IjhLk2qRw667od,spotify:track:3Fzlg5r1IjhLk2qRw667od,https://api.spotify.com/v1/tracks/3Fzlg5r1IjhL...,https://api.spotify.com/v1/audio-analysis/3Fzl...,232693,4
3,NDGGA,meenoi,NDGGA,spotify:track:4k4PQkx007qoo6XPbijp9z,4k4PQkx007qoo6XPbijp9z,0.749,0.381,8,-7.529,1,...,0.377,0.517,109.434,audio_features,4k4PQkx007qoo6XPbijp9z,spotify:track:4k4PQkx007qoo6XPbijp9z,https://api.spotify.com/v1/tracks/4k4PQkx007qo...,https://api.spotify.com/v1/audio-analysis/4k4P...,175412,4
4,Etude of Memory,Exhibition,Exhibition,spotify:track:3YpiZu52TdVbNKExZpZAWW,3YpiZu52TdVbNKExZpZAWW,0.444,0.37,3,-13.858,1,...,0.294,0.121,135.862,audio_features,3YpiZu52TdVbNKExZpZAWW,spotify:track:3YpiZu52TdVbNKExZpZAWW,https://api.spotify.com/v1/tracks/3YpiZu52TdVb...,https://api.spotify.com/v1/audio-analysis/3Ypi...,313107,4
5,Weekend,TAEYEON,INVU - The 3rd Album,spotify:track:0n1jo6k1GUzyrcXW2aS0iL,0n1jo6k1GUzyrcXW2aS0iL,0.74,0.854,8,-2.209,1,...,0.253,0.856,114.029,audio_features,0n1jo6k1GUzyrcXW2aS0iL,spotify:track:0n1jo6k1GUzyrcXW2aS0iL,https://api.spotify.com/v1/tracks/0n1jo6k1GUzy...,https://api.spotify.com/v1/audio-analysis/0n1j...,233413,4
6,Ah Koritsi Mou,Giannis Ploutarhos,Mikres Fotografies,spotify:track:6RM4uRehk0XvKUzYO3Y93L,6RM4uRehk0XvKUzYO3Y93L,0.507,0.549,10,-7.027,0,...,0.0953,0.469,104.168,audio_features,6RM4uRehk0XvKUzYO3Y93L,spotify:track:6RM4uRehk0XvKUzYO3Y93L,https://api.spotify.com/v1/tracks/6RM4uRehk0Xv...,https://api.spotify.com/v1/audio-analysis/6RM4...,233400,4
7,Bumblebee,ABBA,Voyage,spotify:track:4dmSIn2DxtfX1rySnUDamj,4dmSIn2DxtfX1rySnUDamj,0.455,0.271,10,-11.129,1,...,0.31,0.227,135.016,audio_features,4dmSIn2DxtfX1rySnUDamj,spotify:track:4dmSIn2DxtfX1rySnUDamj,https://api.spotify.com/v1/tracks/4dmSIn2DxtfX...,https://api.spotify.com/v1/audio-analysis/4dmS...,237467,3
8,Den Thelo Allo,Giannis Ploutarhos,I Dinami Tou Erota,spotify:track:3Su4Xx8b8ko1kGrjYs2CIh,3Su4Xx8b8ko1kGrjYs2CIh,0.646,0.826,10,-3.332,0,...,0.294,0.588,119.993,audio_features,3Su4Xx8b8ko1kGrjYs2CIh,spotify:track:3Su4Xx8b8ko1kGrjYs2CIh,https://api.spotify.com/v1/tracks/3Su4Xx8b8ko1...,https://api.spotify.com/v1/audio-analysis/3Su4...,215293,4
9,The Rubberband Man,The Spinners,The Rubberband Man / Now That We're Together,spotify:track:13Mzsb8VzRSZ5w3pM48cn6,13Mzsb8VzRSZ5w3pM48cn6,0.801,0.446,10,-14.604,1,...,0.0832,0.924,97.894,audio_features,13Mzsb8VzRSZ5w3pM48cn6,spotify:track:13Mzsb8VzRSZ5w3pM48cn6,https://api.spotify.com/v1/tracks/13Mzsb8VzRSZ...,https://api.spotify.com/v1/audio-analysis/13Mz...,213000,4


In [127]:
# Label every row of this user's merged frame with the MBTI type 'ENFP'
result_df.loc[:, 'MBTI'] = 'ENFP'

In [128]:
# Preview up to the first 20 rows again, now including the MBTI column
result_df.head(20)

Unnamed: 0,Title,Artist,Album,URI,track_id,danceability,energy,key,loudness,mode,...,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature,MBTI
0,TOO BAD (prod. CarTa),Anandelight,TOO BAD,spotify:track:06vHdq1RN59tke4Ixq6usN,06vHdq1RN59tke4Ixq6usN,0.579,0.791,0,-5.49,0,...,0.619,165.988,audio_features,06vHdq1RN59tke4Ixq6usN,spotify:track:06vHdq1RN59tke4Ixq6usN,https://api.spotify.com/v1/tracks/06vHdq1RN59t...,https://api.spotify.com/v1/audio-analysis/06vH...,213976,4,ENFP
1,"Young, Wild & Free (feat. Bruno Mars)",Snoop Dogg,"Young, Wild & Free (feat. Bruno Mars)",spotify:track:5HQVUIKwCEXpe7JIHyY734,5HQVUIKwCEXpe7JIHyY734,0.715,0.655,0,-6.425,1,...,0.531,95.078,audio_features,5HQVUIKwCEXpe7JIHyY734,spotify:track:5HQVUIKwCEXpe7JIHyY734,https://api.spotify.com/v1/tracks/5HQVUIKwCEXp...,https://api.spotify.com/v1/audio-analysis/5HQV...,207333,4,ENFP
2,Dancing in the Moonlight,Toploader,Onka's Big Moka,spotify:track:3Fzlg5r1IjhLk2qRw667od,3Fzlg5r1IjhLk2qRw667od,0.632,0.856,10,-3.463,1,...,0.867,119.397,audio_features,3Fzlg5r1IjhLk2qRw667od,spotify:track:3Fzlg5r1IjhLk2qRw667od,https://api.spotify.com/v1/tracks/3Fzlg5r1IjhL...,https://api.spotify.com/v1/audio-analysis/3Fzl...,232693,4,ENFP
3,NDGGA,meenoi,NDGGA,spotify:track:4k4PQkx007qoo6XPbijp9z,4k4PQkx007qoo6XPbijp9z,0.749,0.381,8,-7.529,1,...,0.517,109.434,audio_features,4k4PQkx007qoo6XPbijp9z,spotify:track:4k4PQkx007qoo6XPbijp9z,https://api.spotify.com/v1/tracks/4k4PQkx007qo...,https://api.spotify.com/v1/audio-analysis/4k4P...,175412,4,ENFP
4,Etude of Memory,Exhibition,Exhibition,spotify:track:3YpiZu52TdVbNKExZpZAWW,3YpiZu52TdVbNKExZpZAWW,0.444,0.37,3,-13.858,1,...,0.121,135.862,audio_features,3YpiZu52TdVbNKExZpZAWW,spotify:track:3YpiZu52TdVbNKExZpZAWW,https://api.spotify.com/v1/tracks/3YpiZu52TdVb...,https://api.spotify.com/v1/audio-analysis/3Ypi...,313107,4,ENFP
5,Weekend,TAEYEON,INVU - The 3rd Album,spotify:track:0n1jo6k1GUzyrcXW2aS0iL,0n1jo6k1GUzyrcXW2aS0iL,0.74,0.854,8,-2.209,1,...,0.856,114.029,audio_features,0n1jo6k1GUzyrcXW2aS0iL,spotify:track:0n1jo6k1GUzyrcXW2aS0iL,https://api.spotify.com/v1/tracks/0n1jo6k1GUzy...,https://api.spotify.com/v1/audio-analysis/0n1j...,233413,4,ENFP
6,Ah Koritsi Mou,Giannis Ploutarhos,Mikres Fotografies,spotify:track:6RM4uRehk0XvKUzYO3Y93L,6RM4uRehk0XvKUzYO3Y93L,0.507,0.549,10,-7.027,0,...,0.469,104.168,audio_features,6RM4uRehk0XvKUzYO3Y93L,spotify:track:6RM4uRehk0XvKUzYO3Y93L,https://api.spotify.com/v1/tracks/6RM4uRehk0Xv...,https://api.spotify.com/v1/audio-analysis/6RM4...,233400,4,ENFP
7,Bumblebee,ABBA,Voyage,spotify:track:4dmSIn2DxtfX1rySnUDamj,4dmSIn2DxtfX1rySnUDamj,0.455,0.271,10,-11.129,1,...,0.227,135.016,audio_features,4dmSIn2DxtfX1rySnUDamj,spotify:track:4dmSIn2DxtfX1rySnUDamj,https://api.spotify.com/v1/tracks/4dmSIn2DxtfX...,https://api.spotify.com/v1/audio-analysis/4dmS...,237467,3,ENFP
8,Den Thelo Allo,Giannis Ploutarhos,I Dinami Tou Erota,spotify:track:3Su4Xx8b8ko1kGrjYs2CIh,3Su4Xx8b8ko1kGrjYs2CIh,0.646,0.826,10,-3.332,0,...,0.588,119.993,audio_features,3Su4Xx8b8ko1kGrjYs2CIh,spotify:track:3Su4Xx8b8ko1kGrjYs2CIh,https://api.spotify.com/v1/tracks/3Su4Xx8b8ko1...,https://api.spotify.com/v1/audio-analysis/3Su4...,215293,4,ENFP
9,The Rubberband Man,The Spinners,The Rubberband Man / Now That We're Together,spotify:track:13Mzsb8VzRSZ5w3pM48cn6,13Mzsb8VzRSZ5w3pM48cn6,0.801,0.446,10,-14.604,1,...,0.924,97.894,audio_features,13Mzsb8VzRSZ5w3pM48cn6,spotify:track:13Mzsb8VzRSZ5w3pM48cn6,https://api.spotify.com/v1/tracks/13Mzsb8VzRSZ...,https://api.spotify.com/v1/audio-analysis/13Mz...,213000,4,ENFP


In [129]:
# Write the merged frame (songs + audio features + MBTI label) to a CSV file
# in the current working directory, leaving out the index column.
result_df.to_csv(path_or_buf='extended_songs_yoojin.csv', index=False)