In [2]:
import pandas as pd
from tqdm.notebook import tqdm
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import getpass

# Spotify


In [46]:
client_id = str(getpass.getpass('client_id?'))
client_sectret = str(getpass.getpass('client_secret?'))

In [47]:
sp = spotipy.Spotify(auth_manager=SpotifyClientCredentials(
    client_id=client_id, client_secret=client_sectret),requests_timeout=10)

# Functions

In [18]:
def artists_from_playlist(playlist):
    songs = playlist['items']
    while playlist['next']:
        playlist = sp.next(playlist)
        songs.extend(playlist['items'])
    artists = []
    for item in songs:
        artists.append(item['track']['artists'][0]['name'])
    artist_id_list = []
    for artist in artists:
        if len(sp.search(artist)['tracks']['items']) > 1:
            artist_id = sp.search(artist)['tracks']['items'][0]['artists'][0]['id']
            artist_id_list.append(artist_id)
    return list(set(artist_id_list))

In [6]:
def all_albums_from_artists(artists_id_list):
    albums_ids = []
    for artist_id in artists_id_list:
        albums = sp.artist_albums(artist_id)
        for album in range(len(albums['items'])):
            albums_ids.append(albums['items'][album]['id'])
    return list(set(albums_ids))

In [7]:
def all_songs_from_albums(albums_id_list):
    ids = []
    for album_id in tqdm(albums_id_list):
        songs_ids = sp.album_tracks(album_id)
        for id in range(len(songs_ids['items'])):
            ids.append(songs_ids['items'][id]['id'])         
    ids_chunks = []
    for i in tqdm(range(0, len(ids), 100)):
        ids_chunks.append(ids[i:i + 100])
    audio_feat = []
    for i in tqdm(ids_chunks):
        audio_feat.append(sp.audio_features(tracks=i))
    audio_feat_total = []
    for i in tqdm(range(len(audio_feat))):
        audio_feat_total += audio_feat[i]
    audio_feats_clean = [dct for dct in audio_feat_total if dct]
    df = pd.DataFrame(audio_feats_clean)
    return df.drop(['type', 'track_href', 'uri', 'analysis_url'], axis=1)
    

In [8]:
def get_track_id(song):
    id_search = sp.search(song)
    if len(id_search['tracks']['items']) > 0:
        print()
        return id_search['tracks']['items'][0]['id']
    else:
        print('Typo? Try again')

In [9]:
def get_audio_features(song_id):
    a_f = sp.audio_features(tracks=song_id)
    if a_f:
        df = pd.DataFrame(a_f)
        return df.drop(['type', 'track_href', 'uri', 'analysis_url', 'id'], axis=1)
    else:
        return print('no features :(..')

In [10]:
def release_year(df):
    years = []
    for id in tqdm(df['id']):
        track = sp.track(id)
        years.append(track['album']['release_date'][0:4])
    return years

In [11]:
def scrape(playlist, name):
    artists = artists_from_playlist(playlist)
    albums = all_albums_from_artists(artists)
    songs = all_songs_from_albums(albums)
    songs.to_csv(name + '.csv', index=False)

In [95]:
def fix_tempo(df):
    df['tempo'] = df['tempo'].apply(lambda x: x*2 if x < 60 else x)
    df['tempo'] = df['tempo'].apply(lambda x: x/2 if x > 200 else x)
    df['tempo'] = df['tempo'].apply(lambda x: 120 if x == 0 else x)
    return df

# Playlists

In [57]:
rock_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DWXRqgorJj26U')

In [58]:
hard_rock_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DX68H8ZujdnN7')

In [59]:
jazz_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DXbITWG1ZJKYt')

In [60]:
jazz_cool_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DXbOVU4mpMJjh')

In [15]:
jazz_modern_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DX377iwEnODkR')

In [64]:
pop_playlist = sp.user_playlist_tracks('spotify', '3ZgmfR6lsnCwdffZUan8EA')

In [65]:
hiphop_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DX30w0JtSIv4j')

In [66]:
rap_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DX48TTZL62Yht')

In [67]:
trap_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DXde9tuMHuIsj')

In [68]:
deephouse_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DWWEJlAGA9gs0')

In [69]:
techno_playlist = sp.user_playlist_tracks('spotify', '37i9dQZF1DX6J5NfMJS675')

# Scraping

In [70]:
scrape(rock_playlist, 'rock_songs')

In [None]:
scrape(hard_rock_playlist, 'hard_rock_songs')

In [None]:
scrape(jazz_playlist, 'jazz_songs')

  0%|          | 0/1500 [00:00<?, ?it/s]

  0%|          | 0/237 [00:00<?, ?it/s]

  0%|          | 0/237 [00:00<?, ?it/s]

  0%|          | 0/237 [00:00<?, ?it/s]

In [None]:
scrape(jazz_cool_playlist, 'jazz_cool_songs')

  0%|          | 0/336 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

In [20]:
scrape(jazz_modern_playlist, 'jazz_modern_songs')

  0%|          | 0/1240 [00:00<?, ?it/s]

  0%|          | 0/130 [00:00<?, ?it/s]

  0%|          | 0/130 [00:00<?, ?it/s]

  0%|          | 0/130 [00:00<?, ?it/s]

In [None]:
scrape(pop_playlist, 'pop_songs')

  0%|          | 0/2707 [00:00<?, ?it/s]

  0%|          | 0/283 [00:00<?, ?it/s]

  0%|          | 0/283 [00:00<?, ?it/s]

  0%|          | 0/283 [00:00<?, ?it/s]

In [None]:
scrape(hiphop_playlist, 'hiphop_songs')

  0%|          | 0/696 [00:00<?, ?it/s]

  0%|          | 0/88 [00:00<?, ?it/s]

  0%|          | 0/88 [00:00<?, ?it/s]

  0%|          | 0/88 [00:00<?, ?it/s]

In [None]:
scrape(rap_playlist, 'rap_songs')

  0%|          | 0/1260 [00:00<?, ?it/s]

  0%|          | 0/103 [00:00<?, ?it/s]

  0%|          | 0/103 [00:00<?, ?it/s]

  0%|          | 0/103 [00:00<?, ?it/s]

In [None]:
scrape(deephouse_playlist, 'deephouse_songs')

  0%|          | 0/1727 [00:00<?, ?it/s]

  0%|          | 0/339 [00:00<?, ?it/s]

  0%|          | 0/339 [00:00<?, ?it/s]

  0%|          | 0/339 [00:00<?, ?it/s]

In [None]:
scrape(techno_playlist, 'techno_songs')

  0%|          | 0/942 [00:00<?, ?it/s]

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/43 [00:00<?, ?it/s]

  0%|          | 0/43 [00:00<?, ?it/s]

In [None]:
scrape(trap_playlist, 'trap_songs')

  0%|          | 0/498 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

  0%|          | 0/22 [00:00<?, ?it/s]

In [None]:
genres_dict = {'jazz': 1 , 'rock': 2, 'hiphop':3, 'pop': 4, 'electronic': 5} 

# Wrangling & Cleaning

In [6]:
jazz = pd.read_csv('data/jazz_songs.csv')

In [7]:
jazz_cool = pd.read_csv('data/jazz_cool_songs.csv')

In [23]:
jazz_modern = pd.read_csv('jazz_modern_songs.csv')

In [8]:
rock = pd.read_csv('data/rock_songs.csv')

In [9]:
hard_rock = pd.read_csv('data/hard_rock_songs.csv')

In [10]:
hiphop = pd.read_csv('data/hiphop_songs.csv')

In [11]:
rap = pd.read_csv('data/rap_songs.csv')

In [12]:
trap = pd.read_csv('data/trap_songs.csv')

In [13]:
pop = pd.read_csv('data/pop_songs.csv')

In [14]:
techno = pd.read_csv('data/techno_songs.csv')

In [15]:
deep = pd.read_csv('data/deephouse_songs.csv')

## Genres and release years

In [16]:
jazz['genre'] = 1
jazz['year'] = release_year(jazz)

  0%|          | 0/23686 [00:00<?, ?it/s]

In [17]:
jazz_cool['genre'] = 1
jazz_cool['year'] = release_year(jazz_cool)

  0%|          | 0/3198 [00:00<?, ?it/s]

In [31]:
jazz_modern['genre'] = 1
jazz_modern['year'] = release_year(jazz_modern)

  0%|          | 0/12914 [00:00<?, ?it/s]

In [23]:
rock['genre'] = 2
rock['year'] = release_year(rock)

  0%|          | 0/23458 [00:00<?, ?it/s]

In [27]:
hard_rock['genre'] = 2
hard_rock['year'] = release_year(hard_rock)

  0%|          | 0/14533 [00:00<?, ?it/s]

In [29]:
hiphop['genre'] = 3
hiphop['year'] = release_year(hiphop)

  0%|          | 0/8797 [00:00<?, ?it/s]

In [30]:
rap['genre'] = 3
rap['year'] = release_year(rap)

  0%|          | 0/10241 [00:00<?, ?it/s]

In [48]:
rap_2021 = pd.read_csv('data/rap_songs_2021.csv')
rap_2021['genre'] = 3
rap_2021['year'] = release_year(rap_2021)

  0%|          | 0/12041 [00:00<?, ?it/s]

In [49]:
rap_2021.to_csv('data/rap_2021.csv')

In [31]:
trap['genre'] = 3
trap['year'] = release_year(trap)

  0%|          | 0/2117 [00:00<?, ?it/s]

In [None]:
rap_2021['genre'] = 3
rap_2021['year'] = release_year(trap)

In [32]:
pop['genre'] = 4
pop['year'] = release_year(pop)

  0%|          | 0/28274 [00:00<?, ?it/s]

In [33]:
techno['genre'] = 5
techno['year'] = release_year(techno)

  0%|          | 0/4202 [00:00<?, ?it/s]

In [34]:
deep['genre'] = 5
deep['year'] = release_year(deep)

  0%|          | 0/33792 [00:00<?, ?it/s]

## Making the main DataFrame

In [35]:
music = pd.concat([jazz, jazz_cool, rock, hard_rock, hiphop, rap, trap, pop, techno, deep], axis=0, ignore_index=True)

In [36]:
music.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152298 entries, 0 to 152297
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   danceability      152298 non-null  float64
 1   energy            152298 non-null  float64
 2   key               152298 non-null  int64  
 3   loudness          152298 non-null  float64
 4   mode              152298 non-null  int64  
 5   speechiness       152298 non-null  float64
 6   acousticness      152298 non-null  float64
 7   instrumentalness  152298 non-null  float64
 8   liveness          152298 non-null  float64
 9   valence           152298 non-null  float64
 10  tempo             152298 non-null  float64
 11  id                152298 non-null  object 
 12  duration_ms       152298 non-null  int64  
 13  time_signature    152298 non-null  int64  
 14  genre             152298 non-null  int64  
 15  year              152298 non-null  object 
dtypes: float64(9), int64

In [37]:
music.tail()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,genre,year
152293,0.373,0.074,1,-24.595,1,0.0509,0.745,0.819,0.102,0.135,122.238,41PhuLu40MhT6vg3vr4778,375773,4,5,2021
152294,0.123,0.0128,2,-27.916,0,0.0401,0.692,0.291,0.109,0.0383,71.621,7Fa0w7GNcS57sMfTofiG4R,824013,4,5,2021
152295,0.342,0.0257,0,-31.669,1,0.0447,0.776,0.0956,0.112,0.362,158.439,4SniQ36o6fHpIKwHVNnJoN,305600,3,5,2021
152296,0.142,0.00248,7,-34.535,1,0.0476,0.27,0.0232,0.0596,0.0366,74.57,0lotD0m9lAMrmZZ85N9QDC,750187,4,5,2021
152297,0.165,0.0983,10,-24.322,1,0.0432,0.721,0.643,0.305,0.0461,72.211,1rlxhEmCV8D2eyL6rJwXLD,600280,4,5,2021


## Check and drop duplicates

In [38]:
music.duplicated(subset=['id']).value_counts()

False    142783
True       9515
dtype: int64

In [39]:
music.drop_duplicates(subset=['id'], inplace=True, ignore_index=True)

In [40]:
music.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142783 entries, 0 to 142782
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   danceability      142783 non-null  float64
 1   energy            142783 non-null  float64
 2   key               142783 non-null  int64  
 3   loudness          142783 non-null  float64
 4   mode              142783 non-null  int64  
 5   speechiness       142783 non-null  float64
 6   acousticness      142783 non-null  float64
 7   instrumentalness  142783 non-null  float64
 8   liveness          142783 non-null  float64
 9   valence           142783 non-null  float64
 10  tempo             142783 non-null  float64
 11  id                142783 non-null  object 
 12  duration_ms       142783 non-null  int64  
 13  time_signature    142783 non-null  int64  
 14  genre             142783 non-null  int64  
 15  year              142783 non-null  object 
dtypes: float64(9), int64

In [41]:
music.tail()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,genre,year
142778,0.373,0.074,1,-24.595,1,0.0509,0.745,0.819,0.102,0.135,122.238,41PhuLu40MhT6vg3vr4778,375773,4,5,2021
142779,0.123,0.0128,2,-27.916,0,0.0401,0.692,0.291,0.109,0.0383,71.621,7Fa0w7GNcS57sMfTofiG4R,824013,4,5,2021
142780,0.342,0.0257,0,-31.669,1,0.0447,0.776,0.0956,0.112,0.362,158.439,4SniQ36o6fHpIKwHVNnJoN,305600,3,5,2021
142781,0.142,0.00248,7,-34.535,1,0.0476,0.27,0.0232,0.0596,0.0366,74.57,0lotD0m9lAMrmZZ85N9QDC,750187,4,5,2021
142782,0.165,0.0983,10,-24.322,1,0.0432,0.721,0.643,0.305,0.0461,72.211,1rlxhEmCV8D2eyL6rJwXLD,600280,4,5,2021


In [42]:
#music.to_csv('music.csv', index=False)

# IT 2

In [74]:
music = pd.read_csv('data/music.csv')

In [75]:
music.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 142783 entries, 0 to 142782
Data columns (total 17 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   Unnamed: 0        142783 non-null  int64  
 1   danceability      142783 non-null  float64
 2   energy            142783 non-null  float64
 3   key               142783 non-null  int64  
 4   loudness          142783 non-null  float64
 5   mode              142783 non-null  int64  
 6   speechiness       142783 non-null  float64
 7   acousticness      142783 non-null  float64
 8   instrumentalness  142783 non-null  float64
 9   liveness          142783 non-null  float64
 10  valence           142783 non-null  float64
 11  tempo             142783 non-null  float64
 12  id                142783 non-null  object 
 13  duration_ms       142783 non-null  int64  
 14  time_signature    142783 non-null  int64  
 15  genre             142783 non-null  int64  
 16  year              14

In [76]:
music.drop(['Unnamed: 0'], axis=1, inplace=True)
# drop all from genre 'electronic' with release year before 1990
music = music.drop(music[(music['genre'] == 5) & (music['year'] < 1990)].index)
music.reset_index(drop=True, inplace=True)

In [78]:
music.tail()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,genre,year
142706,0.373,0.074,1,-24.595,1,0.0509,0.745,0.819,0.102,0.135,122.238,41PhuLu40MhT6vg3vr4778,375773,4,5,2021
142707,0.123,0.0128,2,-27.916,0,0.0401,0.692,0.291,0.109,0.0383,71.621,7Fa0w7GNcS57sMfTofiG4R,824013,4,5,2021
142708,0.342,0.0257,0,-31.669,1,0.0447,0.776,0.0956,0.112,0.362,158.439,4SniQ36o6fHpIKwHVNnJoN,305600,3,5,2021
142709,0.142,0.00248,7,-34.535,1,0.0476,0.27,0.0232,0.0596,0.0366,74.57,0lotD0m9lAMrmZZ85N9QDC,750187,4,5,2021
142710,0.165,0.0983,10,-24.322,1,0.0432,0.721,0.643,0.305,0.0461,72.211,1rlxhEmCV8D2eyL6rJwXLD,600280,4,5,2021


In [79]:
rap_2021 = pd.read_csv('data/rap_2021.csv')

In [80]:
rap_2021.drop(['Unnamed: 0'], axis=1, inplace=True)

In [81]:
rap_2021.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12041 entries, 0 to 12040
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      12041 non-null  float64
 1   energy            12041 non-null  float64
 2   key               12041 non-null  int64  
 3   loudness          12041 non-null  float64
 4   mode              12041 non-null  int64  
 5   speechiness       12041 non-null  float64
 6   acousticness      12041 non-null  float64
 7   instrumentalness  12041 non-null  float64
 8   liveness          12041 non-null  float64
 9   valence           12041 non-null  float64
 10  tempo             12041 non-null  float64
 11  id                12041 non-null  object 
 12  duration_ms       12041 non-null  int64  
 13  time_signature    12041 non-null  int64  
 14  genre             12041 non-null  int64  
 15  year              12041 non-null  int64  
dtypes: float64(9), int64(6), object(1)
memor

In [82]:
pop_2021 = pd.read_csv('data/pop_2021.csv')

In [83]:
pop_2021.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5514 entries, 0 to 5513
Data columns (total 17 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Unnamed: 0        5514 non-null   int64  
 1   danceability      5514 non-null   float64
 2   energy            5514 non-null   float64
 3   key               5514 non-null   int64  
 4   loudness          5514 non-null   float64
 5   mode              5514 non-null   int64  
 6   speechiness       5514 non-null   float64
 7   acousticness      5514 non-null   float64
 8   instrumentalness  5514 non-null   float64
 9   liveness          5514 non-null   float64
 10  valence           5514 non-null   float64
 11  tempo             5514 non-null   float64
 12  id                5514 non-null   object 
 13  duration_ms       5514 non-null   int64  
 14  time_signature    5514 non-null   int64  
 15  genre             5514 non-null   int64  
 16  year              5514 non-null   int64  


In [84]:
jazz_modern.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12914 entries, 0 to 12913
Data columns (total 16 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   danceability      12914 non-null  float64
 1   energy            12914 non-null  float64
 2   key               12914 non-null  int64  
 3   loudness          12914 non-null  float64
 4   mode              12914 non-null  int64  
 5   speechiness       12914 non-null  float64
 6   acousticness      12914 non-null  float64
 7   instrumentalness  12914 non-null  float64
 8   liveness          12914 non-null  float64
 9   valence           12914 non-null  float64
 10  tempo             12914 non-null  float64
 11  id                12914 non-null  object 
 12  duration_ms       12914 non-null  int64  
 13  time_signature    12914 non-null  int64  
 14  genre             12914 non-null  int64  
 15  year              12914 non-null  object 
dtypes: float64(9), int64(5), object(2)
memor

In [85]:
pop_2021.drop(['Unnamed: 0'], axis=1, inplace=True)

In [86]:
music_new = pd.concat([music, rap_2021, pop_2021, jazz_modern], axis=0, ignore_index=True)

In [87]:
music_new.tail()

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,genre,year
173175,0.436,0.518,2,-9.001,1,0.0419,0.722,0.669,0.14,0.537,166.328,6jExul7q96UmCMwvbycdRC,511973,3,1,2021
173176,0.418,0.204,7,-14.206,1,0.0405,0.933,0.504,0.118,0.096,63.369,49FZ1rTwrkK2eDLjHOX0uf,342027,4,1,2021
173177,0.372,0.225,2,-15.257,1,0.0402,0.861,0.48,0.128,0.229,72.663,1O7MzM9mnosQPtOgxzQfv9,452160,4,1,2021
173178,0.477,0.461,8,-9.685,1,0.0434,0.778,0.478,0.0885,0.238,122.137,3JF23DQPLgHWg7WqZ9jjIa,503333,4,1,2021
173179,0.49,0.421,4,-9.935,0,0.0423,0.49,0.262,0.075,0.437,96.184,5LQ7SJFxbjxBfjACCCNRia,445387,1,1,2021


In [88]:
music_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173180 entries, 0 to 173179
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   danceability      173180 non-null  float64
 1   energy            173180 non-null  float64
 2   key               173180 non-null  int64  
 3   loudness          173180 non-null  float64
 4   mode              173180 non-null  int64  
 5   speechiness       173180 non-null  float64
 6   acousticness      173180 non-null  float64
 7   instrumentalness  173180 non-null  float64
 8   liveness          173180 non-null  float64
 9   valence           173180 non-null  float64
 10  tempo             173180 non-null  float64
 11  id                173180 non-null  object 
 12  duration_ms       173180 non-null  int64  
 13  time_signature    173180 non-null  int64  
 14  genre             173180 non-null  int64  
 15  year              173180 non-null  object 
dtypes: float64(9), int64

In [89]:
music_new.duplicated(subset=['id']).value_counts()

False    162530
True      10650
dtype: int64

In [90]:
music_new.drop_duplicates(subset=['id'], inplace=True, ignore_index=True)

In [100]:
music_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 162530 entries, 0 to 162529
Data columns (total 16 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   danceability      162530 non-null  float64
 1   energy            162530 non-null  float64
 2   key               162530 non-null  int64  
 3   loudness          162530 non-null  float64
 4   mode              162530 non-null  int64  
 5   speechiness       162530 non-null  float64
 6   acousticness      162530 non-null  float64
 7   instrumentalness  162530 non-null  float64
 8   liveness          162530 non-null  float64
 9   valence           162530 non-null  float64
 10  tempo             162530 non-null  float64
 11  id                162530 non-null  object 
 12  duration_ms       162530 non-null  int64  
 13  time_signature    162530 non-null  int64  
 14  genre             162530 non-null  int64  
 15  year              162530 non-null  object 
dtypes: float64(9), int64

In [93]:
music_new['genre'].value_counts()

5    37658
1    37615
2    34062
4    28874
3    24321
Name: genre, dtype: int64

In [96]:
fix_tempo(music_new)

Unnamed: 0,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,id,duration_ms,time_signature,genre,year
0,0.706,0.742,5,-6.431,1,0.1000,0.17800,0.01910,0.4330,0.299,82.483,6t4CS8bsKY5Gu1LwvfgATh,310680,4,1,2007
1,0.842,0.420,7,-9.656,1,0.1120,0.07430,0.09640,0.1220,0.583,92.005,4c0FWOg4R7KsSlq4vdQDrX,311080,4,1,2007
2,0.766,0.963,10,-5.000,1,0.2860,0.58400,0.00565,0.4650,0.692,98.023,04zDTgL1znpSw5SkobWMk9,240960,4,1,2007
3,0.650,0.827,5,-8.784,1,0.0452,0.00127,0.46800,0.1170,0.248,96.016,1dioz5qr8Rxm2ADIKkyaHc,303333,4,1,2007
4,0.749,0.863,7,-6.865,1,0.1310,0.01120,0.15700,0.0969,0.627,87.959,1AE7Kt2nz88jjaqyOYEg7r,228360,4,1,2007
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162525,0.436,0.518,2,-9.001,1,0.0419,0.72200,0.66900,0.1400,0.537,166.328,6jExul7q96UmCMwvbycdRC,511973,3,1,2021
162526,0.418,0.204,7,-14.206,1,0.0405,0.93300,0.50400,0.1180,0.096,63.369,49FZ1rTwrkK2eDLjHOX0uf,342027,4,1,2021
162527,0.372,0.225,2,-15.257,1,0.0402,0.86100,0.48000,0.1280,0.229,72.663,1O7MzM9mnosQPtOgxzQfv9,452160,4,1,2021
162528,0.477,0.461,8,-9.685,1,0.0434,0.77800,0.47800,0.0885,0.238,122.137,3JF23DQPLgHWg7WqZ9jjIa,503333,4,1,2021


In [101]:
music_new.to_csv('music_new.csv', index=False)