In [77]:
# Import modules

import requests
import os
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
from spotipy.oauth2 import SpotifyOAuth
import spotipy.util as util
from env import client_id, client_secret, redirect_uri

In [78]:
# Define Spotify environment
os.environ['SPOTIPY_CLIENT_ID'] = client_id
os.environ['SPOTIPY_CLIENT_SECRET'] = client_secret
os.environ['SPOTIPY_REDIRECT_URI'] = redirect_uri

### for potential future use:
# auth_url = 'https://accounts.spotify.com/api/token'
# auth_response = requests.post(auth_url, {
#     'grant_type': 'client_credentials',
#     'client_id': client_id,
#     'client_secret': client_secret,
# })
# auth_response_data = auth_response.json()
# access_token = auth_response_data['access_token']
# headers = {
#     'Authorization': 'Bearer {token}'.format(token=access_token),
#     'Content-Type': 'application/json'
# }


In [3]:
# What I'm looking for
scope = 'user-top-read'
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

# Retrieve 30 Day Top
short_term_tracks = sp.current_user_top_tracks(limit=50,offset=0,time_range='short_term')
# Write to JSON
with open('short_term_top_tracks.json','w',encoding='utf-8') as f:
    json.dump(short_term_tracks,f, ensure_ascii = False, indent = 4)

# Retrieve 6 Month Top
medium_term_tracks = sp.current_user_top_tracks(limit=50, offset = 0, time_range = 'medium_term')
# Write to JSON
with open('medium_term_top_tracks.json','w',encoding='utf-8') as f:
    json.dump(medium_term_tracks,f, ensure_ascii = False, indent = 4)

# Retrieve Years Top
long_term_tracks = sp.current_user_top_tracks(limit=50, offset = 0, time_range = 'long_term')
# Write to JSON
with open('long_term_top_tracks.json','w',encoding='utf-8') as f:
    json.dump(long_term_tracks,f, ensure_ascii = False, indent = 4)

In [4]:
# Open all JSON
with open('short_term_top_tracks.json') as f:
    short_data = json.load(f)
with open('medium_term_top_tracks.json') as f:
    medium_data = json.load(f)
with open('long_term_top_tracks.json') as f:
    long_data = json.load(f)

### Create dataframes for each term of data

In [5]:
artist_name = []
artist_uri = []
song_name = []
song_uri = []
album_name = []
album_uri = []
for track in short_data['items']:
    track['album']
    artist_name.append(track['artists'][0]['name'])
    artist_uri.append(track['artists'][0]['uri'])
    song_name.append(track['name'])
    song_uri.append(track['uri'])
    album_name.append(track['album']['name'])
    album_uri.append(track['album']['uri'])


short_df = pd.DataFrame({'artist':artist_name,
'song':song_name,
'album':album_name,
'song_uri':song_uri,
'album_uri':album_uri,
'artist_uri':artist_uri})

In [6]:
artist_name = []
artist_uri = []
song_name = []
song_uri = []
album_name = []
album_uri = []
for track in medium_data['items']:
    track['album']
    artist_name.append(track['artists'][0]['name'])
    artist_uri.append(track['artists'][0]['uri'])
    song_name.append(track['name'])
    song_uri.append(track['uri'])
    album_name.append(track['album']['name'])
    album_uri.append(track['album']['uri'])

medium_df = pd.DataFrame({'artist':artist_name,
'song':song_name,
'album':album_name,
'song_uri':song_uri,
'album_uri':album_uri,
'artist_uri':artist_uri})

In [7]:
artist_name = []
artist_uri = []
song_name = []
song_uri = []
album_name = []
album_uri = []
for track in long_data['items']:
    track['album']
    artist_name.append(track['artists'][0]['name'])
    artist_uri.append(track['artists'][0]['uri'])
    song_name.append(track['name'])
    song_uri.append(track['uri'])
    album_name.append(track['album']['name'])
    album_uri.append(track['album']['uri'])
long_df = pd.DataFrame({'artist':artist_name,
'song':song_name,
'album':album_name,
'song_uri':song_uri,
'album_uri':album_uri,
'artist_uri':artist_uri})

In [8]:
print(short_df.shape)
print(medium_df.shape)
print(long_df.shape)

(50, 6)
(50, 6)
(50, 6)


In [9]:
# Combine all three dataframes into one top songs dataframe
dfs = [short_df, medium_df, long_df]
df = pd.concat(dfs)

In [10]:
df.head()

Unnamed: 0,artist,song,album,song_uri,album_uri,artist_uri
0,King Crimson,Discipline,Discipline,spotify:track:2uRCY0IMvoOnfeNfZpoLEI,spotify:album:7KxUQCCvE0edGiqBS6ywEd,spotify:artist:7M1FPw29m5FbicYzS2xdpi
1,"Tomorrow, St. Peter",Figurine,"Tomorrow, St. Peter",spotify:track:379wjU9IdHmTmXuEdi4FAP,spotify:album:2ustIkBBy5Zd15Plog1oRl,spotify:artist:554Jo4IkGANNr7h25xPggq
2,B'z,Into Free -Dangan-,B'z,spotify:track:2iGWeVCQy5k4qY7JVgNE70,spotify:album:7fLVUv5s3ntbMp2lmFoc1R,spotify:artist:7i9bNUSGORP5MIgrii3cJc
3,Incubus,Aqueous Transmission,Morning View,spotify:track:5M67k54BVUDADZPryaqV1y,spotify:album:1rQZbncicoXyB64DqoH7OY,spotify:artist:3YcBF2ttyueytpXtEzn1Za
4,Delta Sleep,The Softest Touch,Spring Island,spotify:track:4xe78Q33kyICK0bToXY0cn,spotify:album:5xZ4se52uiNjoRvmm062Nt,spotify:artist:05MlomiA9La0OiNIAGqECk


In [11]:
# retrieve all songs from all albums for songs in top songs, expand song dataset
album_data = []
for s in df.album_uri.to_list():
    album_data.append(sp.album(s))

In [12]:
# for future use: get related artists
# related_artists = []
# for a in artist_uris['uri']:
#     related_artists.append(requests.get(f'https://api.spotify.com/v1/artists/{a}/related-artist', headers = headers))

In [13]:
# create album dataframe with all songs from albums found in top songs
artist_name = []
artist_uri = []
song_name = []
song_uri = []
album_name = []
album_uri = []
genre = []
track
for album in album_data:
    for song in album['tracks']['items']:
        artist_name.append(song['artists'][0]['name'])
        artist_uri.append(song['artists'][0]['uri'])
        song_name.append(song['name'])
        song_uri.append(song['uri'])
        album_name.append(album['name'])
        album_uri.append(album['uri'])

df = pd.DataFrame({'artist':artist_name,
'song':song_name,
'album':album_name,
'song_uri':song_uri,
'album_uri':album_uri,
'artist_uri':artist_uri})

In [14]:
# check shape
df.shape

(1450, 6)

In [15]:
# drop duplicates
df.drop_duplicates(inplace=True)
df.shape

(765, 6)

In [16]:
# pull audio features of all songs
audio_features = []
for row in df.itertuples():
    audio_features.append(sp.audio_features(tracks = row.song_uri))

In [17]:
# create dataframe of audio features
audio_features_df = pd.DataFrame.from_dict(audio_features)

In [18]:
# turn dicionary of columns to complete dataframe
audio_features_df = audio_features_df[0].dropna().apply(pd.Series)

In [19]:
# merge audio features data with songs dataframe
df = pd.merge(df,audio_features_df,how='right',left_on='song_uri',right_on='uri')

In [20]:
# check shape
df.shape

(765, 24)

In [21]:
# df.drop(columns = ['analysis_url','track_href','uri','id','type','song_uri','album_uri'],inplace=True)

In [22]:
# check the dataframe
df.head()

Unnamed: 0,artist,song,album,song_uri,album_uri,artist_uri,danceability,energy,key,loudness,...,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,King Crimson,Elephant Talk,Discipline,spotify:track:1VeYMKim09aEymk9grhXRf,spotify:album:7KxUQCCvE0edGiqBS6ywEd,spotify:artist:7M1FPw29m5FbicYzS2xdpi,0.649,0.725,9,-11.573,...,0.0603,0.712,114.578,audio_features,1VeYMKim09aEymk9grhXRf,spotify:track:1VeYMKim09aEymk9grhXRf,https://api.spotify.com/v1/tracks/1VeYMKim09aE...,https://api.spotify.com/v1/audio-analysis/1VeY...,282240,4
1,King Crimson,Frame By Frame,Discipline,spotify:track:0yg93GXlS0ZmLsFpXG5bT2,spotify:album:7KxUQCCvE0edGiqBS6ywEd,spotify:artist:7M1FPw29m5FbicYzS2xdpi,0.388,0.729,1,-11.217,...,0.391,0.428,157.594,audio_features,0yg93GXlS0ZmLsFpXG5bT2,spotify:track:0yg93GXlS0ZmLsFpXG5bT2,https://api.spotify.com/v1/tracks/0yg93GXlS0Zm...,https://api.spotify.com/v1/audio-analysis/0yg9...,307933,4
2,King Crimson,Matte Kudasai,Discipline,spotify:track:38DwzePhZtPxZJYgGuY7IY,spotify:album:7KxUQCCvE0edGiqBS6ywEd,spotify:artist:7M1FPw29m5FbicYzS2xdpi,0.181,0.286,4,-13.167,...,0.102,0.147,66.397,audio_features,38DwzePhZtPxZJYgGuY7IY,spotify:track:38DwzePhZtPxZJYgGuY7IY,https://api.spotify.com/v1/tracks/38DwzePhZtPx...,https://api.spotify.com/v1/audio-analysis/38Dw...,229320,4
3,King Crimson,Indiscipline,Discipline,spotify:track:4hnB3tR0ueIBnkaIvrlZtC,spotify:album:7KxUQCCvE0edGiqBS6ywEd,spotify:artist:7M1FPw29m5FbicYzS2xdpi,0.355,0.452,7,-10.771,...,0.0941,0.585,122.274,audio_features,4hnB3tR0ueIBnkaIvrlZtC,spotify:track:4hnB3tR0ueIBnkaIvrlZtC,https://api.spotify.com/v1/tracks/4hnB3tR0ueIB...,https://api.spotify.com/v1/audio-analysis/4hnB...,273000,4
4,King Crimson,Thela Hun Ginjeet,Discipline,spotify:track:02vXjp2iu7SbGfhf231dS9,spotify:album:7KxUQCCvE0edGiqBS6ywEd,spotify:artist:7M1FPw29m5FbicYzS2xdpi,0.579,0.87,11,-9.156,...,0.347,0.655,130.957,audio_features,02vXjp2iu7SbGfhf231dS9,spotify:track:02vXjp2iu7SbGfhf231dS9,https://api.spotify.com/v1/tracks/02vXjp2iu7Sb...,https://api.spotify.com/v1/audio-analysis/02vX...,386520,4


### Now that I have the dataframe where I want it, I'll add the genre by retrieving artist data. Previous data pulls that contain genre are empty lists, but the artist data has populated genre dictionaries.

In [23]:
# create artist_uris dataframe to drop duplicate uris
artist_uris = pd.DataFrame(df.artist_uri.to_list()).drop_duplicates()
# rename columns for ease of access
artist_uris.rename(columns={0:'uri'},inplace = True)

artist_data = []
for a in artist_uris.uri.to_list():
    artist_data.append(sp.artist(a))

In [24]:
artist_data_df = pd.DataFrame.from_dict(artist_data)

In [25]:
artist_data_df.drop(columns=['id','external_urls','followers','href','images','name','popularity','type'],inplace= True)

In [41]:
df = pd.merge(df,artist_data_df,how='left',left_on='artist_uri',right_on='uri')

In [42]:
df.columns.to_list()

['artist',
 'song',
 'album',
 'song_uri',
 'album_uri',
 'artist_uri',
 'danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'type',
 'id',
 'uri_x',
 'track_href',
 'analysis_url',
 'duration_ms',
 'time_signature',
 'genres',
 'uri_y']

In [43]:
# drop columns
df.drop(columns = ['song_uri','album_uri','artist_uri','type','id','uri_x','track_href','analysis_url','uri_y'],inplace=True)

In [44]:
# double check dataframe
df.head()

Unnamed: 0,artist,song,album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,genres
0,King Crimson,Elephant Talk,Discipline,0.649,0.725,9,-11.573,1,0.0658,0.544,0.00204,0.0603,0.712,114.578,282240,4,"[album rock, art rock, classic rock, instrumen..."
1,King Crimson,Frame By Frame,Discipline,0.388,0.729,1,-11.217,0,0.0431,0.553,0.195,0.391,0.428,157.594,307933,4,"[album rock, art rock, classic rock, instrumen..."
2,King Crimson,Matte Kudasai,Discipline,0.181,0.286,4,-13.167,1,0.0367,0.709,0.0245,0.102,0.147,66.397,229320,4,"[album rock, art rock, classic rock, instrumen..."
3,King Crimson,Indiscipline,Discipline,0.355,0.452,7,-10.771,0,0.0516,0.0173,0.31,0.0941,0.585,122.274,273000,4,"[album rock, art rock, classic rock, instrumen..."
4,King Crimson,Thela Hun Ginjeet,Discipline,0.579,0.87,11,-9.156,1,0.0571,0.087,0.0554,0.347,0.655,130.957,386520,4,"[album rock, art rock, classic rock, instrumen..."


In [45]:
df.to_csv('spotify_data.csv')

### At this point I created an acquire.py module to reproduce the results of all the code seen thus far. 

In [2]:
import acquire

df = acquire.get_spotify_data()

# Do the same thing but for discographies of top artists, which will be used moving forward

In [4]:
# What I'm looking for
scope = 'user-top-read'
sp = spotipy.Spotify(auth_manager=SpotifyOAuth(scope=scope))

# Retrieve 30 Day Top
short_term_artists = sp.current_user_top_artists(limit=50,offset=0,time_range='short_term')
# Write to JSON
with open('short_term_top_artists.json','w',encoding='utf-8') as f:
    json.dump(short_term_artists,f, ensure_ascii = False, indent = 4)

# Retrieve 6 Month Top
medium_term_artists = sp.current_user_top_artists(limit=50, offset = 0, time_range = 'medium_term')
# Write to JSON
with open('medium_term_top_artists.json','w',encoding='utf-8') as f:
    json.dump(medium_term_artists,f, ensure_ascii = False, indent = 4)

# Retrieve Years Top
long_term_artists = sp.current_user_top_artists(limit=50, offset = 0, time_range = 'long_term')
# Write to JSON
with open('long_term_top_artists.json','w',encoding='utf-8') as f:
    json.dump(long_term_artists,f, ensure_ascii = False, indent = 4)

In [5]:
# Open all JSON
with open('short_term_top_artists.json') as f:
    short_data = json.load(f)
with open('medium_term_top_artists.json') as f:
    medium_data = json.load(f)
with open('long_term_top_artists.json') as f:
    long_data = json.load(f)

In [26]:
# create list of artist uri's
artist_uri = []
for a in short_data['items']:
    artist_uri.append(a['uri'])

In [29]:
# pull the discography for each artist
discographies = []
for a in artist_uri:
    discographies.append(sp.artist_albums(a))

In [37]:
album_uris = []
for artist in discographies:
    for album in artist['items']:
        album_uris.append(album['uri'])

In [42]:
song_list = []
for a in album_uris:
    song_list.append(sp.album(a))
with open('artist_discography_tracks.json','w',encoding='utf-8') as f:
    json.dump(song_list,f, ensure_ascii = False, indent = 4)

In [44]:
artist_name = []
artist_uri = []
song_name = []
song_uri = []
album_name = []
album_uri = []
genre = []
track
for album in song_list:
    for song in album['tracks']['items']:
        artist_name.append(song['artists'][0]['name'])
        artist_uri.append(song['artists'][0]['uri'])
        song_name.append(song['name'])
        song_uri.append(song['uri'])
        album_name.append(album['name'])
        album_uri.append(album['uri'])

df = pd.DataFrame({'artist':artist_name,
'song':song_name,
'album':album_name,
'song_uri':song_uri,
'album_uri':album_uri,
'artist_uri':artist_uri})

In [45]:
df

Unnamed: 0,artist,song,album,song_uri,album_uri,artist_uri
0,Delta Sleep,Water Fall,Spring Island,spotify:track:7APfwee9lIJ1CawIqQtb7s,spotify:album:5xZ4se52uiNjoRvmm062Nt,spotify:artist:05MlomiA9La0OiNIAGqECk
1,Delta Sleep,The Detail,Spring Island,spotify:track:5T3yCk8ViLyImNXK9uJOwR,spotify:album:5xZ4se52uiNjoRvmm062Nt,spotify:artist:05MlomiA9La0OiNIAGqECk
2,Delta Sleep,View to a Fill,Spring Island,spotify:track:2tUwww5lN0qf2CwYTUTvn3,spotify:album:5xZ4se52uiNjoRvmm062Nt,spotify:artist:05MlomiA9La0OiNIAGqECk
3,Delta Sleep,Planet Fantastic,Spring Island,spotify:track:7G3qJ7hyjP9Tsx1pGlvy0q,spotify:album:5xZ4se52uiNjoRvmm062Nt,spotify:artist:05MlomiA9La0OiNIAGqECk
4,Delta Sleep,Forest Fire,Spring Island,spotify:track:1iewaQu7wes3iSIz8w7GjN,spotify:album:5xZ4se52uiNjoRvmm062Nt,spotify:artist:05MlomiA9La0OiNIAGqECk
...,...,...,...,...,...,...
9964,Glocca Morra,Burning Love,Split With Glocca Morra and Summer Vacation,spotify:track:6yD1ASoXilIC4qRjvbxnBM,spotify:album:3EpXPjK65xQpl16gp6bnQu,spotify:artist:5XU0u7HIexXCAh4CcoqXCA
9965,Glocca Morra,Burning Desire,Split With Glocca Morra and Summer Vacation,spotify:track:4uVP6C7JU6cXLwomil5l4g,spotify:album:3EpXPjK65xQpl16gp6bnQu,spotify:artist:5XU0u7HIexXCAh4CcoqXCA
9966,Summer Vacation,Boycott Will Smith,Split With Glocca Morra and Summer Vacation,spotify:track:3AYLbk0xfIxHGUHuaKtDod,spotify:album:3EpXPjK65xQpl16gp6bnQu,spotify:artist:3Nm8bci1wAtWOuLbQVaTid
9967,Summer Vacation,Secret Shopper,Split With Glocca Morra and Summer Vacation,spotify:track:0pBHxQebPdRIiQZErd91MU,spotify:album:3EpXPjK65xQpl16gp6bnQu,spotify:artist:3Nm8bci1wAtWOuLbQVaTid


In [47]:
df.drop_duplicates(inplace=True)
df.shape


(9969, 6)

In [48]:
# pull audio features of all songs
audio_features = []
for row in df.itertuples():
    audio_features.append(sp.audio_features(tracks = row.song_uri))

In [50]:
audio_features_df = pd.DataFrame.from_dict(audio_features)

In [52]:
# turn dicionary of columns to complete dataframe
audio_features_df = audio_features_df[0].dropna().apply(pd.Series)

In [53]:
# merge audio features data with songs dataframe
df = pd.merge(df,audio_features_df,how='right',left_on='song_uri',right_on='uri')

In [59]:
# get genres
artist_data = []
for a in artist_uri:
    artist_data.append(sp.artist(a))
 # create dataframe of artist data
artist_data_df = pd.DataFrame.from_dict(artist_data)


KeyError: "['uri_x' 'uri_y'] not found in axis"

In [63]:
# merge genres onto dataframe
df = pd.merge(df,artist_data_df,how='left',left_on='artist_uri',right_on='uri')


In [65]:
df.columns.to_list()

['artist',
 'song',
 'album',
 'song_uri',
 'album_uri',
 'artist_uri',
 'danceability',
 'energy',
 'key',
 'loudness',
 'mode',
 'speechiness',
 'acousticness',
 'instrumentalness',
 'liveness',
 'valence',
 'tempo',
 'type',
 'id',
 'uri_x',
 'track_href',
 'analysis_url',
 'duration_ms',
 'time_signature',
 'genres',
 'uri_y']

In [66]:
# drop unneccessary columns
df.drop(columns = ['song_uri','album_uri','artist_uri','type','id','uri_x','track_href','analysis_url','uri_y'],inplace=True)
# write to csv
df.to_csv('spotify_top_artist_discography_data.csv')

In [60]:
import acquire
df = acquire.get_spotify_top_artists_discography_data()

  mask |= (ar1 == a)


In [62]:
df.drop_duplicates(inplace=True)

In [63]:
df.reset_index(inplace = True)

In [64]:
df.artist.value_counts().index.to_list()

['Germaine Franco',
 'Queen',
 'King Crimson',
 'The Cure',
 'Bob Marley & The Wailers',
 'Daryl Hall & John Oates',
 'Kid Cudi',
 'John Butler Trio',
 'Lupe Fiasco',
 'Jack Johnson',
 'The Avett Brothers',
 'Incubus',
 'Stone Sour',
 'Gorillaz',
 'The Weeknd',
 'King Gizzard & The Lizard Wizard',
 'Billy Idol',
 'Joan Of Arc',
 'Childish Gambino',
 'Tenacious D',
 'Minus the Bear',
 'Third Eye Blind',
 'Iration',
 'Rooney',
 'Piebald',
 'Lin-Manuel Miranda',
 'Paramore',
 'Boston',
 'Turnover',
 'Young the Giant',
 'TTNG',
 'Hiatus Kaiyote',
 'Vampire Weekend',
 'Tera Melos',
 'Hozier',
 'All Them Witches',
 'Leon Bridges',
 'Delta Sleep',
 'A Great Big Pile of Leaves',
 'Bad Suns',
 'Good Kid',
 'Chon',
 'Origami Angel',
 'Hum',
 'Sebastian Yatra',
 'Algernon Cadwallader',
 'Shafiq Husayn',
 'Feed Me Jack',
 'Glocca Morra',
 'MEDUZA',
 'Carlos Vives',
 'Pete Townshend',
 'Talib Kweli',
 'Cornelius',
 'Hall',
 'Bay Faction',
 'Tomorrow, St. Peter',
 'Queen + Paul Rodgers',
 'Delle&Cop

In [65]:
# change str of list in df to list
from ast import literal_eval
genre = []
for song in df.genres:
    genre.append(literal_eval(song))

In [66]:
# create df of genre list
genre_df = pd.DataFrame(genre)

In [67]:
# rename all columns to genre_C
for col in genre_df.columns.to_list():
    genre_df.rename(columns = {col:f'genre_{col}'},inplace = True)

In [68]:
# see what a merged dataframe with split genres looks like
df1 = pd.merge(df, genre_df, how = 'right',left_index=True, right_index = True)
df1.head()


Unnamed: 0,index,artist,song,album,danceability,energy,key,loudness,mode,speechiness,...,genre_5,genre_6,genre_7,genre_8,genre_9,genre_10,genre_11,genre_12,genre_13,genre_14
0,0,Delta Sleep,Water Fall,Spring Island,0.217,0.208,7,-11.112,1,0.0434,...,,,,,,,,,,
1,81,Delta Sleep,The Detail,Spring Island,0.477,0.637,7,-7.244,1,0.0385,...,,,,,,,,,,
2,162,Delta Sleep,View to a Fill,Spring Island,0.349,0.795,11,-5.813,0,0.0693,...,,,,,,,,,,
3,243,Delta Sleep,Planet Fantastic,Spring Island,0.519,0.753,4,-5.334,1,0.0412,...,,,,,,,,,,
4,324,Delta Sleep,Forest Fire,Spring Island,0.478,0.262,2,-10.825,1,0.0281,...,,,,,,,,,,


That's kind of a mess, try something new.

In [69]:
# change the genres column to be a list instead of str
df['genres']=genre

In [70]:
# make sure they're a list and not a str. It works!
df.genres[0]

['brighton indie',
 'british math rock',
 'math rock',
 'progressive post-hardcore']

In [71]:
# let's try and explode it...
df.explode('genres')

Unnamed: 0,index,artist,song,album,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature,genres
0,0,Delta Sleep,Water Fall,Spring Island,0.217,0.208,7,-11.112,1,0.0434,0.96700,0.0108,0.2920,0.0787,206.289,99220,3,brighton indie
0,0,Delta Sleep,Water Fall,Spring Island,0.217,0.208,7,-11.112,1,0.0434,0.96700,0.0108,0.2920,0.0787,206.289,99220,3,british math rock
0,0,Delta Sleep,Water Fall,Spring Island,0.217,0.208,7,-11.112,1,0.0434,0.96700,0.0108,0.2920,0.0787,206.289,99220,3,math rock
0,0,Delta Sleep,Water Fall,Spring Island,0.217,0.208,7,-11.112,1,0.0434,0.96700,0.0108,0.2920,0.0787,206.289,99220,3,progressive post-hardcore
1,81,Delta Sleep,The Detail,Spring Island,0.477,0.637,7,-7.244,1,0.0385,0.05600,0.0143,0.0904,0.5450,112.574,269174,4,brighton indie
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9232,2344952,Glocca Morra,Burning Desire,Split With Glocca Morra and Summer Vacation,0.242,0.944,9,-5.151,1,0.0716,0.01120,0.6590,0.0545,0.3540,138.228,219238,4,midwest emo
9232,2344952,Glocca Morra,Burning Desire,Split With Glocca Morra and Summer Vacation,0.242,0.944,9,-5.151,1,0.0716,0.01120,0.6590,0.0545,0.3540,138.228,219238,4,philly indie
9233,2344979,Summer Vacation,Boycott Will Smith,Split With Glocca Morra and Summer Vacation,0.328,0.990,1,-4.279,0,0.0746,0.09680,0.2630,0.1760,0.2560,93.914,98734,4,
9234,2344982,Summer Vacation,Secret Shopper,Split With Glocca Morra and Summer Vacation,0.294,0.964,7,-4.629,1,0.1250,0.00165,0.0963,0.7500,0.3040,98.860,98438,4,


Looks a lot easier to manage, but this nearly quintupled my rows.

In [76]:
df.genres.value_counts()

TypeError: unhashable type: 'list'

Exception ignored in: 'pandas._libs.index.IndexEngine._call_map_locations'
Traceback (most recent call last):
  File "pandas/_libs/hashtable_class_helper.pxi", line 4588, in pandas._libs.hashtable.PyObjectHashTable.map_locations
TypeError: unhashable type: 'list'


[]                                                                                                                                                 814
[classic rock, glam rock, rock]                                                                                                                    469
[album rock, art rock, classic rock, instrumental rock, jazz fusion, jazz rock, progressive rock, psychedelic rock, rock, symphonic rock, zolo]    426
[new wave, permanent wave, rock, uk post-punk]                                                                                                     339
[reggae, roots reggae]                                                                                                                             304
                                                                                                                                                  ... 
[instrumental rock, math pop]                                                                 