# IMPORTS, CLASSES, FUNCTIONS, AUTHENTICATION, SETUP

In [203]:
# --- Credentials -----------------------------------------------------------
# load_dotenv() is called before the os.getenv lookups below so that values
# it loads (presumably from a local .env file - confirm against the
# python-dotenv docs) are visible to them.
from dotenv import load_dotenv
import os
load_dotenv()
# os.getenv returns None when a variable is unset; nothing here validates that.
spotify_client_id = os.getenv("SPOTIFY_CLIENT_ID")
spotify_client_secret = os.getenv("SPOTIFY_CLIENT_SECRET")
# --- Spotify client --------------------------------------------------------
# `spotify` is the module-level client used by every function in this cell.
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
spotify = spotipy.Spotify(client_credentials_manager=SpotifyClientCredentials(spotify_client_id, spotify_client_secret))
# --- Data handling ---------------------------------------------------------
import pandas as pd
import numpy as np
import re
from pathlib import Path
def tracks(track):
    """Flatten one Spotify track object into a single-level dict.

    The result holds, in this order: the selected audio features, the
    track's ``popularity``/``explicit``/``id``/``name``, the album's
    ``release_date``, the list of artist names, and the *number* of markets
    the album is available in.

    Two API calls are made per track through the module-level ``spotify``
    client: ``audio_features`` for the track and ``album`` for its album.
    """
    feature_names = ('danceability', 'energy', 'key', 'loudness', 'mode',
                     'speechiness', 'acousticness', 'instrumentalness',
                     'liveness', 'valence', 'tempo', 'duration_ms')

    features = spotify.audio_features(track['id'])[0]
    row = {name: features[name] for name in feature_names}

    release_date = track['album']['release_date']
    artist_names = [performer['name'] for performer in track['artists']]
    markets = spotify.album(track['album']['id'])['available_markets']

    for field in ('popularity', 'explicit', 'id', 'name'):
        row[field] = track[field]
    row['release_date'] = release_date
    row['artists'] = artist_names
    # Only the count of markets is kept, not the market codes themselves.
    row['available_markets'] = len(markets)
    return row
def dataframe(tracks_artist):
    """Build a DataFrame with one row per entry of ``tracks_artist['tracks']``.

    Each entry is flattened with ``tracks`` before the frame is assembled.
    """
    rows = [tracks(item) for item in tracks_artist['tracks']]
    return pd.DataFrame(rows)
def series(tracks_data):
    """Wrap ``tracks_data`` in a pandas DataFrame.

    Despite the function's name, the object returned is a ``DataFrame``,
    not a ``Series``.
    """
    frame = pd.DataFrame(data=tracks_data)
    return frame
def normalize(df):
    """Return a rescaled/bucketed copy of a raw tracks frame.

    Transformations applied:

    * ``available_markets`` - divided by ``max(78, column max)`` and rounded
      to the nearest fifth, giving a value in ``[0, 1]``.
    * ``popularity`` - divided by 100.
    * ``tempo`` - bucketed into integer steps of 20.
    * ``loudness`` - shifted by +60, bucketed into integer steps of 10 and
      clamped so that no bucket is negative.
    * ``duration_ms`` - converted to whole minutes and renamed
      ``duration_min``.
    * ``explicit`` - multiplied by 1 (turns booleans into 0/1).

    The input frame is left untouched; the normalized copy is returned.
    """
    # Work on a copy so the caller never sees a half-normalized frame.
    df = df.copy()

    # NOTE(review): 78 is presumably the number of Spotify markets at the time
    # of writing - confirm. It acts as a floor for the denominator.
    market_cap = np.maximum(78, df["available_markets"].max())
    df["available_markets"] = (df["available_markets"] / market_cap * 5).round() / 5
    df["popularity"] = df["popularity"] / 100
    df["tempo"] = (df["tempo"] / 20).round().astype(int)
    # The original line compared with `== 0` and discarded the result, so
    # negative buckets were never clamped; clip() performs the clamp.
    df["loudness"] = ((df["loudness"] + 60) / 10).round().astype(int).clip(lower=0)
    df["duration_ms"] = (df["duration_ms"] / 60000).round().astype(int)
    df = df.rename(columns={"duration_ms": "duration_min"})
    df["explicit"] = df["explicit"].apply(lambda flag: flag * 1)
    return df
def save_file(df, path="tracks_df.csv"):
    """Merge ``df`` into the tracks CSV and return the merged frame.

    If ``path`` already holds data, the stored rows are loaded, ``df`` is
    appended, and rows sharing an ``id`` are collapsed so that the most
    recently added one wins. Otherwise ``df`` is written as-is.

    Parameters
    ----------
    df : pandas.DataFrame
        New rows; must contain an ``id`` column when the file already has data.
    path : str or os.PathLike, optional
        Location of the CSV file. Defaults to ``"tracks_df.csv"`` in the
        current working directory.

    Returns
    -------
    pandas.DataFrame
        The frame that was written to ``path``.
    """
    target = Path(path)
    # An empty file cannot be parsed by read_csv, so it is treated as missing.
    if target.exists() and target.stat().st_size != 0:
        existing_df = pd.read_csv(target)
        # DataFrame.append was removed in pandas 2.0; concat is the replacement.
        merged = pd.concat([existing_df, df], ignore_index=True)
        merged = merged.drop_duplicates(subset="id", keep="last")
        merged.to_csv(target, mode="w", index=False)
        return merged
    df.to_csv(target, mode="w", index=False)
    return df
def auto_save(artist_list):
    """Fetch, normalize and persist the top tracks of every listed artist.

    For each artist id the pipeline is
    ``spotify.artist_top_tracks`` -> ``dataframe`` -> ``normalize`` ->
    ``save_file``.

    Parameters
    ----------
    artist_list : iterable of str
        Spotify artist ids.

    Returns
    -------
    pandas.DataFrame or None
        The frame returned by the last ``save_file`` call, or ``None`` when
        ``artist_list`` is empty.
    """
    df = None
    for artist_id in artist_list:
        top_tracks = spotify.artist_top_tracks(artist_id)
        df = save_file(normalize(dataframe(top_tracks)))
    # The return used to sit inside the loop, which stopped after the first
    # artist; it belongs here so that every artist is processed.
    return df
def _parse_artists(value):
    """Return the artist names held in one ``artists`` cell as a list."""
    import ast  # local import: only needed for cells that were read back from CSV

    if not isinstance(value, str):
        return list(value)
    # A cell that went through a CSV round trip is the repr of a Python list;
    # literal_eval keeps apostrophes and commas inside names intact.
    try:
        parsed = ast.literal_eval(value)
    except (ValueError, SyntaxError):
        parsed = None
    if isinstance(parsed, (list, tuple)):
        return [str(name) for name in parsed]
    # Fallback for text that is not a valid list literal: comma split.
    pieces = value.split(",")
    pieces[0] = pieces[0].replace("[", "")
    pieces[-1] = pieces[-1].replace("]", "")
    return [piece.replace("'", "").strip() for piece in pieces]


def _most_frequent(values):
    """Return the most common value of a Series (NaN if it has no values)."""
    counts = values.value_counts()
    return counts.index[0] if len(counts) else np.nan


def others(df):
    """Derive the per-artist and timeline tables and write both to CSV.

    ``df`` is indexed and sorted by ``release_date``. Tracks with exactly one
    artist are kept once with ``collab = 0``; every other track is repeated
    once per credited artist with ``collab = 1``. The combined table is
    written to ``timeline_df.csv``.

    The per-artist table groups that timeline by artist name: continuous
    features (and ``collab``) are averaged, categorical/bucketed features
    take their most frequent value, and ``track_count`` is the number of
    timeline rows for the artist. It is sorted by mean popularity
    (descending) and written to ``artists_df.csv``.

    Parameters
    ----------
    df : pandas.DataFrame
        Normalized tracks. The ``artists`` column may hold lists of names or
        their string representation.

    Returns
    -------
    None
        Results are only written to disk.
    """
    df_timeline = df.set_index(["release_date"]).sort_values(by=["release_date"], ascending=True)
    df_timeline = df_timeline.drop_duplicates(subset="id", keep="last")

    artist_lists = [_parse_artists(cell) for cell in df_timeline["artists"]]

    # Row positions are used throughout because release dates (the index)
    # are not unique.
    solo_pos = [pos for pos, names in enumerate(artist_lists) if len(names) == 1]
    collab_pos = [pos for pos, names in enumerate(artist_lists)
                  if len(names) != 1 for _ in names]
    collab_names = [name for names in artist_lists
                    if len(names) != 1 for name in names]

    solo = df_timeline.iloc[solo_pos].copy()
    solo["artists"] = [artist_lists[pos][0] for pos in solo_pos]
    solo["collab"] = 0

    # One row per (track, artist) pair for multi-artist tracks.
    exploded = df_timeline.iloc[collab_pos].copy()
    exploded["artists"] = collab_names
    exploded["collab"] = 1

    df_timeline = pd.concat([solo, exploded])
    df_timeline["artists"] = df_timeline["artists"].astype(str)

    mean_columns = ['danceability', 'energy', 'speechiness', 'acousticness',
                    'instrumentalness', 'liveness', 'valence', 'popularity',
                    'duration_min', 'collab']
    mode_columns = ['key', 'mode', 'explicit', 'available_markets', 'tempo',
                    'loudness']
    agg = {key: "mean" for key in mean_columns}
    agg.update({key: _most_frequent for key in mode_columns})

    df_artists = df_timeline.groupby('artists').agg(agg)
    df_artists = df_artists.sort_values(by=["popularity"], ascending=False)
    track_count = df_timeline["artists"].value_counts()
    # Align the counts with the popularity ordering of df_artists.
    df_artists["track_count"] = track_count.reindex(df_artists.index)

    df_artists.to_csv("artists_df.csv", mode="w")
    df_timeline.to_csv("timeline_df.csv", mode="w")
def create_dfs(artist_list):
    """Run the whole pipeline for one artist id or a list of artist ids.

    A single id given as a string is wrapped in a list. The tracks are then
    fetched and stored with ``auto_save`` and the derived tables are rebuilt
    with ``others``.

    Parameters
    ----------
    artist_list : str or list of str
        Spotify artist id(s).
    """
    if isinstance(artist_list, str):
        artist_list = [artist_list]
    df_tracks = auto_save(artist_list)
    # auto_save yields None when there was nothing to fetch; there is then
    # nothing to derive either.
    if df_tracks is None:
        return
    others(df_tracks)
def query_artist(string = "--"):
    """Print track statistics for every artist whose name matches ``string``.

    ``string`` is used as a regular expression and searched for anywhere in
    each label of the module-level ``df_artists`` index. For every match the
    artist name, the number of solo tracks ("Singles") and the number of
    collaboration tracks ("Collabs") are printed; both are derived from the
    ``track_count`` and ``collab`` columns.

    Parameters
    ----------
    string : str, optional
        Regular expression to look for. Defaults to ``"--"``.
    """
    track_counts = df_artists["track_count"]
    collab_share = df_artists["collab"]
    for position, artist in enumerate(df_artists.index):
        if re.search(string, str(artist)) is None:
            continue
        # .iloc is required: the index holds names, so plain [] with an
        # integer is a (deprecated) positional fallback.
        total = track_counts.iloc[position]
        # Round rather than truncate: count * share is a float product and
        # can land just below the intended integer.
        collabs = int(round(total * collab_share.iloc[position]))
        singles = int(round(total)) - collabs
        print("Name: ", artist,
              "\nSingles: ", singles,
              "\nCollabs: ", collabs)
def search_artist(string = "--"):
    """Look an artist up by name and summarize the first search hit.

    Queries ``spotify.search`` with ``artist:<string>`` and, when at least
    one artist is returned, keeps the ``name``, ``followers``, ``genres``,
    ``id`` and ``popularity`` entries of the first one. ``followers`` is
    reduced to its ``total`` value.

    Returns
    -------
    pandas.Series or None
        The summary, or ``None`` when the search returned no artists.
    """
    wanted_fields = ['name', 'followers', 'genres', 'id', 'popularity']
    response = spotify.search(q='artist:' + string, type='artist')
    matches = response['artists']['items']
    if not len(matches) > 0:
        return None

    first_hit = pd.Series(matches[0])
    keep = first_hit.index.isin(wanted_fields)
    summary = first_hit[keep]
    summary["followers"] = summary["followers"]['total']
    return summary
def copy_paste_links(list_copy_paste):
    """Run the pipeline for each pasted Spotify artist URI.

    The ``spotify:artist:`` prefix is stripped from every entry (bare ids
    pass through unchanged). Artists whose top-tracks list holds at most one
    track are reported and skipped; all others are handed to ``create_dfs``.

    Parameters
    ----------
    list_copy_paste : list of str
        Spotify artist URIs or ids.
    """
    list_ready = [mat.replace("spotify:artist:", "") for mat in list_copy_paste]
    for ready in list_ready:
        # Fetch once and reuse; the response carries its data under "tracks".
        top_tracks = spotify.artist_top_tracks(ready)["tracks"]
        if len(top_tracks) <= 1:
            # Artist names live on the track objects; with no track at all
            # the id is the only label available.
            if top_tracks:
                names = [x["name"] for x in top_tracks[0]["artists"]]
            else:
                names = [ready]
            print(names, "has/have very few songs")
        else:
            create_dfs(ready)

In [154]:
# Inspect the column names of the timeline table written by others().
pd.read_csv("timeline_df.csv").columns

Index(['Unnamed: 0', 'danceability', 'energy', 'key', 'mode', 'speechiness',
       'acousticness', 'instrumentalness', 'liveness', 'valence', 'popularity',
       'explicit', 'id', 'name', 'artists', 'available_markets', 'tempo',
       'loudness', 'duration_min', 'collab'],
      dtype='object')

In [146]:
#--- Search for the artist ---
# Looks the artist up by name; the `id` entry of the result is the value
# passed to copy_paste_links in the next cell.
search_artist("Gram Parsons")

followers                                                 77648
genres        [alternative country, cosmic american, country...
id                                       1KA3WXYMPLxomNuoE22LYd
name                                               Gram Parsons
popularity                                                   50
dtype: object

In [205]:
#--- Copy-Paste Spotify URI of the artist ---
# A bare artist id works as well as a full URI: copy_paste_links only strips
# the "spotify:artist:" prefix when it is present.
copy_paste_links(["1KA3WXYMPLxomNuoE22LYd"])

In [206]:
#--- Find the artist from the database ---
# query_artist reads the module-level name `df_artists`, so the table has to
# be loaded under exactly that name before calling it.
df_artists = pd.read_csv("artists_df.csv")
# The first CSV column holds the artist names; use it as the index.
df_artists.set_index(df_artists.columns[0], inplace = True)
df_artists.rename_axis("artist", axis = 0, inplace = True)
query_artist("Gram")

Name:  Gram Parsons 
Singles:  9 
Collabs:  10


In [88]:
import urllib.request
from bs4 import BeautifulSoup

# Download the Wikipedia page; the context manager closes the connection.
with urllib.request.urlopen("https://en.wikipedia.org/wiki/Rolling_Stone%27s_100_Greatest_Artists_of_All_Time") as response:
    html = response.read()
text = html.decode()
soup = BeautifulSoup(text, 'html.parser')

# Collect one name per cell of the first table on the page: the `title`
# attribute of the cell itself or of the first tag inside it that has one.
# Reading the attribute through the parser (instead of running a regex over
# the serialized HTML) yields decoded text, e.g. "&" rather than "&amp;".
artists_list = soup.table.find_all("td")
artists = []
for cell in artists_list:
    tagged = cell if cell.has_attr("title") else cell.find(title=True)
    # Cells without any title are skipped, however many there are.
    if tagged is not None:
        artists.append(tagged["title"])

In [90]:
links = ["spotify:artist:3WrFJ7ztbogyGnTHbHJFl2", "spotify:artist:74ASZWbe4lXaubB36ztrGX", "spotify:artist:43ZHCT0cAZBISjO8DG9PnE", "spotify:artist:22bE4uQ6baNwSHPVcDxLCe", "spotify:artist:293zczrfYafIItmnmM3coR", "spotify:artist:776Uo845nYHJpNaStv1Ds4", "spotify:artist:7GaxyUddsPok8BuhxN6OUW", "spotify:artist:4xls23Ye9WR9yy3yYMpAMm", "spotify:artist:7nwUJBm0HE4ZxD3f5cy5ok", "spotify:artist:1eYhYunlNJlDoQhtYBvPsi", "spotify:artist:2QsynagSdAqZj3U9HgDzjD", "spotify:artist:3oDbviiivRWhXwIE8hxkVV", "spotify:artist:3wYyutjgII8LJVVOLrGI0D", "spotify:artist:36QJpDe2go2KgaRleHCDTp", "spotify:artist:7guDJrEfX3qb6FEbdPA5qi", "spotify:artist:6hnWRPzGGKiapVX1UCdEAC", "spotify:artist:4y6J8jwRAwO4dssiSmN91R", "spotify:artist:3koiLjNrgRTNbOwViDipeA", "spotify:artist:1nJvji2KIlWSseXRSlNYsC", "spotify:artist:2bmixwMZXlkl2sbIbOfviq", "spotify:artist:60df5JBRRPcnSpsIMxxwQm", "spotify:artist:51Blml2LZPmy7TTiAg47vQ", "spotify:artist:3eqjTLE0HfPfh78zjh6TqT", "spotify:artist:2zyz0VJqrDXeFDIyrfVXSo", "spotify:artist:09C0xjtosNAIXP36wTnWxd", "spotify:artist:1co4F2pPNH8JjTutZkmgSm", "spotify:artist:6olE6TJLqED3rqDCT0FyPh", "spotify:artist:5a2EaR3hamoenG9rDuVn8j", "spotify:artist:3RGLhK1IP9jnYFH4BRFJBS", "spotify:artist:67ea9eGLXYMsO2eYQRui3w", "spotify:artist:6kACVPfCOnqzgfEF5ryl0x", "spotify:artist:0h9smro0z3HqUbD94jotU8", "spotify:artist:6TqQLejnHXMGr7KcegxUND", "spotify:artist:4ACplpEqD6JIVgKrafauzs", "spotify:artist:6v8FB84lnmJs434UJf2Mrm", "spotify:artist:3fMbdgg4jU18AjLCKBhRSm", "spotify:artist:6tbjWDEIzxoDsBA1FuhfPW", "spotify:artist:0JDkhL4rjiPNEp92jAgJnS", "spotify:artist:4x1nvY2FN8jxqAFA0DA02H", "spotify:artist:0oSGxfWSnnOXhD2fKuz2Gy", "spotify:artist:70cRZdQywnSFp9pnc2WTCE", "spotify:artist:22WZ7M8sxp5THdruNY3gXt", "spotify:artist:44NX2ffIYHr6D4n7RaZF7A", "spotify:artist:5m8H6zSadhu1j9Yi04VLqD", "spotify:artist:6Mo9PoU6svvhgEum7wh2Nd", "spotify:artist:1PCZpxHJz7WAMF8EEq8bfc", "spotify:artist:4NgfOZCL9Ml67xzM0xzIvC", "spotify:artist:0vYkHhJ48Bs3jWcvZXvOrP", 
"spotify:artist:3CQIn7N5CuRDP8wEI7FiDA", "spotify:artist:3PhoLpVuITZKcymswpck5b", "spotify:artist:4vpDg7Y7fU982Ds30zawDA", "spotify:artist:0Wxy5Qka8BN9crcFkiAxSR", "spotify:artist:0k17h0D3J5VfsdmQ1iZtE9", "spotify:artist:4wQ3PyMz3WwJGI5uEqHUVR", "spotify:artist:1dfeR4HaWDbWqFHLkxsg1d", "spotify:artist:6PAt558ZEZl0DmdXlnjMgD", "spotify:artist:6DPYiyq5kWVQS4RGwxzPC7", "spotify:artist:4TMHGUX5WI7OOm53PqSDAT", "spotify:artist:450o9jw6AtiQlQkHCdH6Ru", "spotify:artist:7Ey4PD4MYsKc5I2dolUwbH", "spotify:artist:1u7kkVrr14iBvrpYnZILJR", "spotify:artist:2nRbxpnBMMbtMBWH5QdqH2", "spotify:artist:5hW4L92KnC6dX9t7tYM4Ve", "spotify:artist:1zuJe6b1roixEKMOtyrEak", "spotify:artist:2ye2Wgw4gimLv2eAKyk1NB", "spotify:artist:0iOVhN3tnSvgDbcg25JoJb", "spotify:artist:5hW4L92KnC6dX9t7tYM4Ve", "spotify:artist:3jVMgT4X7YeuYE4aludcmE", "spotify:artist:3dkbV4qihUeMsqN4vBGg93", "spotify:artist:1SQRv42e4PjEYfPhS0Tk9E", "spotify:artist:74oJ4qxwOZvX6oSsu1DGnw", "spotify:artist:3RwQ26hR2tJtA8F9p2n7jG", "spotify:artist:4VnomLtKTm9Ahe1tZfmZju", "spotify:artist:5hIClg6noTaCzMu2s5wp4f", "spotify:artist:5NGO30tJxFlKixkPSgXcFE", "spotify:artist:6ra4GIOgCZQZMOaUECftGN", "spotify:artist:711MCceyCBcFnzjGY4Q7Un", "spotify:artist:4Z8W4fKeB5YxbusRsdQVPb", "spotify:artist:1FClsNYBUoNFtGgzeG74dW", "spotify:artist:0ECwFtbIWEVNwjlrfc6xoL", "spotify:artist:0x83OBqixqdCHnStP5VMcn", "spotify:artist:03r4iKL2g2442PT9n2UKsx", "spotify:artist:4BFMTELQyWJU1SwqcXMBm3", "spotify:artist:4Tfb5u63OB7hJFylDlsZTE", "spotify:artist:2BGRfQgtzikz1pzAD0kaEn", "spotify:artist:1FqqOl9itIUpXr4jZPIVoT", "spotify:artist:7dGJo4pcD2V6oG8kP0tJRR", "spotify:artist:3IYUhFvPQItj6xySrBmZkd", "spotify:artist:4EnEZVjo3w1cwcQYePccay", "spotify:artist:0vn7UBvSQECKJm2817Yf1P", "spotify:artist:5M52tdBnJaKSvOpJGz8mfZ", "spotify:artist:1ZwdS5xdxEREPySFridCfh", "spotify:artist:1KA3WXYMPLxomNuoE22LYd", "spotify:artist:0kbYTNQb4Pb1rPbbaF0pT4", "spotify:artist:3nFkdlSjzX9mRTtwJOzDYB", "spotify:artist:2lxX1ivRYp26soIavdG9bX", 
"spotify:artist:6GI52t8N5F02MxU0g5U69P", "spotify:artist:73sSFVlM6pkweLXE8qw1OS", "spotify:artist:2UZMlIwnkgAEDBsw1Rejkn", "spotify:artist:3qm84nBOXUEQ2vnTfUTTFC", "spotify:artist:2vDV0T8sxx2ENnKXds75e5", "spotify:artist:0X380XXQSNBYuleKzav5UO", "spotify:artist:4MVyzYMgTwdP7Z49wAZHx0", "spotify:artist:1Pe5hlKMCTULjosqZ6KanP", "spotify:artist:3MdG05syQeRYPPcClLaUGl", "spotify:artist:4KWTAlx2RvbpseOGMEmROg", "spotify:artist:3fhOTtm0LBJ3Ojn4hIljLo", "spotify:artist:2AV6XDIs32ofIJhkkDevjm", "spotify:artist:5hIClg6noTaCzMu2s5wp4f", "spotify:artist:2x9SpqnPi8rlE9pjHBwmSC"]

In [91]:
# Drop repeated URIs while keeping first-seen order; going through set()
# would shuffle the list differently from run to run.
links = list(dict.fromkeys(links))

In [None]:
 'Gram Parsons',
 'Miles Davis',
 'Jay-Z',
 'The Yardbirds',
 'Carlos Santana',
 'Ricky Nelson',
 'Tom Petty',
 "Guns N' Roses",
 "Booker T. & the M.G.'s",
 'Nine Inch Nails',
 'Lynyrd Skynyrd',
 'Martha & The Vandellas',
 'Diana Ross and the Supremes',
 'Diana Ross and the Supremes',
 'R.E.M.',
 'Roxy Music',
 'Curtis Mayfield',
 'Curtis Mayfield',
 'Carl Perkins',
 'Talking Heads']
['The Beatles',
 'Bob Dylan',
 'Elvis Presley',
 'The Rolling Stones',
 'Chuck Berry',
 'Jimi Hendrix',
 'James Brown',
 'Little Richard',
 'Aretha Franklin',
 'Ray Charles',
 'Bob Marley',
 'Beach Boys',
 'Buddy Holly',
 'Led Zeppelin',
 'Stevie Wonder',
 'Sam Cooke',
 'Muddy Waters',
 'Marvin Gaye',
 'The Velvet Underground',
 'Bo Diddley',
 'Otis Redding',
 'U2',
 'Bruce Springsteen',
 'Jerry Lee Lewis',
 'Fats Domino',
 'Ramones',
 'Nirvana (band)',
 'Prince (musician)',
 'Prince (musician)',
 'The Clash',
 'The Who',
 'The Clash',
 'Nirvana (band)',
 'Johnny Cash',
 'Smokey Robinson & The Miracles',
 'The Everly Brothers',
 'Neil Young',
 'Michael Jackson',
  'Madonna (entertainer)',
 'Roy Orbison',
 'John Lennon',
 'David Bowie',
 'Simon & Garfunkel',
 'The Doors',
 'Van Morrison',
 'Sly & the Family Stone',
 'Public Enemy (band)',
 'The Byrds',
 'Janis Joplin',
 'Patti Smith',
 'Run-D.M.C.',
 'Elton John',
 'The Band',
 "Howlin' Wolf",
 'Pink Floyd',
 'The Allman Brothers Band',
 'Queen (band)',
 'Eric Clapton',
 'The Allman Brothers Band',
 'Dr. Dre',
 "Howlin' Wolf",
 'Grateful Dead',
 'Eric Clapton',
 'Parliament-Funkadelic',
 'Dr. Dre',
 'Aerosmith',
 'Grateful Dead',
 'Sex Pistols',
 'Parliament-Funkadelic',
 'Louis Jordan',
 'Aerosmith',
 'Joni Mitchell',
 'The Sex Pistols',
 'Tina Turner',
 'Metallica',
 'Etta James',
 'Joni Mitchell',
 'Phil Spector',
 'Tina Turner',
 'The Kinks',
 'Phil Spector',
 'Al Green',
 'The Kinks',
 'Cream (band)',
 'Al Green',
 'The Temptations',
 'Cream (band)',
 'Jackie Wilson',
 'The Temptations',
 'Carl Perkins',
 'Jackie Wilson',
 'The Police',
 'Frank Zappa',
 'AC/DC',
 'Radiohead',
 'Hank Williams',
 'The Eagles',
 'The Shirelles',
 'Beastie Boys',
 'The Stooges',
 'The Four Tops',
 'Elvis Costello',
 'The Drifters',
 'Eminem',
 'Creedence Clearwater Revival',
 'N.W.A.',
 'Eminem',
 'James Taylor',
 'Black Sabbath',
 '2Pac',