## Get all artist info

Using Spotify API: https://developer.spotify.com/documentation/web-api/reference/get-an-artist

In [35]:
import base64
import json
import os
import time

import requests
from dotenv import load_dotenv
from tqdm import tqdm

load_dotenv()

True

In [29]:
# Authenticate against the Spotify Web API using the client-credentials flow.
# https://developer.spotify.com/documentation/web-api/tutorials/client-credentials-flow

# Credentials come from the environment; a missing variable raises KeyError
# right here instead of producing a failed token request further down.
SPOTIFY_CLIENT_ID = os.environ["SPOTIFY_CLIENT_ID"]
SPOTIFY_CLIENT_SECRET = os.environ["SPOTIFY_CLIENT_SECRET"]

endpoint = "https://accounts.spotify.com/api/token"

headers = {
    "Content-Type": "application/x-www-form-urlencoded"
}
data = {
    "grant_type": "client_credentials",
    "client_id": SPOTIFY_CLIENT_ID,
    "client_secret": SPOTIFY_CLIENT_SECRET
}
# A timeout keeps the cell from hanging forever if the auth server is unreachable.
res = requests.post(endpoint, data=data, headers=headers, timeout=30)
# Surface bad credentials / server errors as an HTTPError rather than as a
# confusing KeyError on the "access_token" lookup below.
res.raise_for_status()
API_TOKEN = res.json()["access_token"]

## Make one request per artist (top N = 2,500)

In [36]:
endpoint = "https://api.spotify.com/v1/artists"

def get_artist_data(id, max_retries=3):
    """Fetch one artist's record from the Spotify "Get Artist" endpoint.

    Args:
        id: Spotify artist ID, appended to the module-level ``endpoint`` URL.
        max_retries: How many times to retry after an HTTP 429 (rate-limited)
            response before giving up.

    Returns:
        The parsed JSON body of a successful response.

    Raises:
        requests.HTTPError: If the final response has a 4xx/5xx status. This
            prevents error payloads from being returned (and cached by the
            caller) as if they were artist data.
    """
    headers = {
        "Authorization": f"Bearer {API_TOKEN}"
    }
    for attempt in range(max_retries + 1):
        res = requests.get(f"{endpoint}/{id}", headers=headers, timeout=30)
        if res.status_code == 429 and attempt < max_retries:
            # Rate-limited: wait for the number of seconds given in the
            # Retry-After header (fall back to 1s if absent), then try again.
            time.sleep(float(res.headers.get("Retry-After", 1)))
            continue
        res.raise_for_status()
        return res.json()

In [60]:
# Load the artist -> frequency mapping.
with open("./data/1m_artist_freq.json", encoding="utf8") as fin:
    artist_freq_dict = json.load(fin)

# Load the name <-> id lookup tables stored together in one JSON file.
with open("./data/name_id_maps.json", encoding="utf8") as fin:
    data = json.load(fin)
    name2id, id2name = data["name2id"], data["id2name"]

# Keep the keys of the N highest-frequency entries (ties keep file order,
# since the sort is stable).
N = 2500
artist_freq = sorted(artist_freq_dict.items(), key=lambda pair: -pair[1])
artist_ids = [pair[0] for pair in artist_freq[:N]]

In [61]:
# Hopefully Spotify doesn't ratelimit too harshly
# https://developer.spotify.com/documentation/web-api/concepts/rate-limits
# Should take ~5 min

# Resume from the on-disk cache when it exists; on a first run (no file yet)
# start with an empty cache instead of crashing with FileNotFoundError.
artist_data_path = "./data/1m_artist_data.json"
if os.path.exists(artist_data_path):
    with open(artist_data_path, encoding="utf8") as fin:
        artist_data = json.load(fin)
else:
    artist_data = {}

for artist_id in tqdm(artist_ids):
    cached = artist_data.get(artist_id)
    # Skip only entries that look like real payloads. An entry holding an
    # "error" key is presumably a failed response cached by an earlier run
    # (e.g. a rate-limit reply), so fetch it again.
    if isinstance(cached, dict) and "error" not in cached:
        continue
    artist_data[artist_id] = get_artist_data(artist_id)

100%|██████████| 2500/2500 [04:25<00:00,  9.40it/s]  


In [62]:
# Persist the artist cache. Opening the real file with "w" truncates it
# immediately, so a serialization error or interrupt mid-dump would destroy
# the existing cache. Write to a sibling temp file first and swap it into
# place only after the dump has fully succeeded.
artist_data_path = "./data/1m_artist_data.json"
artist_data_tmp_path = artist_data_path + ".tmp"
with open(artist_data_tmp_path, "w", encoding="utf8") as fout:
    json.dump(artist_data, fout, indent=2)
os.replace(artist_data_tmp_path, artist_data_path)