# BeatSaver API usage and Data Processing notebook

This notebook is for developing code associated with downloading Beat Saber custom songs and mappings from the BeatSaver API. Goals include:

- Systematically download songs, maps, and associated metadata
- Process data
- Save data to database

In [122]:
import numpy as np
import pandas as pd
import librosa
import json
import requests
import pickle
import matplotlib.pyplot as plt

1. Download metadata from all beatsaver records
2. Examine distribution of level difficulties
3. Extract download link for records that have at least 'normal' difficulty level present
4. Save links in a list for iteration

In [17]:
call = requests.get("https://beatsaver.com/api/maps/latest/1")

In [31]:
def get_metadata(limit):
    """Function that will iterate through all pages of beatsaver.com api to download metadata."""
    page = 1
    metadata = []
    call = requests.get(f"https://beatsaver.com/api/maps/latest/{page}").json()
    for x in range(len(call['docs'])):
        metadata.append(call['docs'][x])
    while call['nextPage'] != 'None' and call['nextPage'] < limit:
        page = call['nextPage']
        call = requests.get(f"https://beatsaver.com/api/maps/latest/{page}").json()
        for x in range(len(call['docs'])):
            metadata.append(call['docs'][x])        
    return metadata

In [35]:
def api_call(page):
    """Downloads json file from beatsaver.com API at given 'page'. Only downloads if status is 200, otherwise throws
    an error. Each page in this API stores 10 records, as well as information about the page."""
    call = requests.get(f"https://beatsaver.com/api/maps/latest/{page}")
    if call.status_code == 200:
        json = call.json()
        next_page = json['nextPage']
        return json, next_page
    else:
        print(f"API call failed with error code: {call.status_code} at page: {page}")

In [115]:
def get_metadata_recursive(limit, init_page = 1):
    """Function that will iterate through all pages of beatsaver.com api to download metadata. 'limit' is the maximum
    page number to download to, 'init_page' is the first page number to start downloading from. There seems to be a 
    1000-call limit on this API."""
    page = init_page
    metadata = []
    json, next_page = api_call(page)
    for x in range(len(json['docs'])):
        metadata.append(json['docs'][x])
    while next_page < limit:
        page = next_page
        json, next_page = api_call(page)
        for x in range(len(json['docs'])):
            metadata.append(json['docs'][x])
        if next_page == 'None' or next_page == None:
            break
    return metadata    

In [110]:
metadata = get_metadata_recursive(limit = 1000)

In [111]:
metadata2 = get_metadata_recursive(limit = 2000, init_page = 1001)

In [116]:
metadata3 = get_metadata_recursive(limit = 3000, init_page = 2001)

In [119]:
metadata_total = []
for x in list([metadata, metadata2, metadata3]):
    for y in x:
        metadata_total.append(y)

In [120]:
len(metadata_total)

20605

In [124]:
with open('metadata.pkl', 'wb') as f:
    pickle.dump(metadata_total, f)

In [94]:
def difficulty_dist(metadata):
    """Counts the number of representatives for each difficulty level in the list of metadata."""
    difficulty = {'easy': 0, 'normal': 0, 'hard': 0, 'expert': 0, 'expertPlus': 0}
    for x in metadata:
        for level, value in list(x['metadata']['difficulties'].items()):
            if x['metadata']['difficulties'][level] == True:
                difficulty[f'{level}'] +=1
    return difficulty    

In [126]:
difficulty_dist(metadata_total)

{'easy': 2003,
 'normal': 2860,
 'hard': 6137,
 'expert': 11026,
 'expertPlus': 8568}