# Get a list of species in the Museum of Victoria collection

In [29]:
import requests
from tqdm.auto import tqdm
import pandas as pd

In [7]:
# Search endpoint that every request in this notebook is sent to.
SEARCH_URL = 'https://collections.museumsvictoria.com.au/api/search'

In [27]:
def get_totals(params):
    """
    Ask the search API how many results and pages match a set of parameters.

    Parameters:
        params (dict): query-string parameters sent to SEARCH_URL.

    Returns:
        tuple: (total_results, total_pages), both as integers.

    Raises:
        requests.HTTPError: if the API answers with a 4xx/5xx status.
        KeyError: if a successful response lacks the expected headers.
    """
    response = requests.get(SEARCH_URL, params=params)
    # Surface the HTTP status on failure instead of an opaque KeyError
    # from the header lookups below.
    response.raise_for_status()
    # The total results and pages values are in the API response's headers!
    total_results = int(response.headers['Total-Results'])
    total_pages = int(response.headers['Total-Pages'])
    return (total_results, total_pages)

def harvest_species():
    """
    Harvest species records from the search API, one page at a time.

    Records with no taxonomy data are skipped.

    Returns:
        list: one dict per kept record, with 'id', 'taxon_name' and
        'common_name' keys. A name is None when the record's taxonomy
        does not include it.

    Raises:
        requests.HTTPError: if a page request returns a 4xx/5xx status.
    """
    species = []
    params = {
        'query': ' ',
        'recordtype': 'species',
        'sort': 'date',
        'perpage': 100
    }
    # Only the page count is needed to drive the loop below.
    _, total_pages = get_totals(params)
    for page in tqdm(range(1, total_pages + 1)):
        params['page'] = page
        response = requests.get(SEARCH_URL, params=params)
        # Stop on an error status rather than trying to read an error body as records.
        response.raise_for_status()
        for record in response.json():
            # .get() treats a record without the key the same as one with an empty value,
            # so a single odd record cannot abort the whole harvest.
            taxonomy = record.get('taxonomy')
            if taxonomy:
                species.append({
                    'id': record['id'],
                    'taxon_name': taxonomy.get('taxonName'),
                    'common_name': taxonomy.get('commonName')
                })
    return species

In [28]:
# Run the harvest; the result is a list of dicts with 'id', 'taxon_name' and 'common_name' keys.
species = harvest_species()

HBox(children=(FloatProgress(value=0.0, max=15.0), HTML(value='')))




In [30]:
# Build a DataFrame from the harvested records (one row per record) and preview the first rows.
df = pd.DataFrame(species)
df.head()

Unnamed: 0,id,taxon_name,common_name
0,species/15849,Nephila edulis,Australian Golden Orb-weaver Spider
1,species/16848,Enypniastes eximia,Swimming sea cucumber
2,species/8608,Cryptocheilus bicolor,Spider Hunting Wasp
3,species/15155,Argiope trifasciata,Banded Garden Spider
4,species/12426,Stanwellia grisea,Melbourne Trapdoor Spider


How many species are recorded in the Museum of Victoria collection?

In [31]:
# (rows, columns): each row is one harvested record that had taxonomy data.
df.shape

(1408, 3)

Save the list as a CSV file so we can make use of it elsewhere.

In [32]:
# index=False keeps the DataFrame's row index out of the CSV file.
df.to_csv('museum-victoria-species.csv', index=False)