

---


# Freesound audio corpus generation with geotags


> This notebook downloads geotagged sounds from Freesound and collects their geotags.
> Each audio file name and its corresponding geotag are saved to `mappings.json`.

---




In [1]:
import os
import json
import requests
import numpy as np
import pandas as pd
import freesound
from dotenv import load_dotenv

### Freesound API

> Create a `.env` file containing the line `FREESOUND_API_KEY=your_api_key`

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

FREESOUND_API_KEY = os.getenv('FREESOUND_API_KEY')
if not FREESOUND_API_KEY:
    # Stop here with a clear message instead of sending unauthenticated
    # requests to the API further down the notebook.
    raise RuntimeError(
        "FREESOUND_API_KEY is not set; create a .env file containing "
        "FREESOUND_API_KEY=your_api_key"
    )

freesound_client = freesound.FreesoundClient()
freesound_client.set_token(FREESOUND_API_KEY)

## Download dataset

In [3]:
def save_audio(audio_url, file_name, base_directory="../corpus/audio"):
    """Download one audio file and store it inside `base_directory`.

    Args:
        audio_url: URL of the audio file to download.
        file_name: Name of the file to create inside `base_directory`.
        base_directory: Directory the file is written to; it is created
            when it does not exist yet.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    os.makedirs(base_directory, exist_ok=True)

    # A timeout keeps a stalled connection from hanging the notebook forever.
    audio = requests.get(audio_url, timeout=30)
    # Check the status before touching the disk so that an error page is
    # never written out under an audio file name.
    audio.raise_for_status()

    file_path = os.path.join(base_directory, file_name)
    with open(file_path, "wb") as file:
        file.write(audio.content)

In [4]:
def download_corpus(num_samples=100, query='instruments'):
    """Download geotagged Freesound previews and record their geotags.

    Queries the Freesound text-search endpoint for geotagged sounds that are
    1 to 60 seconds long, saves the `preview-hq-mp3` preview of each result
    with `save_audio`, and writes a `{file_name: geotag}` dictionary to
    `../corpus/mappings.json`.

    Args:
        num_samples: Maximum number of sounds to download. Fewer are
            downloaded when the search yields fewer results.
        query: Text query sent to the search endpoint.

    Raises:
        requests.HTTPError: If a search request returns an error status.
    """
    url = "https://freesound.org/apiv2/search/text/"
    params = {
        # Use plain spaces: requests percent-encodes parameter values itself,
        # so a literal "%20" would reach the server as the text "%20".
        "filter": "is_geotagged:1 duration:[1 TO 60]",
        "fields": "geotag,tags,id,previews",
        "query": query,
    }
    # Authenticate with a header so that follow-up requests to the `next` URL
    # are authorized without re-sending (and thereby duplicating) the query
    # parameters that URL already contains.
    headers = {"Authorization": "Token " + str(FREESOUND_API_KEY)}

    mappings = dict()
    next_url, next_params = url, params

    # Process every page, including the last one (whose `next` is None), and
    # stop as soon as exactly `num_samples` sounds have been collected.
    while next_url is not None and len(mappings) < num_samples:
        response = requests.get(
            next_url, params=next_params, headers=headers, timeout=30
        )
        response.raise_for_status()
        json_response = response.json()

        for sound in json_response['results']:
            if len(mappings) >= num_samples:
                break

            audio_url = sound.get('previews')['preview-hq-mp3']

            # Use the sound ID as the file name
            file_name = str(sound.get('id')) + ".mp3"

            # Save audio file, then record filename: geotag
            save_audio(audio_url, file_name)
            mappings[file_name] = sound.get('geotag')

        # Only the first request needs `params`; later pages are fetched from
        # the `next` URL returned by the API.
        next_url, next_params = json_response.get('next'), None

    # The directory may not exist yet when no audio file was saved.
    os.makedirs('../corpus', exist_ok=True)
    with open('../corpus/mappings.json', 'w') as f:
        json.dump(mappings, f, indent=4)

In [5]:
# Build the corpus, requesting 200 samples.
download_corpus(num_samples=200)