

---


# Freesound audio corpus generation with geotags


---




**Resources:**


*   [Freesound API search parameters](https://freesound.org/docs/api/resources_apiv2.html#request-parameters-text-search-parameters) (information about tags, filters etc.)
*   [Freesound API key generation](https://freesound.org/home/login/?next=/apiv2/apply/) (requires Freesound account)
* [pycountry documentation](https://pypi.org/project/pycountry/)
* [reverse geocoder documentation](https://github.com/thampiman/reverse-geocoder)



---








# Necessary pip installs


---



In [1]:
!pip install reverse_geocoder
!pip install pycountry

Collecting reverse_geocoder
  Using cached reverse_geocoder-1.5.1-py3-none-any.whl
Installing collected packages: reverse_geocoder
Successfully installed reverse_geocoder-1.5.1
Collecting pycountry
  Using cached pycountry-24.6.1-py3-none-any.whl.metadata (12 kB)
Using cached pycountry-24.6.1-py3-none-any.whl (6.3 MB)
Installing collected packages: pycountry
Successfully installed pycountry-24.6.1


In [2]:
!pip install git+https://github.com/MTG/freesound-python

Collecting git+https://github.com/MTG/freesound-python
  Cloning https://github.com/MTG/freesound-python to /tmp/pip-req-build-k959qrja
  Running command git clone --filter=blob:none --quiet https://github.com/MTG/freesound-python /tmp/pip-req-build-k959qrja
  Resolved https://github.com/MTG/freesound-python to commit 5be99a3689d17303c01cb122bbb0d5a96eba04f6
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Building wheels for collected packages: freesound-python
  Building wheel for freesound-python (pyproject.toml) ... [?25ldone
[?25h  Created wheel for freesound-python: filename=freesound_python-1.1-py3-none-any.whl size=6729 sha256=e5941251893da3ee8be506eccbf854679e1201c549142c79f16e3ff0e6b99da3
  Stored in directory: /tmp/pip-ephem-wheel-cache-k5g1foe3/wheels/dc/c8/27/88211ef4ed95f2a550e37a5391838b1351782509677b1545af
Successfully built freesound-python
Installing c

In [10]:
import os
import pandas as pd
import numpy as np
import freesound
import json
from IPython.display import display


# Read the API key from the environment instead of hard-coding it, so the
# secret is never stored in (or shared with) the notebook itself.
# Set it before starting the kernel, e.g. `export FREESOUND_API_KEY=...`.
# NOTE(review): the key that was previously embedded here should be treated as
# leaked and regenerated in the Freesound account settings.
FREESOUND_API_KEY = os.environ.get('FREESOUND_API_KEY', '')  # [YOUR API KEY]
if not FREESOUND_API_KEY:
    raise RuntimeError(
        "FREESOUND_API_KEY is not set; export your Freesound API key as an "
        "environment variable before running this notebook."
    )

# Token-authenticated client from the freesound-python package
freesound_client = freesound.FreesoundClient()
freesound_client.set_token(FREESOUND_API_KEY)

In [51]:
# Delete the /content/geotagged_sounds directory and everything in it (IPython shell magic).
# NOTE(review): save_audio() below writes into "sounds", not this directory — confirm
# that this is still the path that needs cleaning.
%rm -rf /content/geotagged_sounds



---


# Create geotagged_sounds audio file directory


---


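Audio files are downloaded into a single `sounds` directory and named *ID.mp3*, where *ID* is the Freesound sound ID. The geotag of every downloaded file is additionally written to `data.json`, keyed by file name. The country of each sound is resolved from its geotag, but it is not currently part of the file path (no per-country subdirectories or *Country_ID.mp3* names are created).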

In [11]:
import requests
import os

# Country imports
import reverse_geocoder as rg
import pycountry

# NOTE: assumes that geotag is passed as string "lat lon" (Freesound API geotag format)
def get_country_by_geotag(geotag):
  """Return the country name for a Freesound geotag string.

  Args:
    geotag: Two whitespace-separated numbers, latitude first and longitude
      second, e.g. "41.0082 28.9731".

  Returns:
    The country name known to pycountry for the ISO 3166-1 alpha-2 code
    reported by reverse_geocoder. If pycountry has no entry for that code,
    the raw country code is returned instead of failing.

  Raises:
    ValueError: If the geotag does not consist of exactly two numbers.
  """
  parts = geotag.split()
  if len(parts) != 2:
    raise ValueError(f"Expected geotag as 'lat lon', got {geotag!r}")

  # reverse_geocoder expects numeric (lat, lon) tuples, so convert explicitly
  # instead of handing it a list of strings
  latitude, longitude = (float(part) for part in parts)

  # Retrieve country code for the coordinates
  location = rg.search((latitude, longitude))
  cc = location[0].get('cc')

  # pycountry returns None for codes it does not know; fall back to the code
  # so that one unusual location does not abort a whole download run
  country = pycountry.countries.get(alpha_2=cc)
  if country is None:
    return cc

  return country.name

# country should be string, id should be int
def save_audio(audio_url, id, country):
    """Download one audio preview and store it as ``sounds/<id>.mp3``.

    Args:
        audio_url: URL of the preview file to download.
        id: Freesound sound ID; used as the file name.
        country: Country of the sound. Accepted for interface compatibility
            but currently not used when building the file path.

    Returns:
        True if the file was written, False if the server answered with an
        HTTP error status and the download was skipped.
    """
    base_directory = "sounds"  # Base directory to store audio files
    os.makedirs(base_directory, exist_ok=True)

    # Download sound from URL provided by Freesound. The timeout keeps a
    # stalled connection from blocking the whole run indefinitely.
    audio = requests.get(audio_url, timeout=30)

    # Without this check an HTML/JSON error body would be saved as an ".mp3"
    if not audio.ok:
        print(f"Skipping sound {id}: HTTP {audio.status_code} for {audio_url}")
        return False

    # Define file name for audio
    file_path = os.path.join(base_directory, f"{id}.mp3")

    # Write audio
    with open(file_path, "wb") as file:
        file.write(audio.content)

    return True



def get_geotagged_sounds(num_samples=100):
  """Download up to ``num_samples`` geotagged Freesound previews.

  Runs a Freesound text search for 'instruments', restricted to geotagged
  sounds between 1 and 60 seconds long, and walks the result pages. For each
  sound the mp3 preview is stored through ``save_audio`` and the geotag is
  recorded. The mapping ``"<id>.mp3" -> geotag`` is written to ``data.json``,
  also when the run is interrupted by an error, so partial progress is kept.

  Args:
    num_samples: Maximum number of sounds to process.

  Returns:
    Tuple ``(geotag_sound_ids, count)``: the list of processed sound IDs and
    how many sounds were processed.

  Raises:
    requests.HTTPError: If the Freesound search API answers with an error
      status (e.g. invalid token or rate limit).
  """
  # TO ADD: parameters to the function (e.g. url, number of sounds, fields etc.)
  # NOTE: multiple filter criteria are separated by a plain space. requests
  # URL-encodes parameter values itself, so a literal "%20" in the string
  # would reach the API as "%2520" instead of a space.

  url = "https://freesound.org/apiv2/search/text/"
  params = {
      "filter": "is_geotagged:1 duration:[1 TO 60]",
      "fields": "geotag,tags,id,previews",
      "token": FREESOUND_API_KEY,
      'query': 'instruments',
      # Pages are requested explicitly; the 'next' key of the response is only
      # used to detect the last page. This avoids re-sending params on top of
      # a 'next' URL that already carries its own query string.
      "page": 1,
  }

  geotag_sound_ids = []
  count = 0
  data = dict()

  try:
    while count < num_samples:
      response = requests.get(url, params=params, timeout=30)
      # Fail loudly on API errors instead of silently returning nothing
      response.raise_for_status()
      json_response = response.json()

      results = json_response.get('results') or []
      if not results:
        break

      # Iterate over all sounds on this page (each one is a dictionary)
      for sound in results:
        # Stop in the middle of a page once enough sounds were collected
        if count >= num_samples:
          break

        # Get sound preview URL
        # NOTE: .mp3 is retrieved. For .ogg, pass preview-hq-ogg
        audio_url = sound.get('previews')['preview-hq-mp3']

        # Get sound ID
        sound_id = sound.get('id')
        geotag_sound_ids.append(sound_id)

        # Get country of sound from Freesound geotag
        geotag = sound.get('geotag')
        country = get_country_by_geotag(geotag)

        # Save audio file
        save_audio(audio_url, sound_id, country)

        data[str(sound_id) + ".mp3"] = geotag
        count += 1

      # 'next' is None on the last page; that page has been processed above
      if json_response.get('next') is None:
        break
      params["page"] += 1
  finally:
    # Persist whatever was collected, even if a request failed mid-run
    with open('data.json', 'w') as f:
      json.dump(data, f, indent=4)

  return geotag_sound_ids, count



---


# Run 'main'


---



In [None]:
# Run the download asking for 200 samples; keeps the collected sound IDs and the processed count
geotag_sound_ids, count = get_geotagged_sounds(num_samples=200)



---


# Code examples


---



### Retrieving country name from coordinates for some Freesound sample

In [47]:
import reverse_geocoder as rg
import pycountry
import requests


# Request the metadata of a single Freesound sample (sound 344165)
url = "https://freesound.org/apiv2/sounds/344165/"
params = dict(fields="geotag,tags,id,previews", token=FREESOUND_API_KEY)

response = requests.get(url, params=params)
json_response = response.json()
# print(json_response)

# Download the high-quality mp3 preview; the raw bytes are in audio.content
audio_url = json_response['previews']['preview-hq-mp3']
audio = requests.get(audio_url)

# The geotag is one whitespace-separated string; split it into its parts
coord = json_response.get('geotag')
coord_tuple = coord.split()

# reverse_geocoder: look up the country code for the coordinates
location = rg.search(coord_tuple)
cc = location[0].get('cc')

# pycountry: translate the alpha-2 country code into a country name
country_name = pycountry.countries.get(alpha_2=cc).name

print(f'Country name: {country_name}')

Country name: United States
