<a href="https://colab.research.google.com/github/yakuzadave/colab-ai-art-gen/blob/main/essentia_classifier_enhanced.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Essentia Music Classifier

This notebook uses Essentia to classify the WAV files I am collecting into an audio dataset, which will later be used for fine-tuning and training Meta's MusicGen.

## Google Drive Integration
For ease of use and file persistence, this notebook uses Google Colab's ability to mount Google Drive as a filesystem. If you make a copy of this notebook, update the paths to reflect your own folder structure.



In [None]:
# Mount Google Drive at /content/drive; later cells read audio files from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Module Installs

In [None]:
# Optional: clone the Essentia repository into Google Drive (adjust the destination path to your own Drive layout)
# !git clone https://github.com/MTG/essentia.git /content/drive/MyDrive/AI/essentia_music_repo

In [None]:
%%capture module_install_logs
!sudo apt-get install build-essential libeigen3-dev libyaml-dev libfftw3-dev libtag1-dev libchromaprint-dev

!pip install -U essentia-tensorflow pydub

In [None]:
%%capture weight_install_logs
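# download weights to the default location (the current working directory, which is where the later cells load these .pb files from); uncomment on first run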
# !curl https://essentia.upf.edu/models/classification-heads/genre_discogs400/genre_discogs400-discogs-effnet-1.pb --output genre_discogs400-discogs-effnet-1.pb
# !curl https://essentia.upf.edu/models/feature-extractors/discogs-effnet/discogs-effnet-bs64-1.pb --output discogs-effnet-bs64-1.pb
# !curl https://essentia.upf.edu/models/classification-heads/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb --output mtg_jamendo_moodtheme-discogs-effnet-1.pb
# !curl https://essentia.upf.edu/models/classification-heads/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb --output mtg_jamendo_instrument-discogs-effnet-1.pb

# # uncomment if you would like to copy the weights to Google Drive instead
# !curl https://essentia.upf.edu/models/classification-heads/genre_discogs400/genre_discogs400-discogs-effnet-1.pb --output /content/drive/MyDrive/AI/essentia_music_models/genre_discogs400-discogs-effnet-1.pb
# !curl https://essentia.upf.edu/models/feature-extractors/discogs-effnet/discogs-effnet-bs64-1.pb --output /content/drive/MyDrive/AI/essentia_music_models/discogs-effnet-bs64-1.pb
# !curl https://essentia.upf.edu/models/classification-heads/mtg_jamendo_moodtheme/mtg_jamendo_moodtheme-discogs-effnet-1.pb --output /content/drive/MyDrive/AI/essentia_music_models/mtg_jamendo_moodtheme-discogs-effnet-1.pb
# !curl https://essentia.upf.edu/models/classification-heads/mtg_jamendo_instrument/mtg_jamendo_instrument-discogs-effnet-1.pb --output /content/drive/MyDrive/AI/essentia_music_models/mtg_jamendo_instrument-discogs-effnet-1.pb




In [None]:
#@title Function to Get Audio Files (filtered to only those longer than 60 seconds)
import os
from pydub import AudioSegment

def get_long_audio_files(directory, min_duration=60):
    """
    Walk ``directory`` recursively and collect the audio files that run longer than ``min_duration``.

    Only files whose names end in ``.mp3`` or ``.wav`` (case-insensitive) are considered.
    Each candidate is loaded with pydub to measure its length; a file that raises while
    being processed is reported on stdout and skipped.

    Parameters:
    directory (str): Root directory to search.
    min_duration (int): Threshold in seconds; a file must be strictly longer than this to be kept.

    Returns:
    list: Paths of the matching files, in the order ``os.walk`` yields them.
    """
    audio_suffixes = ('.mp3', '.wav')
    matches = []
    for folder, _subdirs, names in os.walk(directory):
        candidates = [
            os.path.join(folder, name)
            for name in names
            if name.lower().endswith(audio_suffixes)
        ]
        for file_path in candidates:
            try:
                # len() of the loaded segment is divided by 1000 to get seconds.
                seconds = len(AudioSegment.from_file(file_path)) / 1000
                if seconds > min_duration:
                    matches.append(file_path)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")
    return matches



In [None]:
#@title Example usage:
# Scan the Drive audio folder (adjust the path to your own layout) and print every file
# that passed the length filter. `long_audio_files` is reused by the batch-processing cell.
directory_path = '/content/drive/MyDrive/Audio'
long_audio_files = get_long_audio_files(directory_path)
for file in long_audio_files:
    print(file)


In [None]:
#@title Music Genre Labels

#@markdown Next we define the music genre labels that we will be using. The order matters: `filter_predictions` looks labels up by their index in the prediction vector.
genre_labels = [
    "Blues---Boogie Woogie",
    "Blues---Chicago Blues",
    "Blues---Country Blues",
    "Blues---Delta Blues",
    "Blues---Electric Blues",
    "Blues---Harmonica Blues",
    "Blues---Jump Blues",
    "Blues---Louisiana Blues",
    "Blues---Modern Electric Blues",
    "Blues---Piano Blues",
    "Blues---Rhythm & Blues",
    "Blues---Texas Blues",
    "Brass & Military---Brass Band",
    "Brass & Military---Marches",
    "Brass & Military---Military",
    "Children's---Educational",
    "Children's---Nursery Rhymes",
    "Children's---Story",
    "Classical---Baroque",
    "Classical---Choral",
    "Classical---Classical",
    "Classical---Contemporary",
    "Classical---Impressionist",
    "Classical---Medieval",
    "Classical---Modern",
    "Classical---Neo-Classical",
    "Classical---Neo-Romantic",
    "Classical---Opera",
    "Classical---Post-Modern",
    "Classical---Renaissance",
    "Classical---Romantic",
    "Electronic---Abstract",
    "Electronic---Acid",
    "Electronic---Acid House",
    "Electronic---Acid Jazz",
    "Electronic---Ambient",
    "Electronic---Bassline",
    "Electronic---Beatdown",
    "Electronic---Berlin-School",
    "Electronic---Big Beat",
    "Electronic---Bleep",
    "Electronic---Breakbeat",
    "Electronic---Breakcore",
    "Electronic---Breaks",
    "Electronic---Broken Beat",
    "Electronic---Chillwave",
    "Electronic---Chiptune",
    "Electronic---Dance-pop",
    "Electronic---Dark Ambient",
    "Electronic---Darkwave",
    "Electronic---Deep House",
    "Electronic---Deep Techno",
    "Electronic---Disco",
    "Electronic---Disco Polo",
    "Electronic---Donk",
    "Electronic---Downtempo",
    "Electronic---Drone",
    "Electronic---Drum n Bass",
    "Electronic---Dub",
    "Electronic---Dub Techno",
    "Electronic---Dubstep",
    "Electronic---Dungeon Synth",
    "Electronic---EBM",
    "Electronic---Electro",
    "Electronic---Electro House",
    "Electronic---Electroclash",
    "Electronic---Euro House",
    "Electronic---Euro-Disco",
    "Electronic---Eurobeat",
    "Electronic---Eurodance",
    "Electronic---Experimental",
    "Electronic---Freestyle",
    "Electronic---Future Jazz",
    "Electronic---Gabber",
    "Electronic---Garage House",
    "Electronic---Ghetto",
    "Electronic---Ghetto House",
    "Electronic---Glitch",
    "Electronic---Goa Trance",
    "Electronic---Grime",
    "Electronic---Halftime",
    "Electronic---Hands Up",
    "Electronic---Happy Hardcore",
    "Electronic---Hard House",
    "Electronic---Hard Techno",
    "Electronic---Hard Trance",
    "Electronic---Hardcore",
    "Electronic---Hardstyle",
    "Electronic---Hi NRG",
    "Electronic---Hip Hop",
    "Electronic---Hip-House",
    "Electronic---House",
    "Electronic---IDM",
    "Electronic---Illbient",
    "Electronic---Industrial",
    "Electronic---Italo House",
    "Electronic---Italo-Disco",
    "Electronic---Italodance",
    "Electronic---Jazzdance",
    "Electronic---Juke",
    "Electronic---Jumpstyle",
    "Electronic---Jungle",
    "Electronic---Latin",
    "Electronic---Leftfield",
    "Electronic---Makina",
    "Electronic---Minimal",
    "Electronic---Minimal Techno",
    "Electronic---Modern Classical",
    "Electronic---Musique Concrète",
    "Electronic---Neofolk",
    "Electronic---New Age",
    "Electronic---New Beat",
    "Electronic---New Wave",
    "Electronic---Noise",
    "Electronic---Nu-Disco",
    "Electronic---Power Electronics",
    "Electronic---Progressive Breaks",
    "Electronic---Progressive House",
    "Electronic---Progressive Trance",
    "Electronic---Psy-Trance",
    "Electronic---Rhythmic Noise",
    "Electronic---Schranz",
    "Electronic---Sound Collage",
    "Electronic---Speed Garage",
    "Electronic---Speedcore",
    "Electronic---Synth-pop",
    "Electronic---Synthwave",
    "Electronic---Tech House",
    "Electronic---Tech Trance",
    "Electronic---Techno",
    "Electronic---Trance",
    "Electronic---Tribal",
    "Electronic---Tribal House",
    "Electronic---Trip Hop",
    "Electronic---Tropical House",
    "Electronic---UK Garage",
    "Electronic---Vaporwave",
    "Folk, World, & Country---African",
    "Folk, World, & Country---Bluegrass",
    "Folk, World, & Country---Cajun",
    "Folk, World, & Country---Canzone Napoletana",
    "Folk, World, & Country---Catalan Music",
    "Folk, World, & Country---Celtic",
    "Folk, World, & Country---Country",
    "Folk, World, & Country---Fado",
    "Folk, World, & Country---Flamenco",
    "Folk, World, & Country---Folk",
    "Folk, World, & Country---Gospel",
    "Folk, World, & Country---Highlife",
    "Folk, World, & Country---Hillbilly",
    "Folk, World, & Country---Hindustani",
    "Folk, World, & Country---Honky Tonk",
    "Folk, World, & Country---Indian Classical",
    "Folk, World, & Country---Laïkó",
    "Folk, World, & Country---Nordic",
    "Folk, World, & Country---Pacific",
    "Folk, World, & Country---Polka",
    "Folk, World, & Country---Raï",
    "Folk, World, & Country---Romani",
    "Folk, World, & Country---Soukous",
    "Folk, World, & Country---Séga",
    "Folk, World, & Country---Volksmusik",
    "Folk, World, & Country---Zouk",
    "Folk, World, & Country---Éntekhno",
    "Funk / Soul---Afrobeat",
    "Funk / Soul---Boogie",
    "Funk / Soul---Contemporary R&B",
    "Funk / Soul---Disco",
    "Funk / Soul---Free Funk",
    "Funk / Soul---Funk",
    "Funk / Soul---Gospel",
    "Funk / Soul---Neo Soul",
    "Funk / Soul---New Jack Swing",
    "Funk / Soul---P.Funk",
    "Funk / Soul---Psychedelic",
    "Funk / Soul---Rhythm & Blues",
    "Funk / Soul---Soul",
    "Funk / Soul---Swingbeat",
    "Funk / Soul---UK Street Soul",
    "Hip Hop---Bass Music",
    "Hip Hop---Boom Bap",
    "Hip Hop---Bounce",
    "Hip Hop---Britcore",
    "Hip Hop---Cloud Rap",
    "Hip Hop---Conscious",
    "Hip Hop---Crunk",
    "Hip Hop---Cut-up/DJ",
    "Hip Hop---DJ Battle Tool",
    "Hip Hop---Electro",
    "Hip Hop---G-Funk",
    "Hip Hop---Gangsta",
    "Hip Hop---Grime",
    "Hip Hop---Hardcore Hip-Hop",
    "Hip Hop---Horrorcore",
    "Hip Hop---Instrumental",
    "Hip Hop---Jazzy Hip-Hop",
    "Hip Hop---Miami Bass",
    "Hip Hop---Pop Rap",
    "Hip Hop---Ragga HipHop",
    "Hip Hop---RnB/Swing",
    "Hip Hop---Screw",
    "Hip Hop---Thug Rap",
    "Hip Hop---Trap",
    "Hip Hop---Trip Hop",
    "Hip Hop---Turntablism",
    "Jazz---Afro-Cuban Jazz",
    "Jazz---Afrobeat",
    "Jazz---Avant-garde Jazz",
    "Jazz---Big Band",
    "Jazz---Bop",
    "Jazz---Bossa Nova",
    "Jazz---Contemporary Jazz",
    "Jazz---Cool Jazz",
    "Jazz---Dixieland",
    "Jazz---Easy Listening",
    "Jazz---Free Improvisation",
    "Jazz---Free Jazz",
    "Jazz---Fusion",
    "Jazz---Gypsy Jazz",
    "Jazz---Hard Bop",
    "Jazz---Jazz-Funk",
    "Jazz---Jazz-Rock",
    "Jazz---Latin Jazz",
    "Jazz---Modal",
    "Jazz---Post Bop",
    "Jazz---Ragtime",
    "Jazz---Smooth Jazz",
    "Jazz---Soul-Jazz",
    "Jazz---Space-Age",
    "Jazz---Swing",
    "Latin---Afro-Cuban",
    "Latin---Baião",
    "Latin---Batucada",
    "Latin---Beguine",
    "Latin---Bolero",
    "Latin---Boogaloo",
    "Latin---Bossanova",
    "Latin---Cha-Cha",
    "Latin---Charanga",
    "Latin---Compas",
    "Latin---Cubano",
    "Latin---Cumbia",
    "Latin---Descarga",
    "Latin---Forró",
    "Latin---Guaguancó",
    "Latin---Guajira",
    "Latin---Guaracha",
    "Latin---MPB",
    "Latin---Mambo",
    "Latin---Mariachi",
    "Latin---Merengue",
    "Latin---Norteño",
    "Latin---Nueva Cancion",
    "Latin---Pachanga",
    "Latin---Porro",
    "Latin---Ranchera",
    "Latin---Reggaeton",
    "Latin---Rumba",
    "Latin---Salsa",
    "Latin---Samba",
    "Latin---Son",
    "Latin---Son Montuno",
    "Latin---Tango",
    "Latin---Tejano",
    "Latin---Vallenato",
    "Non-Music---Audiobook",
    "Non-Music---Comedy",
    "Non-Music---Dialogue",
    "Non-Music---Education",
    "Non-Music---Field Recording",
    "Non-Music---Interview",
    "Non-Music---Monolog",
    "Non-Music---Poetry",
    "Non-Music---Political",
    "Non-Music---Promotional",
    "Non-Music---Radioplay",
    "Non-Music---Religious",
    "Non-Music---Spoken Word",
    "Pop---Ballad",
    "Pop---Bollywood",
    "Pop---Bubblegum",
    "Pop---Chanson",
    "Pop---City Pop",
    "Pop---Europop",
    "Pop---Indie Pop",
    "Pop---J-pop",
    "Pop---K-pop",
    "Pop---Kayōkyoku",
    "Pop---Light Music",
    "Pop---Music Hall",
    "Pop---Novelty",
    "Pop---Parody",
    "Pop---Schlager",
    "Pop---Vocal",
    "Reggae---Calypso",
    "Reggae---Dancehall",
    "Reggae---Dub",
    "Reggae---Lovers Rock",
    "Reggae---Ragga",
    "Reggae---Reggae",
    "Reggae---Reggae-Pop",
    "Reggae---Rocksteady",
    "Reggae---Roots Reggae",
    "Reggae---Ska",
    "Reggae---Soca",
    "Rock---AOR",
    "Rock---Acid Rock",
    "Rock---Acoustic",
    "Rock---Alternative Rock",
    "Rock---Arena Rock",
    "Rock---Art Rock",
    "Rock---Atmospheric Black Metal",
    "Rock---Avantgarde",
    "Rock---Beat",
    "Rock---Black Metal",
    "Rock---Blues Rock",
    "Rock---Brit Pop",
    "Rock---Classic Rock",
    "Rock---Coldwave",
    "Rock---Country Rock",
    "Rock---Crust",
    "Rock---Death Metal",
    "Rock---Deathcore",
    "Rock---Deathrock",
    "Rock---Depressive Black Metal",
    "Rock---Doo Wop",
    "Rock---Doom Metal",
    "Rock---Dream Pop",
    "Rock---Emo",
    "Rock---Ethereal",
    "Rock---Experimental",
    "Rock---Folk Metal",
    "Rock---Folk Rock",
    "Rock---Funeral Doom Metal",
    "Rock---Funk Metal",
    "Rock---Garage Rock",
    "Rock---Glam",
    "Rock---Goregrind",
    "Rock---Goth Rock",
    "Rock---Gothic Metal",
    "Rock---Grindcore",
    "Rock---Grunge",
    "Rock---Hard Rock",
    "Rock---Hardcore",
    "Rock---Heavy Metal",
    "Rock---Indie Rock",
    "Rock---Industrial",
    "Rock---Krautrock",
    "Rock---Lo-Fi",
    "Rock---Lounge",
    "Rock---Math Rock",
    "Rock---Melodic Death Metal",
    "Rock---Melodic Hardcore",
    "Rock---Metalcore",
    "Rock---Mod",
    "Rock---Neofolk",
    "Rock---New Wave",
    "Rock---No Wave",
    "Rock---Noise",
    "Rock---Noisecore",
    "Rock---Nu Metal",
    "Rock---Oi",
    "Rock---Parody",
    "Rock---Pop Punk",
    "Rock---Pop Rock",
    "Rock---Pornogrind",
    "Rock---Post Rock",
    "Rock---Post-Hardcore",
    "Rock---Post-Metal",
    "Rock---Post-Punk",
    "Rock---Power Metal",
    "Rock---Power Pop",
    "Rock---Power Violence",
    "Rock---Prog Rock",
    "Rock---Progressive Metal",
    "Rock---Psychedelic Rock",
    "Rock---Psychobilly",
    "Rock---Pub Rock",
    "Rock---Punk",
    "Rock---Rock & Roll",
    "Rock---Rockabilly",
    "Rock---Shoegaze",
    "Rock---Ska",
    "Rock---Sludge Metal",
    "Rock---Soft Rock",
    "Rock---Southern Rock",
    "Rock---Space Rock",
    "Rock---Speed Metal",
    "Rock---Stoner Rock",
    "Rock---Surf",
    "Rock---Symphonic Rock",
    "Rock---Technical Death Metal",
    "Rock---Thrash",
    "Rock---Twist",
    "Rock---Viking Metal",
    "Rock---Yé-Yé",
    "Stage & Screen---Musical",
    "Stage & Screen---Score",
    "Stage & Screen---Soundtrack",
    "Stage & Screen---Theme",
]





## Auth to Google

In [None]:

# Authenticate this Colab session with Google, then build a gspread client from the default credentials.
from google.colab import auth
auth.authenticate_user()

import gspread
from google.auth import default
creds, _ = default()  # the second element of the returned tuple is not used here

gc = gspread.authorize(creds)  # client used below to open the spreadsheet

### Adding the Music Genres to a Google Sheet

In [None]:
# Open the existing spreadsheet titled 'genre_labels'; `sh` is reused by every sheet cell below.
sh = gc.open('genre_labels')

# if you need to create the sheet, use this instead:
# sh = gc.create('genre_labels')

In [None]:
# Worksheet that receives the genre labels in the next cell.
genre_worksheet = sh.sheet1

In [None]:
# Write the genre labels to column A under a header row.
# The header in A1 matches the layout of the other label sheets and stops
# `get_all_records()` (which uses row 1 as the keys) from consuming the first genre as a header.
genre_worksheet.update_acell('A1', 'genre_labels')

# Labels occupy A2..A(n+1), one per row, in list order.
cell_list = genre_worksheet.range(f'A2:A{len(genre_labels) + 1}')

for cell, label in zip(cell_list, genre_labels):
    cell.value = label

# Send all modified cells with a single update_cells call.
genre_worksheet.update_cells(cell_list)
print("Google Sheet created with genre labels")

Google Sheet created with genre labels


In [None]:
# Read the sheet back to confirm the write.
genre_worksheet.get_all_records()

In [None]:
# @title Mood and Theme Classes
#@markdown Next we define the mood and theme classes used to tag our music. The order matters: `filter_predictions` looks labels up by their index in the prediction vector.
mood_theme_classes = [
    "action",
    "adventure",
    "advertising",
    "background",
    "ballad",
    "calm",
    "children",
    "christmas",
    "commercial",
    "cool",
    "corporate",
    "dark",
    "deep",
    "documentary",
    "drama",
    "dramatic",
    "dream",
    "emotional",
    "energetic",
    "epic",
    "fast",
    "film",
    "fun",
    "funny",
    "game",
    "groovy",
    "happy",
    "heavy",
    "holiday",
    "hopeful",
    "inspiring",
    "love",
    "meditative",
    "melancholic",
    "melodic",
    "motivational",
    "movie",
    "nature",
    "party",
    "positive",
    "powerful",
    "relaxing",
    "retro",
    "romantic",
    "sad",
    "sexy",
    "slow",
    "soft",
    "soundscape",
    "space",
    "sport",
    "summer",
    "trailer",
    "travel",
    "upbeat",
    "uplifting"
]

In [None]:
# prompt: Let's add another sheet to 'sh' called 'moods_and_themes' and add a header at A1 called 'moods_and_themes' and then adding all values from 'mood_theme_classes' as rows

# Fetch the 'moods_and_themes' worksheet, creating it when it does not exist yet,
# so the cell works both on a fresh spreadsheet and on re-runs.
try:
    worksheet_mood_theme = sh.worksheet('moods_and_themes')
except gspread.exceptions.WorksheetNotFound:
    worksheet_mood_theme = sh.add_worksheet(title='moods_and_themes', rows="1000", cols="20")

# Update the header
worksheet_mood_theme.update_acell('A1', 'moods_and_themes')

# Add the mood_theme_classes as rows starting from A2
cell_list = worksheet_mood_theme.range(f'A2:A{len(mood_theme_classes)+1}')
for cell, mood_theme in zip(cell_list, mood_theme_classes):
    cell.value = mood_theme

# Send all modified cells with a single update_cells call.
worksheet_mood_theme.update_cells(cell_list)

print("Mood and Theme labels added to a new worksheet")

Mood and Theme labels added to a new worksheet


In [None]:
# @title instrument classes
#@markdown Next we add our instrument classes for our music. The order matters: `filter_predictions` looks labels up by their index in the prediction vector.
instrument_classes = [
    "accordion",
    "acousticbassguitar",
    "acousticguitar",
    "bass",
    "beat",
    "bell",
    "bongo",
    "brass",
    "cello",
    "clarinet",
    "classicalguitar",
    "computer",
    "doublebass",
    "drummachine",
    "drums",
    "electricguitar",
    "electricpiano",
    "flute",
    "guitar",
    "harmonica",
    "harp",
    "horn",
    "keyboard",
    "oboe",
    "orchestra",
    "organ",
    "pad",
    "percussion",
    "piano",
    "pipeorgan",
    "rhodes",
    "sampler",
    "saxophone",
    "strings",
    "synthesizer",
    "trombone",
    "trumpet",
    "viola",
    "violin",
    "voice"
]

In [None]:
# prompt: Let's add another sheet to 'sh' called 'instrument_classes' and add a header at A1 called 'instrument_classes' and then adding all values from 'instrument_classes' as rows

# Fetch the 'instrument_classes' worksheet, creating it only when it does not exist yet,
# so re-running this cell does not fail on a duplicate sheet title.
try:
    worksheet_instrument_classes = sh.worksheet('instrument_classes')
except gspread.exceptions.WorksheetNotFound:
    worksheet_instrument_classes = sh.add_worksheet(title='instrument_classes', rows="1000", cols="20")

# Update the header
worksheet_instrument_classes.update_acell('A1', 'instrument_classes')

# Add the instrument_classes as rows starting from A2
cell_list = worksheet_instrument_classes.range(f'A2:A{len(instrument_classes)+1}')
for cell, instrument in zip(cell_list, instrument_classes):
    cell.value = instrument

# Send all modified cells with a single update_cells call.
worksheet_instrument_classes.update_cells(cell_list)

print("Instrument classes added to a new worksheet")

Instrument classes added to a new worksheet


In [None]:
# prompt: # prompt: Let's add another sheet to 'sh' called 'music_tagging' and add a header at A1 called 'song_name' and a header at B1 called 'genres' and a header at C1 called 'moods_and_themes' and then a header at D1 called 'instruments'

# Fetch the 'music_tagging' worksheet, creating it only when it does not exist yet,
# so re-running this cell does not fail on a duplicate sheet title.
try:
    worksheet_music_tagging = sh.worksheet('music_tagging')
except gspread.exceptions.WorksheetNotFound:
    worksheet_music_tagging = sh.add_worksheet(title='music_tagging', rows="1000", cols="20")

# Update the headers (one column per tag family; rows are appended by the processing cells)
worksheet_music_tagging.update_acell('A1', 'song_name')
worksheet_music_tagging.update_acell('B1', 'genres')
worksheet_music_tagging.update_acell('C1', 'moods_and_themes')
worksheet_music_tagging.update_acell('D1', 'instruments')

{'spreadsheetId': '1lh2NJ8p-7Lp0ZUsVAHsNg1ELgcHs0WYABeTooQA5WCw',
 'updatedRange': 'music_tagging!D1',
 'updatedRows': 1,
 'updatedColumns': 1,
 'updatedCells': 1}

In [None]:
# Look the worksheet up by title rather than by position: index 3 is only correct
# while the spreadsheet's tabs stay in their original order.
worksheet_music_tagging = sh.worksheet('music_tagging')
worksheet_music_tagging.get_all_records()

[{'song_name': '/content/Ghost of Tomorrow_1.0.0.wav',
  'genres': 'Electronic---Synthwave_(0.39022335410118103), Electronic---Synth-pop_(0.1479136347770691), Electronic---Darkwave_(0.11130209267139435)',
  'moods_and_themes': 'energetic_(0.17382456362247467), melodic_(0.08527339994907379), love_(0.08090018481016159), happy_(0.07108449935913086), motivational_(0.0566890686750412), uplifting_(0.05144369974732399), space_(0.05130566656589508)',
  'instruments': 'guitar_(0.37892022728919983), synthesizer_(0.3507300913333893), drums_(0.2975122630596161), piano_(0.2644599378108978), bass_(0.233682781457901), electricguitar_(0.16659238934516907), keyboard_(0.11333607882261276)'},
 {'song_name': '/content/Ghost of Tomorrow_1.0.0.wav',
  'genres': 'Electronic---Synthwave_(0.39022335410118103), Electronic---Synth-pop_(0.1479136347770691), Electronic---Darkwave_(0.11130209267139435)',
  'moods_and_themes': 'energetic_(0.17382456362247467), melodic_(0.08527339994907379), love_(0.08090018481016159

In [None]:
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D
import numpy as np

# audio_filename = "/content/gh110mix.mp3"
audio_filename = "/content/Ghost of Tomorrow_1.0.0.wav"

# Load the file at 16 kHz, then run the Discogs-EffNet graph to get the
# embeddings consumed by the three classifier heads below.
# The .pb path is relative, so the weights must already be in the working directory.
audio = MonoLoader(filename=audio_filename, sampleRate=16000, resampleQuality=4)()
embedding_model = TensorflowPredictEffnetDiscogs(graphFilename="discogs-effnet-bs64-1.pb", output="PartitionedCall:1")
embeddings = embedding_model(audio)

# Function for filter predictions for our music
def filter_predictions(predictions, class_list, threshold=0.1):
    """Average ``predictions`` over axis 0 and keep the classes scoring above ``threshold``.

    Parameters:
    predictions: Array of scores; it is averaged along axis 0 before filtering.
    class_list: Labels, looked up by their index in the averaged score vector.
    threshold (float): A class is kept only when its mean score is strictly greater.

    Returns:
    tuple: ``(labels, scores)`` as two parallel lists, sorted by descending mean score.
    """
    mean_scores = np.mean(predictions, axis=0)
    kept_labels, kept_scores = [], []
    # argsort is ascending, so walk it reversed to visit the highest score first.
    for idx in np.argsort(mean_scores)[::-1]:
        if mean_scores[idx] > threshold:
            kept_labels.append(class_list[idx])
            kept_scores.append(mean_scores[idx])
    return kept_labels, kept_scores


# Classify the track with each classifier head and append one row
# (audio_filename, genres, moods/themes, instruments) to worksheet_music_tagging.
# Reads the `embeddings` and `audio_filename` globals set earlier in this cell.

# predict genres
model = TensorflowPredict2D(graphFilename="genre_discogs400-discogs-effnet-1.pb", input="serving_default_model_Placeholder", output="PartitionedCall:0")
predictions = model(embeddings)
filtered_labels, filtered_values = filter_predictions(predictions, genre_labels)  # default threshold of 0.1

filtered_labels_genre = []

print("Genres:")
for label, value in zip(filtered_labels, filtered_values):
    print(f"{label} ({value})")

    # replace spaces with underscores and add to filtered_labels_genre
    label_value_str = f"{label}_({value})"
    filtered_labels_genre.append(label_value_str.replace(" ", "_"))
    print(f" added {label_value_str}")  # prints the string as built, before spaces are replaced

print()

# predict mood/theme classes (this head is loaded without explicit input/output node names)
model = TensorflowPredict2D(graphFilename="mtg_jamendo_moodtheme-discogs-effnet-1.pb")
predictions = model(embeddings)
# lower threshold (0.05) than the other two heads
filtered_labels, filtered_values = filter_predictions(predictions, mood_theme_classes, threshold=0.05)

filtered_labels_mood_theme = []

print("Mood/Theme classes:")
for label, value in zip(filtered_labels, filtered_values):
    print(f"{label} ({value})")
    label_value_str = f"{label}_({value})"
    filtered_labels_mood_theme.append(label_value_str.replace(" ", "_"))
    print(f" added {label_value_str}")



print()

# predict instrumentation
model = TensorflowPredict2D(graphFilename="mtg_jamendo_instrument-discogs-effnet-1.pb")
predictions = model(embeddings)
filtered_labels, filtered_values = filter_predictions(predictions, instrument_classes)  # default threshold of 0.1
filtered_labels_instrument = []

print("Instruments:")
for label, value in zip(filtered_labels, filtered_values):
    print(f"{label} ({value})")

    label_value_str = f"{label}_({value})"
    filtered_labels_instrument.append(label_value_str.replace(" ", "_"))
    print(f" added {label_value_str}")

print()

# Each tag family becomes one comma-separated cell of "label_(score)" entries.
genres_str = ', '.join(filtered_labels_genre)
mood_and_theme_classes_str = ', '.join(filtered_labels_mood_theme)
instruments_str = ', '.join(filtered_labels_instrument)

# Column order matches the sheet headers: song_name, genres, moods_and_themes, instruments.
row_data = [audio_filename, genres_str, mood_and_theme_classes_str, instruments_str]

worksheet_music_tagging.append_row(row_data)
print("Music data added to music_tagging sheet")

Genres:
Electronic---Synthwave (0.39022335410118103)
 added Electronic---Synthwave_(0.39022335410118103)
Electronic---Synth-pop (0.1479136347770691)
 added Electronic---Synth-pop_(0.1479136347770691)
Electronic---Darkwave (0.11130209267139435)
 added Electronic---Darkwave_(0.11130209267139435)

Mood/Theme classes:
energetic (0.17382456362247467)
 added energetic_(0.17382456362247467)
melodic (0.08527339994907379)
 added melodic_(0.08527339994907379)
love (0.08090018481016159)
 added love_(0.08090018481016159)
happy (0.07108449935913086)
 added happy_(0.07108449935913086)
motivational (0.0566890686750412)
 added motivational_(0.0566890686750412)
uplifting (0.05144369974732399)
 added uplifting_(0.05144369974732399)
space (0.05130566656589508)
 added space_(0.05130566656589508)

Instruments:
guitar (0.37892022728919983)
 added guitar_(0.37892022728919983)
synthesizer (0.3507300913333893)
 added synthesizer_(0.3507300913333893)
drums (0.2975122630596161)
 added drums_(0.2975122630596161)


In [None]:
# Initialize models once
# Embedding extractor; its output feeds the three classifier heads below.
embedding_model = TensorflowPredictEffnetDiscogs(
    graphFilename="discogs-effnet-bs64-1.pb",
    output="PartitionedCall:1"
)

# Genre head: this graph is loaded with explicit input/output node names.
genre_model = TensorflowPredict2D(
    graphFilename="genre_discogs400-discogs-effnet-1.pb",
    input="serving_default_model_Placeholder",
    output="PartitionedCall:0"
)

# Mood/theme head: must load the mood/theme graph (it previously pointed at the genre
# graph, which would have produced genre scores indexed against mood labels).
# Loaded without explicit node names, the same way the single-track cell loads this graph.
mood_theme_model = TensorflowPredict2D(graphFilename="mtg_jamendo_moodtheme-discogs-effnet-1.pb")

# Instrument head, also loaded without explicit node names.
instrument_model = TensorflowPredict2D(graphFilename="mtg_jamendo_instrument-discogs-effnet-1.pb")

In [None]:
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D
import numpy as np



# Function: Load audio file
def load_audio(filename, sample_rate=16000):
    """Load ``filename`` with Essentia's ``MonoLoader`` and return what the loader produces.

    ``sample_rate`` is forwarded as the loader's ``sampleRate``; ``resampleQuality`` is fixed at 4.
    """
    loader = MonoLoader(filename=filename, sampleRate=sample_rate, resampleQuality=4)
    return loader()

# Function: Generate embeddings
def get_embeddings(audio, model_path="discogs-effnet-bs64-1.pb"):
    """Run the EffNet-Discogs graph stored at ``model_path`` over ``audio`` and return the result.

    A new predictor is built from the graph file on every call.
    """
    return TensorflowPredictEffnetDiscogs(graphFilename=model_path, output="PartitionedCall:1")(audio)

# Function for filter predictions for our music
def filter_predictions(predictions, class_list, threshold=0.1):
    """Return the labels and mean scores of the classes whose mean score exceeds ``threshold``.

    ``predictions`` is averaged along axis 0; classes are then ranked by descending mean
    score and only those strictly above ``threshold`` are kept. Labels are taken from
    ``class_list`` by index. The result is ``(labels, scores)`` as two parallel lists.
    """
    mean_scores = np.mean(predictions, axis=0)
    descending = np.argsort(mean_scores)[::-1]
    ranked_pairs = [
        (class_list[idx], mean_scores[idx])
        for idx in descending
        if mean_scores[idx] > threshold
    ]
    labels = [label for label, _ in ranked_pairs]
    scores = [score for _, score in ranked_pairs]
    return labels, scores


# We will be adding audio_filename, genres, mood_and_theme_classes, and instruments as a row to worksheet_music_tagging

def _resolve_embeddings(embeddings):
    """Return ``embeddings`` when given; otherwise fall back to the notebook-global ``embeddings``.

    The fallback keeps zero-argument calls to the ``predict_*`` functions working as before.
    """
    if embeddings is not None:
        return embeddings
    try:
        return globals()["embeddings"]
    except KeyError:
        raise NameError("name 'embeddings' is not defined") from None


def _classify(title, model, embeddings, class_list, threshold=0.1):
    """Run one classifier head, print the labels above ``threshold``, and return them.

    Each returned entry is a ``"label_(score)"`` string with spaces replaced by underscores.
    """
    predictions = model(embeddings)
    filtered_labels, filtered_values = filter_predictions(predictions, class_list, threshold=threshold)

    formatted = []
    print(title)
    for label, value in zip(filtered_labels, filtered_values):
        print(f"{label} ({value})")
        label_value_str = f"{label}_({value})"
        formatted.append(label_value_str.replace(" ", "_"))
        print(f" added {label_value_str}")

    print()
    return formatted


def predict_genres(embeddings=None):
    """Predict genres for ``embeddings`` and return the ``"label_(score)"`` strings above 0.1.

    Parameters:
    embeddings: Output of ``get_embeddings`` for the track being classified. When omitted,
        the notebook-global ``embeddings`` is used.
    """
    # This graph is loaded with explicit input/output node names.
    model = TensorflowPredict2D(
        graphFilename="genre_discogs400-discogs-effnet-1.pb",
        input="serving_default_model_Placeholder",
        output="PartitionedCall:0")
    return _classify("Genres:", model, _resolve_embeddings(embeddings), genre_labels)


def predict_music_theme(embeddings=None):
    """Predict mood/theme classes for ``embeddings`` and return the ``"label_(score)"`` strings above 0.05.

    Parameters:
    embeddings: Output of ``get_embeddings`` for the track being classified. When omitted,
        the notebook-global ``embeddings`` is used.
    """
    model = TensorflowPredict2D(graphFilename="mtg_jamendo_moodtheme-discogs-effnet-1.pb")
    return _classify(
        "Mood/Theme classes:", model, _resolve_embeddings(embeddings), mood_theme_classes, threshold=0.05
    )


# predict instrumentation
def predict_instruments(embeddings=None):
    """Predict instruments for ``embeddings`` and return the ``"label_(score)"`` strings above 0.1.

    Parameters:
    embeddings: Output of ``get_embeddings`` for the track being classified. When omitted,
        the notebook-global ``embeddings`` is used.
    """
    model = TensorflowPredict2D(graphFilename="mtg_jamendo_instrument-discogs-effnet-1.pb")
    return _classify("Instruments:", model, _resolve_embeddings(embeddings), instrument_classes)


# Function: Add data to Google Sheet
def add_to_sheet(worksheet, row_data):
    """Append ``row_data`` as a new row of ``worksheet`` and print a confirmation."""
    worksheet.append_row(row_data)
    print("Data successfully added to the Google Sheet")


# Main Processing Logic
def process_audio(filename):
    """Classify one audio file and append its tags to ``worksheet_music_tagging``.

    The appended row is ``[filename, genres, moods/themes, instruments]``, where each tag
    column is a comma-separated string of ``"label_(score)"`` entries.
    """
    audio = load_audio(filename)

    # Generate embeddings
    embeddings = get_embeddings(audio)

    # Pass this file's embeddings explicitly. Without the argument the predictors read
    # the notebook-global `embeddings`, so every file would get the same tags.
    filtered_genres = predict_genres(embeddings)
    filtered_mood_theme = predict_music_theme(embeddings)
    filtered_instruments = predict_instruments(embeddings)

    # Prepare data (entries already have their spaces replaced by the predictors)
    genres_str = ', '.join(filtered_genres)
    mood_theme_str = ', '.join(filtered_mood_theme)
    instruments_str = ', '.join(filtered_instruments)
    row_data = [filename, genres_str, mood_theme_str, instruments_str]

    # Add data to Google Sheet
    add_to_sheet(worksheet_music_tagging, row_data)

# Example usage
# audio_filename = "/content/Ghost of Tomorrow_1.0.0.wav"
audio_filename = "/content/Love spells_1.wav"
# Classify this one file and append its tags to the music_tagging worksheet.
process_audio(audio_filename)


Genres:
Electronic---Synthwave (0.39022335410118103)
 added Electronic---Synthwave_(0.39022335410118103)
Electronic---Synth-pop (0.1479136347770691)
 added Electronic---Synth-pop_(0.1479136347770691)
Electronic---Darkwave (0.11130209267139435)
 added Electronic---Darkwave_(0.11130209267139435)

Mood/Theme classes:
energetic (0.17382456362247467)
 added energetic_(0.17382456362247467)
melodic (0.08527339994907379)
 added melodic_(0.08527339994907379)
love (0.08090018481016159)
 added love_(0.08090018481016159)
happy (0.07108449935913086)
 added happy_(0.07108449935913086)
motivational (0.0566890686750412)
 added motivational_(0.0566890686750412)
uplifting (0.05144369974732399)
 added uplifting_(0.05144369974732399)
space (0.05130566656589508)
 added space_(0.05130566656589508)

Instruments:
guitar (0.37892022728919983)
 added guitar_(0.37892022728919983)
synthesizer (0.3507300913333893)
 added synthesizer_(0.3507300913333893)
drums (0.2975122630596161)
 added drums_(0.2975122630596161)


In [None]:
import time
# Batch run: tag every path in long_audio_files (defined outside this cell),
# appending one sheet row per file through process_audio().
for song in long_audio_files:
  process_audio(song)
  print(f"Processed {song}")
  # Presumably paces the Google Sheets appends to stay under API rate limits -- TODO confirm.
  time.sleep(1)

Genres:
Electronic---Synthwave (0.39022335410118103)
 added Electronic---Synthwave_(0.39022335410118103)
Electronic---Synth-pop (0.1479136347770691)
 added Electronic---Synth-pop_(0.1479136347770691)
Electronic---Darkwave (0.11130209267139435)
 added Electronic---Darkwave_(0.11130209267139435)

Mood/Theme classes:
energetic (0.17382456362247467)
 added energetic_(0.17382456362247467)
melodic (0.08527339994907379)
 added melodic_(0.08527339994907379)
love (0.08090018481016159)
 added love_(0.08090018481016159)
happy (0.07108449935913086)
 added happy_(0.07108449935913086)
motivational (0.0566890686750412)
 added motivational_(0.0566890686750412)
uplifting (0.05144369974732399)
 added uplifting_(0.05144369974732399)
space (0.05130566656589508)
 added space_(0.05130566656589508)

Instruments:
guitar (0.37892022728919983)
 added guitar_(0.37892022728919983)
synthesizer (0.3507300913333893)
 added synthesizer_(0.3507300913333893)
drums (0.2975122630596161)
 added drums_(0.2975122630596161)


In [None]:
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D
import numpy as np



# Function: Load audio file
def load_audio(filename, sample_rate=16000):
    """Load an audio file through Essentia's ``MonoLoader``.

    Args:
        filename: Path of the audio file to read.
        sample_rate: Value forwarded as the loader's ``sampleRate`` (default 16000).

    Returns:
        Whatever the configured ``MonoLoader`` returns when called.
    """
    loader = MonoLoader(
        filename=filename,
        sampleRate=sample_rate,
        resampleQuality=4,
    )
    return loader()

# Function: Generate embeddings
def get_embeddings(audio, model_path="discogs-effnet-bs64-1.pb"):
    """Run the shared embedding model over ``audio`` and return its output.

    Args:
        audio: Audio samples to embed; passed straight to ``embedding_model``.
        model_path: Currently unused. Building a model per call
            (``TensorflowPredictEffnetDiscogs(graphFilename=model_path, ...)``)
            is disabled, so the module-level ``embedding_model`` is always
            used. The parameter is kept so existing callers keep working.

    Returns:
        Whatever ``embedding_model(audio)`` returns.
    """
    # Run inference exactly once: the previous version called the model twice
    # and threw the first result away, doubling the cost per file.
    return embedding_model(audio)

# Function: Filter predictions for our music
def filter_predictions(predictions, class_list, threshold=0.1):
    """Keep the classes whose mean score is strictly above ``threshold``.

    Args:
        predictions: Array of scores; it is averaged along axis 0.
        class_list: Labels, indexed by position in the averaged scores.
        threshold: A mean score must be greater than this to be kept.

    Returns:
        ``(labels, scores)``: two parallel lists ordered from the highest
        mean score to the lowest.
    """
    mean_scores = np.mean(predictions, axis=0)
    # Visit class indices from strongest to weakest mean score.
    ranked = np.argsort(mean_scores)[::-1]
    kept = [
        (class_list[idx], mean_scores[idx])
        for idx in ranked
        if mean_scores[idx] > threshold
    ]
    labels = [name for name, _ in kept]
    scores = [score for _, score in kept]
    return labels, scores


# We will be adding audio_filename, genres, mood_and_theme_classes, and instruments as a row to worksheet_music_tagging

def predict_genres():
  """Classify the module-level ``embeddings`` by genre and report the hits.

  Runs ``genre_model`` on ``embeddings``, keeps the labels selected by
  ``filter_predictions`` (default threshold) against ``genre_labels``, and
  prints each label with its score.

  Returns:
      A list of ``"<label>_(<score>)"`` strings with spaces replaced by
      underscores, in the order returned by ``filter_predictions``.
  """
  # The per-call TensorflowPredict2D construction was replaced by the shared
  # module-level ``genre_model``.
  genre_scores = genre_model(embeddings)
  names, scores = filter_predictions(genre_scores, genre_labels)

  print("Genres:")
  genre_tags = []
  for name, score in zip(names, scores):
    tag = f"{name}_({score})"
    print(f"{name} ({score})")
    # The stored tag has its spaces replaced; the echoed one does not.
    genre_tags.append(tag.replace(" ", "_"))
    print(f" added {tag}")

  print()
  return genre_tags

def predict_music_theme():
  """Classify the module-level ``embeddings`` by mood/theme and report the hits.

  Runs ``mood_theme_model`` on ``embeddings``, keeps the labels selected by
  ``filter_predictions`` with ``threshold=0.05`` against
  ``mood_theme_classes``, and prints each label with its score.

  Returns:
      A list of ``"<label>_(<score>)"`` strings with spaces replaced by
      underscores, in the order returned by ``filter_predictions``.
  """
  mood_scores = mood_theme_model(embeddings)
  names, scores = filter_predictions(mood_scores, mood_theme_classes, threshold=0.05)

  print("Mood/Theme classes:")
  mood_tags = []
  for name, score in zip(names, scores):
    tag = f"{name}_({score})"
    print(f"{name} ({score})")
    # The stored tag has its spaces replaced; the echoed one does not.
    mood_tags.append(tag.replace(" ", "_"))
    print(f" added {tag}")

  print()
  return mood_tags


# predict instrumentation
def predict_instruments():
  """Classify the module-level ``embeddings`` by instrument and report the hits.

  Runs ``instrument_model`` on ``embeddings``, keeps the labels selected by
  ``filter_predictions`` (default threshold) against ``instrument_classes``,
  and prints each label with its score.

  Returns:
      A list of ``"<label>_(<score>)"`` strings with spaces replaced by
      underscores, in the order returned by ``filter_predictions``.
  """
  instrument_scores = instrument_model(embeddings)
  names, scores = filter_predictions(instrument_scores, instrument_classes)

  print("Instruments:")
  instrument_tags = []
  for name, score in zip(names, scores):
    tag = f"{name}_({score})"
    print(f"{name} ({score})")
    # The stored tag has its spaces replaced; the echoed one does not.
    instrument_tags.append(tag.replace(" ", "_"))
    print(f" added {tag}")

  print()
  return instrument_tags

# Function: Add data to Google Sheet
def add_to_sheet(worksheet, row_data):
    """Append ``row_data`` to ``worksheet`` and print a confirmation.

    Args:
        worksheet: Object exposing ``append_row`` (presumably a gspread
            worksheet -- confirm against the cell that creates it).
        row_data: The row passed unchanged to ``worksheet.append_row``.
    """
    worksheet.append_row(row_data)
    # Only reached when append_row did not raise.
    print("Data successfully added to the Google Sheet")

# Main Processing Logic
def process_audio(filename):
    """Tag one audio file and append the result as a row to the Google Sheet.

    The file is loaded, embedded, and classified for genre, mood/theme and
    instruments; the three tag lists are then joined into comma-separated
    strings and written to ``worksheet_music_tagging`` via ``add_to_sheet``.

    Args:
        filename: Path of the audio file to process. It is also stored as the
            first column of the appended row.

    Returns:
        None.
    """
    # predict_genres(), predict_music_theme() and predict_instruments() take no
    # arguments and read the module-level ``embeddings``. Without this
    # declaration the assignment below only creates a local variable, so every
    # file would be classified with whatever embeddings an earlier cell left
    # in the global namespace.
    global embeddings

    audio = load_audio(filename)

    # Generate embeddings and publish them for the predict_* helpers.
    embeddings = get_embeddings(audio)

    filtered_genres = predict_genres()
    filtered_mood_theme = predict_music_theme()
    filtered_instruments = predict_instruments()

    # One sheet row: filename followed by one comma-separated cell per category.
    genres_str = ', '.join(label.replace(' ', '_') for label in filtered_genres)
    mood_theme_str = ', '.join(label.replace(' ', '_') for label in filtered_mood_theme)
    instruments_str = ', '.join(label.replace(' ', '_') for label in filtered_instruments)
    row_data = [filename, genres_str, mood_theme_str, instruments_str]

    # Add data to Google Sheet
    add_to_sheet(worksheet_music_tagging, row_data)

# Example usage: tag a single file; process_audio() also appends its row to the sheet.
# Swap in the commented-out path below to run a different track.
# audio_filename = "/content/Ghost of Tomorrow_1.0.0.wav"
audio_filename = "/content/Love spells_1.wav"
process_audio(audio_filename)


# Other Examples of Working with Essentia



## Basic Audio Loading

In [None]:
import essentia.standard as es
# Audio file shared by the loading, metadata and playback examples that follow.
file_name = "/content/Ghost of Tomorrow_1.0.0.wav"

In [None]:


# Load the audio file in mono, requesting a 16 kHz sample rate.
# The loader is configured first and only reads the file when called.
loader = es.MonoLoader(filename=file_name, sampleRate=16000, resampleQuality=4)
audio = loader()

In [None]:
# Load the whole file in stereo.
# AudioLoader returns six values; only the first (the sample array) is kept here.
audio, _, _, _, _, _ = es.AudioLoader(filename=file_name)()
print(audio.shape)

(9999359, 2)


In [None]:
# Load only a 10-second segment in mono, starting at the 60-second mark
# (startTime=60 to endTime=70), at 44.1 kHz.
audio = es.EasyLoader(filename=file_name,
                      sampleRate=44100, startTime=60, endTime=70)()
print(audio.shape)

(441000,)


## Reading File Metadata

In [None]:
# Configure a MetadataReader for the file, then call it to read the metadata.
# The result is indexed positionally by the code that follows.
metadata_reader = es.MetadataReader(filename=file_name)
audio_metadata = metadata_reader()

In [None]:
# Presumably element 7 of MetadataReader's output is its tag pool -- TODO confirm against the Essentia docs.
metadata_pool = audio_metadata[7]
# Print the name of every descriptor stored in the pool.
for d in metadata_pool.descriptorNames():
    print(d)

## Representations

In [None]:
import IPython

# Plots
import matplotlib.pyplot as plt
from pylab import plot, show, figure, imshow
# Default size (width, height) applied to every figure created after this cell.
plt.rcParams['figure.figsize'] = (15, 6)

In [None]:
# Render an audio player for the file as this cell's output.
IPython.display.Audio(file_name)

Buffered data was truncated after reaching the output size limit.

In [None]:
import essentia.standard as es
import essentia
# Load the audio file in mono, requesting a 16 kHz sample rate.
# This rebinds `audio`, which the earlier loading examples also assigned.
loader = es.MonoLoader(filename=file_name, sampleRate=16000, resampleQuality=4)
audio = loader()



In [None]:
# Configure the algorithm instances for the spectral representations; nothing is computed yet.
windowing = es.Windowing(type='blackmanharris62', zeroPadding=2048)
spectrum = es.Spectrum()
# NOTE(review): no sampleRate is passed here, while `audio` was loaded at 16 kHz above;
# the 11 kHz upper bound suggests these settings were written for 44.1 kHz audio -- confirm.
melbands = es.MelBands(numberBands=96, lowFrequencyBound=0, highFrequencyBound=11000)
spectrum_logfreq = es.LogSpectrum(binsPerSemitone=1)

# 'lin2db' operator, presumably used to convert linear values to decibels -- confirm.
amp2db = es.UnaryOperator(type='lin2db', scale=2)
# Empty Pool instance for collecting results.
pool = essentia.Pool()

## Old inference code for testing; don't run

In [None]:
# @title
# Legacy single-file genre inference, kept for reference only.
# NOTE(review): running this cell rebinds the module-level `audio`, `embedding_model`
# and `embeddings`, which get_embeddings() and the predict_* helpers read.
from essentia.standard import MonoLoader, TensorflowPredictEffnetDiscogs, TensorflowPredict2D
import numpy as np

# Load audio
audio = MonoLoader(filename="/content/what is this genre lol.flac", sampleRate=16000, resampleQuality=4)()

# Embedding model
embedding_model = TensorflowPredictEffnetDiscogs(graphFilename="discogs-effnet-bs64-1.pb", output="PartitionedCall:1")
embeddings = embedding_model(audio)

# Classification model
model = TensorflowPredict2D(graphFilename="genre_discogs400-discogs-effnet-1.pb", input="serving_default_model_Placeholder", output="PartitionedCall:0")
predictions = model(embeddings)

# Taking mean across all slices
predictions_mean = np.mean(predictions, axis=0)

# Sort indices by prediction strength
sorted_indices = np.argsort(predictions_mean)[::-1]

# Filtering predictions by threshold
filtered_indices = [i for i in sorted_indices if predictions_mean[i] > 0.15]

# Nothing cleared 0.15: retry once at 0.1 before giving up.
if len(filtered_indices) == 0:
    print("No labels found, trying again with a lower threshold")
    filtered_indices = [i for i in sorted_indices if predictions_mean[i] > 0.1]
    if len(filtered_indices) == 0:
        print('where even did you find this genre what the fuck am i listening to')
        # NOTE(review): exit(0) in a notebook likely ends the whole kernel session,
        # not just this cell -- confirm before running.
        exit(0)


# Labels and mean activations of the classes that passed the threshold.
# "---" in each genre label is rewritten as ", "; genre_labels is defined outside this cell.
filtered_labels = [genre_labels[i].replace("---", ", ") for i in filtered_indices]
filtered_values = [predictions_mean[i] for i in filtered_indices]

print("Filtered labels: ", filtered_labels)
print("Filtered values: ", filtered_values)

# make comma separated list of unique tags
seen_tags = set()
result = []

# Flatten every label into its individual tags and keep the first occurrence of each, in order.
for label in ', '.join(filtered_labels).split(', '):
    if label not in seen_tags:
        result.append(label)
        seen_tags.add(label)

final_result = ', '.join(result)
print(final_result)

Filtered labels:  ['Electronic, Bassline', 'Electronic, Electro', 'Electronic, Electro House']
Filtered values:  [0.8754932, 0.23489274, 0.23271377]
Electronic, Bassline, Electro, Electro House
