# Experiment & Config Setup

Before starting any experiment, you should define the `EXPERIMENT_NAME` (ideally, matching the name of the notebook) and the baseline configuration for all the classes that will be used across the experiment.

In [None]:
%load_ext autoreload
%autoreload 2
#-------------------------------------------------------------------------------
# REQUIRED PACKAGES
#-------------------------------------------------------------------------------
import os
import sys
from pathlib import Path
from databricks.sdk.runtime import *
# Build the notebook's directory path: '/Workspace/' prefixed onto the
# directory part of the path reported by the Databricks notebook context.
# NOTE(review): `dbutils` is presumably supplied by the star import from
# `databricks.sdk.runtime`; this line runs unconditionally, so the cell
# appears to require a Databricks notebook context — confirm.
notebook_path =  '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get())
# Switch the working directory to the notebook's directory, then move up one
# level.
os.chdir(notebook_path)
os.chdir('..')
# Append a relative entry to the import search path.
# NOTE(review): a relative `sys.path` entry is resolved against the working
# directory in effect at import time; presumably this is what makes the
# `mlops` import that follows resolvable — confirm.
sys.path.append("../..")
from mlops.utils.environment_setup import start_experiment

#-------------------------------------------------------------------------------
# CONFIGURABLE OPTIONS
#-------------------------------------------------------------------------------
# Experiment name (should match the notebook name).
EXPERIMENT_NAME = "birdnet-average-3s-chunk-embeddings-pooling-max"

# SCHEMA and MODEL_NAME are joined as f"{SCHEMA}.{MODEL_NAME}" to form the
# `name` passed to the model trainer.
SCHEMA = "frogid_ml"
MODEL_NAME = "birdnet-average-3s-chunk-embeddings-pooling-max"

# Author email address on Databricks.
CURRENT_USER = "hannah.weng@matrgroup.com"

#-------------------------------------------------------------------------------
# SYSTEM SETUP FOR EXPERIMENT
#-------------------------------------------------------------------------------
# The Databricks runtime is detected by the presence of its version variable
# in the process environment.
IS_DATABRICKS = "DATABRICKS_RUNTIME_VERSION" in os.environ

# The parent of the current working directory is used as the root directory
# and is placed at the front of the import search path.
ROOT_DIR = Path.cwd().parent
sys.path.insert(0, str(ROOT_DIR))

# Start the experiment with the options configured above; the returned object
# is kept in `experiment` for the cells that follow.
experiment = start_experiment(
    experiment_name=EXPERIMENT_NAME,
    root_dir=ROOT_DIR,
    is_databricks=IS_DATABRICKS,
    current_user=CURRENT_USER,
)

# Run Setup

In the following section you can run a version of this experiment by defining a run.

In [None]:
################################################################################
# START MLFLOW RUN
################################################################################
# This can be a new run (run_id = None) or an existing run that you want to
# reload, by specifying the run_id
################################################################################
from utils.environment_setup import start_mlflow_run
from utils.pipeline import generate_pipeline_config, instantiate_pipeline

# Start (or reload) the MLflow run; here an explicit run_id is supplied.
run_name, run_id = start_mlflow_run(
    run_id="c1bfbab4aa2b410d96c1d61e9f0fca8b",
)

# Generate the pipeline configuration for this run with two dotted-key
# overrides, then instantiate the pipeline from it.
# NOTE(review): EXPERIMENT_NAME / MODEL_NAME end in "pooling-max" while the
# override below selects the "mean" pooling strategy — confirm this is
# intended.
config = generate_pipeline_config(
    experiment,
    run_id,
    overrides={
        "evaluation_config.pooling_strategy": "mean",
        "modelling_data_strategy.other_species_sampling_strategy": "stratify",
    },
    force_save=True,
)
pipeline = instantiate_pipeline(config)

In [None]:
################################################################################
# LOAD CLEAN DATA
################################################################################
# The anchoring function determines how you select the class_label_single
# from a list of species in multi-species settings. Below we use the
# most-frequent-target strategy, which means that if there are multiple species
# the single class label will be the species that is most frequently represented
# among the list of class labels.
################################################################################

from feature_engineering.registry_anchoring_strategies import ANCHORING_STRATEGY_REGISTRY

# Load the cleaned data and their classes.
# The anchoring function is looked up under the "most-frequent-target" key of
# ANCHORING_STRATEGY_REGISTRY and handed to `load_data` as `label_anchor_fn`.
# The call returns two values, kept as `df_data` and
# `class_labels_to_species_mapping` for the cells that follow.
df_data, class_labels_to_species_mapping = pipeline.data_selector.load_data(
    label_anchor_fn=ANCHORING_STRATEGY_REGISTRY["most-frequent-target"]
)

In [None]:
################################################################################
# MODELLING
################################################################################
# The following code snippet demonstrates how to produce a reproducible ML
# model training pipeline. The process involves:
# 1. Selecting the subset of data to use for modelling based on a strategy
# 2. Downloading & Preprocessing the selected subset to create a feature df
# 3. Training the model according to the initial experiment setup
################################################################################
from mlops.training.tf_model_registry import MODEL_REGISTRY

# Step 1: Draw the modelling dataset with the data_sampler, using the
# modelling data strategy taken from the pipeline config
df_modelling = pipeline.data_sampler.sample_modelling_dataset(
    df_data=df_data, modelling_strategy=config.modelling_data_strategy
)

# Step 2: Download the files; the dataframe returned by the downloader
# replaces df_modelling (in case of missing files)
df_modelling = pipeline.data_downloader.download_files(df_modelling)

# Step 3: Run the data_preprocessor to create the feature dataframe
df_modelling_features = pipeline.data_preprocessor.run(df_modelling)

# Step 4: Train using the 'birdnet_mlp_multiclass' entry of MODEL_REGISTRY,
# passing "<SCHEMA>.<MODEL_NAME>" as the name
model = pipeline.model_trainer.train(
    df_modelling_features,
    model_fn=MODEL_REGISTRY['birdnet_mlp_multiclass'],
    name=f"{SCHEMA}.{MODEL_NAME}",
)

In [None]:
################################################################################
# EVALUATION: TEST DATA
################################################################################
# The following code snippet demonstrates how to do an evaluation of a model
# 1. Select a sample you are interested in using the data_sampler
# 2. Downloading & Preprocessing the selected subset to create a feature df
# 3. Evaluating the model by pointing to the correct run_id
################################################################################

import traceback

# Sample the test data for this run, download the files it needs, and run the
# preprocessor to obtain the feature dataframe.
df_sample = pipeline.data_sampler.sample_test_data(run_id=run_id, df=df_data)
df_sample = pipeline.data_downloader.download_files(df_sample)
df_sample_features = pipeline.data_preprocessor.run(df_sample)

# Report what is about to be evaluated.
print("🔄 Starting model evaluation...")
print(f"📊 Evaluating {len(df_sample_features)} feature samples")
print(f"🏷️ Using class mapping with {len(class_labels_to_species_mapping)} classes")

# Evaluate the model stored inside the given run_id.
# NOTE(review): results are stored under "single-species-max-1000" while the
# pipeline config override selects the "mean" pooling strategy — confirm the
# config was regenerated for this pooling variant before running.
try:
    (
        y_true,
        y_true_binarized,
        y_pred,
        y_probs,
        macro_results,
        per_species_results,
    ) = pipeline.model_evaluator.evaluate(
        run_id=run_id,
        df_features=df_sample_features,
        class_label_to_species_mapping=class_labels_to_species_mapping,
        dir_name_to_store_results="single-species-max-1000",
    )
    print("✅ Evaluation completed successfully!")
    print(f"📈 Macro results: {macro_results}")
except Exception as e:
    # Print the failure details, then re-raise so the cell still fails.
    print(f"❌ Evaluation failed with error: {str(e)}")
    print(f"🔍 Error type: {type(e).__name__}")
    traceback.print_exc()
    raise

In [None]:
################################################################################
# EVALUATION: TEST DATA
################################################################################
# The following code snippet demonstrates how to do an evaluation of a model
# 1. Select a sample you are interested in using the data_sampler
# 2. Downloading & Preprocessing the selected subset to create a feature df
# 3. Evaluating the model by pointing to the correct run_id
################################################################################

import traceback

# Sample the test data for this run, download the files it needs, and run the
# preprocessor to obtain the feature dataframe.
df_sample = pipeline.data_sampler.sample_test_data(run_id=run_id, df=df_data)
df_sample = pipeline.data_downloader.download_files(df_sample)
df_sample_features = pipeline.data_preprocessor.run(df_sample)

# Report what is about to be evaluated.
print("🔄 Starting model evaluation...")
print(f"📊 Evaluating {len(df_sample_features)} feature samples")
print(f"🏷️ Using class mapping with {len(class_labels_to_species_mapping)} classes")

# Evaluate the model stored inside the given run_id; results are stored under
# "single-species-mean-1000".
try:
    (
        y_true,
        y_true_binarized,
        y_pred,
        y_probs,
        macro_results,
        per_species_results,
    ) = pipeline.model_evaluator.evaluate(
        run_id=run_id,
        df_features=df_sample_features,
        class_label_to_species_mapping=class_labels_to_species_mapping,
        dir_name_to_store_results="single-species-mean-1000",
    )
    print("✅ Evaluation completed successfully!")
    print(f"📈 Macro results: {macro_results}")
except Exception as e:
    # Print the failure details, then re-raise so the cell still fails.
    print(f"❌ Evaluation failed with error: {str(e)}")
    print(f"🔍 Error type: {type(e).__name__}")
    traceback.print_exc()
    raise

In [None]:
################################################################################
# EVALUATION: TEST DATA
################################################################################
# The following code snippet demonstrates how to do an evaluation of a model
# 1. Select a sample you are interested in using the data_sampler
# 2. Downloading & Preprocessing the selected subset to create a feature df
# 3. Evaluating the model by pointing to the correct run_id
################################################################################

import traceback

# Sample the test data for this run, download the files it needs, and run the
# preprocessor to obtain the feature dataframe.
df_sample = pipeline.data_sampler.sample_test_data(run_id=run_id, df=df_data)
df_sample = pipeline.data_downloader.download_files(df_sample)
df_sample_features = pipeline.data_preprocessor.run(df_sample)

# Report what is about to be evaluated.
print("🔄 Starting model evaluation...")
print(f"📊 Evaluating {len(df_sample_features)} feature samples")
print(f"🏷️ Using class mapping with {len(class_labels_to_species_mapping)} classes")

# Evaluate the model stored inside the given run_id.
# NOTE(review): results are stored under "single-species-topk-1000" while the
# pipeline config override selects the "mean" pooling strategy — confirm the
# config was regenerated for this pooling variant before running.
try:
    (
        y_true,
        y_true_binarized,
        y_pred,
        y_probs,
        macro_results,
        per_species_results,
    ) = pipeline.model_evaluator.evaluate(
        run_id=run_id,
        df_features=df_sample_features,
        class_label_to_species_mapping=class_labels_to_species_mapping,
        dir_name_to_store_results="single-species-topk-1000",
    )
    print("✅ Evaluation completed successfully!")
    print(f"📈 Macro results: {macro_results}")
except Exception as e:
    # Print the failure details, then re-raise so the cell still fails.
    print(f"❌ Evaluation failed with error: {str(e)}")
    print(f"🔍 Error type: {type(e).__name__}")
    traceback.print_exc()
    raise

In [None]:
################################################################################
# EVALUATION: TEST DATA
################################################################################
# The following code snippet demonstrates how to do an evaluation of a model
# 1. Select a sample you are interested in using the data_sampler
# 2. Downloading & Preprocessing the selected subset to create a feature df
# 3. Evaluating the model by pointing to the correct run_id
################################################################################

import traceback

# Sample the test data for this run, download the files it needs, and run the
# preprocessor to obtain the feature dataframe.
df_sample = pipeline.data_sampler.sample_test_data(run_id=run_id, df=df_data)
df_sample = pipeline.data_downloader.download_files(df_sample)
df_sample_features = pipeline.data_preprocessor.run(df_sample)

# Report what is about to be evaluated.
print("🔄 Starting model evaluation...")
print(f"📊 Evaluating {len(df_sample_features)} feature samples")
print(f"🏷️ Using class mapping with {len(class_labels_to_species_mapping)} classes")

# Evaluate the model stored inside the given run_id.
# NOTE(review): results are stored under "single-species-softmax-1000" while
# the pipeline config override selects the "mean" pooling strategy — confirm
# the config was regenerated for this pooling variant before running.
try:
    (
        y_true,
        y_true_binarized,
        y_pred,
        y_probs,
        macro_results,
        per_species_results,
    ) = pipeline.model_evaluator.evaluate(
        run_id=run_id,
        df_features=df_sample_features,
        class_label_to_species_mapping=class_labels_to_species_mapping,
        dir_name_to_store_results="single-species-softmax-1000",
    )
    print("✅ Evaluation completed successfully!")
    print(f"📈 Macro results: {macro_results}")
except Exception as e:
    # Print the failure details, then re-raise so the cell still fails.
    print(f"❌ Evaluation failed with error: {str(e)}")
    print(f"🔍 Error type: {type(e).__name__}")
    traceback.print_exc()
    raise

In [None]:
################################################################################
# HELPFUL UTILITIES
################################################################################
# This section describes how you might choose to use some helpful utilities
################################################################################
from utils.cache_utils import clear_cache
from dataclasses import replace

# You can clear local directories by specifying what ids you want to keep.
# Here the directory is the experiment's "audio_files_path" entry, `keep_ids`
# is an empty list and `keep_extensions` is {"wav"}; the value returned by
# `clear_cache` is kept in `deleted_files_list`.
# NOTE(review): the exact deletion rules (how `keep_ids` and `keep_extensions`
# combine) are defined inside `clear_cache` and are not visible here — this
# presumably removes files from disk, so confirm before running.
deleted_files_list = clear_cache(
    directory=experiment["audio_files_path"],
    keep_ids=[],
    keep_extensions={"wav"}
)