In [1]:
import glob, os
import pathlib
import json
import yaml

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import collections
from pprint import pprint

from dotty_dict import dotty

%matplotlib inline

In [2]:
# Resolve the namespace that holds the abstract base classes: the
# `collections.abc` submodule where it exists, plain `collections` otherwise.
# The submodule is imported explicitly because `import collections` alone does
# not guarantee that the `collections.abc` attribute has been loaded.
try:
    import collections.abc as collectionsAbc
except ImportError:
    collectionsAbc = collections

Configure the pandas display options so that Jupyter shows complete output (no truncated rows, columns or cell contents).

In [3]:
# Remove every pandas display limit (row count, column count, line width and
# per-cell text width) by setting each of the options to None.
for display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(display_option, None)

# Functions to use

## Function to fully flatten each json file

In [4]:
def flatten_dict_full(d, sep="_"):
    """Flatten a nested structure of dicts and lists into a single level.

    Every leaf (any value that is neither a ``dict`` nor a ``list``) is stored
    under a key made of the path leading to it, with the path components
    joined by ``sep``. Dict keys and list positions both become path
    components; they are converted with ``str()`` so that non-string keys
    (for example integers) can be part of a path.

    Args:
        d: The object to flatten, typically the dict produced by
            ``json.load``.
        sep: String placed between two consecutive path components.

    Returns:
        collections.OrderedDict: Flattened ``path -> leaf value`` mapping in
        traversal order. Empty dicts and empty lists contribute no entry.
        If ``d`` itself is a leaf, it is stored under the empty key ``""``.
    """
    obj = collections.OrderedDict()

    def join_path(parent_key, part):
        # str() keeps the concatenation valid for non-string keys/indices and
        # makes sure a falsy key such as 0 still shows up in the path.
        part = str(part)
        return parent_key + sep + part if parent_key else part

    def recurse(t, parent_key=""):
        if isinstance(t, list):
            for i, element in enumerate(t):
                recurse(element, join_path(parent_key, i))
        elif isinstance(t, dict):
            for k, v in t.items():
                recurse(v, join_path(parent_key, k))
        else:
            # Leaf reached: record it under the accumulated path.
            obj[parent_key] = t

    recurse(d)

    return obj

In [5]:
path_app = os.getcwd()

In [6]:
# path_app = os.path.join(os.path.abspath(os.getcwd()), os.pardir)
# # path = os.path.join(path, 'danceability', 'features', 'mp3')
# print('Current path:', path_app)
# print('Type:', type(path_app))

In [7]:
# Load the project configuration from `project_danceability.yaml`.
# A malformed file is reported and the error is re-raised, so execution stops
# here instead of continuing with an undefined `config_data` (or with a stale
# value left in the kernel by an earlier run).
with open(os.path.join(path_app, 'project_danceability.yaml'), 'r') as conf_stream:
    try:
        config_data = yaml.safe_load(conf_stream)
    except yaml.YAMLError as exc:
        print(exc)
        raise

In [8]:
config_data

{'templateVersion': '2.1-beta2',
 'className': 'test_danceability',
 'datasetsDirectory': '/data/train_results_danceability/datasets',
 'resultsDirectory': '/data/train_results_danceability/results',
 'filelist': '/data/datasets/danceability/metadata/filelist_sig.yaml',
 'groundtruth': '/data/datasets/danceability/metadata/modified_groundtruth.yaml',
 'seed': 1593182276.72,
 'clusterMode': True,
 'excludedDescriptors': ['metadata.tags*'],
 'failOnUnmatched': False,
 'preprocessing': {'raw': [],
  'basic': [{'transfo': 'remove',
    'params': {'descriptorNames': ['metadata.*',
      '*dmean*',
      '*dvar*',
      '*.min',
      '*.max',
      '*cov',
      'tonal.thpcp',
      'lowlevel.spectral_energyband_high.*',
      'lowlevel.silence_rate*']}},
   {'transfo': 'enumerate',
    'params': {'descriptorNames': ['tonal.chords_key',
      'tonal.chords_scale',
      'tonal.key_key',
      'tonal.key_scale']}}],
  'lowlevel': [{'transfo': 'remove',
    'params': {'descriptorNames': ['met

In [9]:
exclude_descriptors = config_data["excludedDescriptors"]

In [10]:
exclude_descriptors

['metadata.tags*']

In [11]:
config_data["preprocessing"]["basic"]

[{'transfo': 'remove',
  'params': {'descriptorNames': ['metadata.*',
    '*dmean*',
    '*dvar*',
    '*.min',
    '*.max',
    '*cov',
    'tonal.thpcp',
    'lowlevel.spectral_energyband_high.*',
    'lowlevel.silence_rate*']}},
 {'transfo': 'enumerate',
  'params': {'descriptorNames': ['tonal.chords_key',
    'tonal.chords_scale',
    'tonal.key_key',
    'tonal.key_scale']}}]

In [12]:
config_data["preprocessing"]["basic"][0]["transfo"]

'remove'

In [13]:
config_data["preprocessing"]["basic"][0]["params"]["descriptorNames"]

['metadata.*',
 '*dmean*',
 '*dvar*',
 '*.min',
 '*.max',
 '*cov',
 'tonal.thpcp',
 'lowlevel.spectral_energyband_high.*',
 'lowlevel.silence_rate*']

In [14]:
type(config_data["preprocessing"]["basic"][0]["params"]["descriptorNames"])

list

In [15]:
remove_keys_list = config_data["preprocessing"]["basic"][0]["params"]["descriptorNames"]

# Load Data GroundTruth

## Path declaration

In [16]:
path_obj = pathlib.Path().absolute()
path_obj

PosixPath('/Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks')

In [17]:
# Parent of the current working directory, written as "<cwd>/.." -- the path
# is not normalised, so the ".." component stays in the string.
path_app_data = os.path.join(os.path.abspath(os.curdir), os.pardir)
print('Type:', type(path_app_data))
print("Path:", path_app_data)

Type: <class 'str'>
Path: /Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks/..


## Count all the JSON files found under the data path

In [18]:
# Walk the whole directory tree below `path_app_data` and count every file
# whose name ends with ".json".
counter = 0
for root, dirs, files in os.walk(path_app_data):
    counter += sum(1 for file_name in files if file_name.endswith(".json"))

print('counted json files:', counter)

counted json files: 10461


## Load the GroundTruth file

In [19]:
# Load the ground-truth annotations.
# A malformed file is reported and the error is re-raised, so execution stops
# here instead of continuing with an undefined `groundTr_data` (or with a
# stale value left in the kernel by an earlier run).
with open(os.path.join(path_app_data, 'acousticbrainz-datasets/danceability/metadata/groundtruth.yaml'), 'r') as stream:
    try:
        groundTr_data = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
        raise

In [20]:
len(groundTr_data['groundTruth'].keys())

225

## Check the unique folders with tracks and sort them alphabetically

In [21]:
labeled_tracks = groundTr_data['groundTruth']

### Split the keys in Folder - Track Name

In [22]:
# For every ground-truth key keep only the part before the first "/".
folders = [key.split('/')[0] for key in labeled_tracks]

In [23]:
print(folders[:15])

['Alternative', 'Alternative', 'Alternative', 'Alternative', 'Alternative', 'Alternative', 'Alternative', 'Alternative', 'Alternative', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues', 'Blues']


### Check the folders

In [24]:
# Drop duplicate folder names and order the remaining ones alphabetically.
folders = sorted(set(folders))
folders

['Alternative',
 'Blues',
 'Classical',
 'Country',
 'Dance and House',
 'Folk and New Age',
 'Hip-Hop and Rap',
 'House & Garage and Grime',
 'Jazz',
 'Jungle and D&B',
 'Latin',
 'Pop',
 'R&B and Soul',
 'Reggae',
 'Rock and Metal',
 'Techno and Electro',
 'Vocal and Acapella']

## Class Name retrieved from GT file

In [25]:
class_name = groundTr_data['className']

In [26]:
class_name

'danceability'

## Load a DF containing the tracks from the features path and the info retrieved from the GT file

In [27]:
path_features = os.path.join(path_app_data, 'acousticbrainz-datasets/danceability', 'features', 'mp3')

In [28]:
# path_features

In [29]:
# Build one record per ground-truth entry by locating the track's file inside
# its folder below `path_features`.
tracks = []
# Folder path -> directory listing, so each folder is read from disk only once
# instead of once per track.
dir_listing_cache = {}
for key, value in labeled_tracks.items():
    track_dict = {}
    key = key.split('/')  # [folder, track name]
    path_tracks = os.path.join(path_features, key[0])
    if path_tracks not in dir_listing_cache:
        dir_listing_cache[path_tracks] = os.listdir(path_tracks)
    for f_name in dir_listing_cache[path_tracks]:
        # Prefix match: the file on disk carries extra characters after the
        # track name. NOTE(review): if several files share the prefix, the
        # last one in listing order wins.
        if f_name.startswith(key[1]):
            track_dict['genre_category'] = key[0]
            track_dict['track'] = key[1]
            track_dict['track_path'] = os.path.join(path_features, key[0], f_name)
            track_dict['danceability'] = value
    # NOTE(review): an entry with no matching file is appended as an empty
    # dict, which keeps len(tracks) equal to the number of ground-truth keys.
    tracks.append(track_dict)

In [30]:
len(tracks)

225

DF that includes the tracks

In [31]:
df_tracks = pd.DataFrame(data=tracks)

Preview the first rows of the DataFrame, including each track's path

In [32]:
df_tracks.head()

Unnamed: 0,genre_category,track,track_path,danceability
0,Alternative,02 Happy Birthday - Altered Images.mp3,/Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks/../acousticbrainz-datasets/danceability/features/mp3/Alternative/02 Happy Birthday - Altered Images.mp3.json,danceable
1,Alternative,03 How Soon Is Now - Smiths.mp3,/Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks/../acousticbrainz-datasets/danceability/features/mp3/Alternative/03 How Soon Is Now - Smiths.mp3.json,not_danceable
2,Alternative,04 Pearly Dewdrops' Drops - Cocteau Twins.mp3,/Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks/../acousticbrainz-datasets/danceability/features/mp3/Alternative/04 Pearly Dewdrops' Drops - Cocteau Twins.mp3.json,not_danceable
3,Alternative,06 Mr Brightside - Killers.mp3,/Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks/../acousticbrainz-datasets/danceability/features/mp3/Alternative/06 Mr Brightside - Killers.mp3.json,danceable
4,Alternative,06 Song 2 - Blur.mp3,/Users/pantelistzamalis/Desktop/google2020/gsoc_ml_infrastructure/notebooks/../acousticbrainz-datasets/danceability/features/mp3/Alternative/06 Song 2 - Blur.mp3.json,danceable


Check the className distribution of the data based on the 'genre_category'

Data distribution based on the genres

In [33]:
df_tracks['genre_category'].value_counts()

Jungle and D&B              22
Latin                       22
Dance and House             21
Hip-Hop and Rap             19
Vocal and Acapella          17
Techno and Electro          16
Folk and New Age            16
Rock and Metal              13
R&B and Soul                13
Reggae                      12
House & Garage and Grime    10
Alternative                  9
Blues                        8
Country                      8
Pop                          7
Jazz                         6
Classical                    6
Name: genre_category, dtype: int64

# Load a Single JSON low-level file and handle its data

In [34]:
# Read the feature file of the first track in `df_tracks`.
# The encoding is pinned to UTF-8 so that decoding does not depend on the
# platform's default locale encoding.
with open(df_tracks['track_path'].iloc[0], encoding='utf-8') as json_file:
    data_feats = json.load(json_file)

In [35]:
# pprint(data_feats)

In [36]:
data_feats.keys()

dict_keys(['lowlevel', 'metadata', 'rhythm', 'tonal'])

In [37]:
data_feats["metadata"]["tags"]

{'file_name': '02 Happy Birthday - Altered Images.mp3',
 'album': ['John Peel - A Tribute [UK] Disc 2'],
 'albumartist': ['Various Artists'],
 'artist': ['Various Artists'],
 'date': ['2005'],
 'genre': ['Rock/Pop'],
 'label': ['WSM'],
 'length': ['178320'],
 'title': ['Happy Birthday - Altered Images'],
 'tracknumber': ['2']}

In [38]:
data_feats_dotty = dotty(data_feats)

In [39]:
data_feats_dotty["metadata.tags"]

{'file_name': '02 Happy Birthday - Altered Images.mp3',
 'album': ['John Peel - A Tribute [UK] Disc 2'],
 'albumartist': ['Various Artists'],
 'artist': ['Various Artists'],
 'date': ['2005'],
 'genre': ['Rock/Pop'],
 'label': ['WSM'],
 'length': ['178320'],
 'title': ['Happy Birthday - Altered Images'],
 'tracknumber': ['2']}

In [40]:
remove_keys_list

['metadata.*',
 '*dmean*',
 '*dvar*',
 '*.min',
 '*.max',
 '*cov',
 'tonal.thpcp',
 'lowlevel.spectral_energyband_high.*',
 'lowlevel.silence_rate*']

In [41]:
data_feats_dotty["lowlevel.spectral_energyband_high"]

{'dmean': 0.000430126703577,
 'dmean2': 0.000582747336011,
 'dvar': 1.32549780574e-06,
 'dvar2': 2.6748007258e-06,
 'max': 0.0408168584108,
 'mean': 0.000924074323848,
 'median': 0.000307242153212,
 'min': 7.17486931364e-21,
 'var': 3.44317140843e-06}

In [42]:
# def remove_keys(keys_list):
#     for item in keys_list:
#         if item.endswith(".*"):
#             item = item.replace(".*", "")
# #             del data_feats_dotty[item]
#             print(data_feats_dotty[item])
#         elif item.startswith(""):
#         else:
#             del data_feats_dotty[item]

In [43]:
# remove_keys(remove_keys_list)

In [44]:
# data_feats_dotty["lowlevel.silence_rate"]

In [45]:
data_feats_dotty["lowlevel.spectral_energyband_high"]

{'dmean': 0.000430126703577,
 'dmean2': 0.000582747336011,
 'dvar': 1.32549780574e-06,
 'dvar2': 2.6748007258e-06,
 'max': 0.0408168584108,
 'mean': 0.000924074323848,
 'median': 0.000307242153212,
 'min': 7.17486931364e-21,
 'var': 3.44317140843e-06}

In [46]:
# Print "<first-level key>.<second-level key>" for every pair found in the
# two outermost nesting levels of the feature dict.
for k, v in data_feats.items():
    for k1 in v.keys():
        keys_united = ".".join((k, k1))
        print(keys_united)

lowlevel.average_loudness
lowlevel.barkbands_crest
lowlevel.barkbands_flatness_db
lowlevel.barkbands_kurtosis
lowlevel.barkbands_skewness
lowlevel.barkbands_spread
lowlevel.dissonance
lowlevel.dynamic_complexity
lowlevel.erbbands_crest
lowlevel.erbbands_flatness_db
lowlevel.erbbands_kurtosis
lowlevel.erbbands_skewness
lowlevel.erbbands_spread
lowlevel.hfc
lowlevel.melbands_crest
lowlevel.melbands_flatness_db
lowlevel.melbands_kurtosis
lowlevel.melbands_skewness
lowlevel.melbands_spread
lowlevel.pitch_salience
lowlevel.silence_rate_20dB
lowlevel.silence_rate_30dB
lowlevel.silence_rate_60dB
lowlevel.spectral_centroid
lowlevel.spectral_complexity
lowlevel.spectral_decrease
lowlevel.spectral_energy
lowlevel.spectral_energyband_high
lowlevel.spectral_energyband_low
lowlevel.spectral_energyband_middle_high
lowlevel.spectral_energyband_middle_low
lowlevel.spectral_entropy
lowlevel.spectral_flux
lowlevel.spectral_kurtosis
lowlevel.spectral_rms
lowlevel.spectral_rolloff
lowlevel.spectral_skewne

In [47]:
flatten_data_feats = flatten_dict_full(data_feats)

In [48]:
flatten_data_feats= dict(flatten_data_feats)

In [49]:
flatten_data_feats

{'lowlevel_average_loudness': 0.940232157707,
 'lowlevel_barkbands_crest_dmean': 2.76665973663,
 'lowlevel_barkbands_crest_dmean2': 4.44829654694,
 'lowlevel_barkbands_crest_dvar': 5.74580621719,
 'lowlevel_barkbands_crest_dvar2': 14.9876413345,
 'lowlevel_barkbands_crest_max': 26.7228469849,
 'lowlevel_barkbands_crest_mean': 10.8071289062,
 'lowlevel_barkbands_crest_median': 9.6913394928,
 'lowlevel_barkbands_crest_min': 2.79826569557,
 'lowlevel_barkbands_crest_var': 24.1222038269,
 'lowlevel_barkbands_flatness_db_dmean': 0.0237202681601,
 'lowlevel_barkbands_flatness_db_dmean2': 0.0359584279358,
 'lowlevel_barkbands_flatness_db_dvar': 0.000525934854522,
 'lowlevel_barkbands_flatness_db_dvar2': 0.0011579785496,
 'lowlevel_barkbands_flatness_db_max': 0.627937674522,
 'lowlevel_barkbands_flatness_db_mean': 0.176746591926,
 'lowlevel_barkbands_flatness_db_median': 0.146083265543,
 'lowlevel_barkbands_flatness_db_min': 0.0408386327326,
 'lowlevel_barkbands_flatness_db_var': 0.01097385119

In [50]:
len(flatten_data_feats.keys())

3135

In [51]:
lala = "dsxcdfd_mfcc_dsksds"
lolo = "sadsdsd_alaa_slasad"

In [52]:
import re

In [53]:
# Report whether the sample string `lolo` contains "mfcc".
print("pattern found" if re.search("mfcc", lolo) else "pattern not found")

pattern not found


`'metadata.*', '*dmean*', '*dvar*',
'*.min', '*.max', '*cov',
'tonal.thpcp', # because of division by zero
'lowlevel.spectral_energyband_high.*', # 0 for low samplerate
'lowlevel.silence_rate*' # funky behavior in general`

In [54]:
len(remove_keys_list)

9

In [55]:
remove_keys_list_update = []

In [56]:
# Convert the dotted wildcard patterns of `remove_keys_list` into the plain
# fragments that occur in the "_"-joined keys of the flattened feature dict.
# The target list is created here so that re-running this cell does not append
# the same fragments a second time.
remove_keys_list_update = []
for item in remove_keys_list:
    if item.endswith(".*"):
        # "group.*" -> "group_"
        item = item.replace(".*", "_")
    elif item.startswith("*."):
        # "*.stat" -> "_stat"
        item = item.replace("*.", "_")
    else:
        # Any other pattern: just drop the wildcard characters.
        item = item.replace("*", "")
    # Remaining dots become the separator used by the flattened keys.
    item = item.replace(".", "_")
    remove_keys_list_update.append(item)

In [57]:
remove_keys_list_update

['metadata_',
 'dmean',
 'dvar',
 '_min',
 '_max',
 'cov',
 'tonal_thpcp',
 'lowlevel_spectral_energyband_high_',
 'lowlevel_silence_rate']

1) metadata: `metadata.*`

In [58]:
# List the flattened keys that start with "metadata", then print how many
# there are.
metadata_keys = [key for key in flatten_data_feats if key.startswith("metadata")]
for metadata_key in metadata_keys:
    print(metadata_key)
counter = len(metadata_keys)
print()
print(counter)

metadata_audio_properties_analysis_sample_rate
metadata_audio_properties_bit_rate
metadata_audio_properties_equal_loudness
metadata_audio_properties_length
metadata_audio_properties_lossless
metadata_audio_properties_replay_gain
metadata_audio_properties_sample_rate
metadata_audio_properties_codec
metadata_audio_properties_downmix
metadata_audio_properties_md5_encoded
metadata_tags_file_name
metadata_tags_album_0
metadata_tags_albumartist_0
metadata_tags_artist_0
metadata_tags_date_0
metadata_tags_genre_0
metadata_tags_label_0
metadata_tags_length_0
metadata_tags_title_0
metadata_tags_tracknumber_0
metadata_version_essentia
metadata_version_essentia_git_sha
metadata_version_extractor

23


2) dmean: `*dmean*`

In [59]:
# List the flattened keys that contain "dmean", then print how many there are.
dmean_keys = [key for key in flatten_data_feats if re.search("dmean", key)]
for dmean_key in dmean_keys:
    print(dmean_key)
counter = len(dmean_keys)
print()
print(counter)

lowlevel_barkbands_crest_dmean
lowlevel_barkbands_crest_dmean2
lowlevel_barkbands_flatness_db_dmean
lowlevel_barkbands_flatness_db_dmean2
lowlevel_barkbands_kurtosis_dmean
lowlevel_barkbands_kurtosis_dmean2
lowlevel_barkbands_skewness_dmean
lowlevel_barkbands_skewness_dmean2
lowlevel_barkbands_spread_dmean
lowlevel_barkbands_spread_dmean2
lowlevel_dissonance_dmean
lowlevel_dissonance_dmean2
lowlevel_erbbands_crest_dmean
lowlevel_erbbands_crest_dmean2
lowlevel_erbbands_flatness_db_dmean
lowlevel_erbbands_flatness_db_dmean2
lowlevel_erbbands_kurtosis_dmean
lowlevel_erbbands_kurtosis_dmean2
lowlevel_erbbands_skewness_dmean
lowlevel_erbbands_skewness_dmean2
lowlevel_erbbands_spread_dmean
lowlevel_erbbands_spread_dmean2
lowlevel_hfc_dmean
lowlevel_hfc_dmean2
lowlevel_melbands_crest_dmean
lowlevel_melbands_crest_dmean2
lowlevel_melbands_flatness_db_dmean
lowlevel_melbands_flatness_db_dmean2
lowlevel_melbands_kurtosis_dmean
lowlevel_melbands_kurtosis_dmean2
lowlevel_melbands_skewness_dmean
lo

3) dvar: `*dvar*`

In [60]:
# List the flattened keys that contain "dvar", then print how many there are.
dvar_keys = [key for key in flatten_data_feats if re.search("dvar", key)]
for dvar_key in dvar_keys:
    print(dvar_key)
counter = len(dvar_keys)
print()
print(counter)

lowlevel_barkbands_crest_dvar
lowlevel_barkbands_crest_dvar2
lowlevel_barkbands_flatness_db_dvar
lowlevel_barkbands_flatness_db_dvar2
lowlevel_barkbands_kurtosis_dvar
lowlevel_barkbands_kurtosis_dvar2
lowlevel_barkbands_skewness_dvar
lowlevel_barkbands_skewness_dvar2
lowlevel_barkbands_spread_dvar
lowlevel_barkbands_spread_dvar2
lowlevel_dissonance_dvar
lowlevel_dissonance_dvar2
lowlevel_erbbands_crest_dvar
lowlevel_erbbands_crest_dvar2
lowlevel_erbbands_flatness_db_dvar
lowlevel_erbbands_flatness_db_dvar2
lowlevel_erbbands_kurtosis_dvar
lowlevel_erbbands_kurtosis_dvar2
lowlevel_erbbands_skewness_dvar
lowlevel_erbbands_skewness_dvar2
lowlevel_erbbands_spread_dvar
lowlevel_erbbands_spread_dvar2
lowlevel_hfc_dvar
lowlevel_hfc_dvar2
lowlevel_melbands_crest_dvar
lowlevel_melbands_crest_dvar2
lowlevel_melbands_flatness_db_dvar
lowlevel_melbands_flatness_db_dvar2
lowlevel_melbands_kurtosis_dvar
lowlevel_melbands_kurtosis_dvar2
lowlevel_melbands_skewness_dvar
lowlevel_melbands_skewness_dvar2


4) min: `*.min`

In [61]:
# List the flattened keys that end with "_min", then print how many there are.
min_keys = [key for key in flatten_data_feats if key.endswith("_min")]
for min_key in min_keys:
    print(min_key)
counter = len(min_keys)
print()
print(counter)

lowlevel_barkbands_crest_min
lowlevel_barkbands_flatness_db_min
lowlevel_barkbands_kurtosis_min
lowlevel_barkbands_skewness_min
lowlevel_barkbands_spread_min
lowlevel_dissonance_min
lowlevel_erbbands_crest_min
lowlevel_erbbands_flatness_db_min
lowlevel_erbbands_kurtosis_min
lowlevel_erbbands_skewness_min
lowlevel_erbbands_spread_min
lowlevel_hfc_min
lowlevel_melbands_crest_min
lowlevel_melbands_flatness_db_min
lowlevel_melbands_kurtosis_min
lowlevel_melbands_skewness_min
lowlevel_melbands_spread_min
lowlevel_pitch_salience_min
lowlevel_silence_rate_20dB_min
lowlevel_silence_rate_30dB_min
lowlevel_silence_rate_60dB_min
lowlevel_spectral_centroid_min
lowlevel_spectral_complexity_min
lowlevel_spectral_decrease_min
lowlevel_spectral_energy_min
lowlevel_spectral_energyband_high_min
lowlevel_spectral_energyband_low_min
lowlevel_spectral_energyband_middle_high_min
lowlevel_spectral_energyband_middle_low_min
lowlevel_spectral_entropy_min
lowlevel_spectral_flux_min
lowlevel_spectral_kurtosis_mi

5) max: `*.max`

In [62]:
# List the flattened keys that end with "_max", then print how many there are.
max_keys = [key for key in flatten_data_feats if key.endswith("_max")]
for max_key in max_keys:
    print(max_key)
counter = len(max_keys)
print()
print(counter)

lowlevel_barkbands_crest_max
lowlevel_barkbands_flatness_db_max
lowlevel_barkbands_kurtosis_max
lowlevel_barkbands_skewness_max
lowlevel_barkbands_spread_max
lowlevel_dissonance_max
lowlevel_erbbands_crest_max
lowlevel_erbbands_flatness_db_max
lowlevel_erbbands_kurtosis_max
lowlevel_erbbands_skewness_max
lowlevel_erbbands_spread_max
lowlevel_hfc_max
lowlevel_melbands_crest_max
lowlevel_melbands_flatness_db_max
lowlevel_melbands_kurtosis_max
lowlevel_melbands_skewness_max
lowlevel_melbands_spread_max
lowlevel_pitch_salience_max
lowlevel_silence_rate_20dB_max
lowlevel_silence_rate_30dB_max
lowlevel_silence_rate_60dB_max
lowlevel_spectral_centroid_max
lowlevel_spectral_complexity_max
lowlevel_spectral_decrease_max
lowlevel_spectral_energy_max
lowlevel_spectral_energyband_high_max
lowlevel_spectral_energyband_low_max
lowlevel_spectral_energyband_middle_high_max
lowlevel_spectral_energyband_middle_low_max
lowlevel_spectral_entropy_max
lowlevel_spectral_flux_max
lowlevel_spectral_kurtosis_ma

6) cov: `*cov`

In [63]:
# List the flattened keys that contain "cov" (this also catches "icov"), then
# print how many there are.
cov_keys = [key for key in flatten_data_feats if re.search("cov", key)]
for cov_key in cov_keys:
    print(cov_key)
counter = len(cov_keys)
print()
print(counter)

lowlevel_gfcc_cov_0_0
lowlevel_gfcc_cov_0_1
lowlevel_gfcc_cov_0_2
lowlevel_gfcc_cov_0_3
lowlevel_gfcc_cov_0_4
lowlevel_gfcc_cov_0_5
lowlevel_gfcc_cov_0_6
lowlevel_gfcc_cov_0_7
lowlevel_gfcc_cov_0_8
lowlevel_gfcc_cov_0_9
lowlevel_gfcc_cov_0_10
lowlevel_gfcc_cov_0_11
lowlevel_gfcc_cov_0_12
lowlevel_gfcc_cov_1_0
lowlevel_gfcc_cov_1_1
lowlevel_gfcc_cov_1_2
lowlevel_gfcc_cov_1_3
lowlevel_gfcc_cov_1_4
lowlevel_gfcc_cov_1_5
lowlevel_gfcc_cov_1_6
lowlevel_gfcc_cov_1_7
lowlevel_gfcc_cov_1_8
lowlevel_gfcc_cov_1_9
lowlevel_gfcc_cov_1_10
lowlevel_gfcc_cov_1_11
lowlevel_gfcc_cov_1_12
lowlevel_gfcc_cov_2_0
lowlevel_gfcc_cov_2_1
lowlevel_gfcc_cov_2_2
lowlevel_gfcc_cov_2_3
lowlevel_gfcc_cov_2_4
lowlevel_gfcc_cov_2_5
lowlevel_gfcc_cov_2_6
lowlevel_gfcc_cov_2_7
lowlevel_gfcc_cov_2_8
lowlevel_gfcc_cov_2_9
lowlevel_gfcc_cov_2_10
lowlevel_gfcc_cov_2_11
lowlevel_gfcc_cov_2_12
lowlevel_gfcc_cov_3_0
lowlevel_gfcc_cov_3_1
lowlevel_gfcc_cov_3_2
lowlevel_gfcc_cov_3_3
lowlevel_gfcc_cov_3_4
lowlevel_gfcc_cov_3_5
l

lowlevel_mfcc_icov_1_12
lowlevel_mfcc_icov_2_0
lowlevel_mfcc_icov_2_1
lowlevel_mfcc_icov_2_2
lowlevel_mfcc_icov_2_3
lowlevel_mfcc_icov_2_4
lowlevel_mfcc_icov_2_5
lowlevel_mfcc_icov_2_6
lowlevel_mfcc_icov_2_7
lowlevel_mfcc_icov_2_8
lowlevel_mfcc_icov_2_9
lowlevel_mfcc_icov_2_10
lowlevel_mfcc_icov_2_11
lowlevel_mfcc_icov_2_12
lowlevel_mfcc_icov_3_0
lowlevel_mfcc_icov_3_1
lowlevel_mfcc_icov_3_2
lowlevel_mfcc_icov_3_3
lowlevel_mfcc_icov_3_4
lowlevel_mfcc_icov_3_5
lowlevel_mfcc_icov_3_6
lowlevel_mfcc_icov_3_7
lowlevel_mfcc_icov_3_8
lowlevel_mfcc_icov_3_9
lowlevel_mfcc_icov_3_10
lowlevel_mfcc_icov_3_11
lowlevel_mfcc_icov_3_12
lowlevel_mfcc_icov_4_0
lowlevel_mfcc_icov_4_1
lowlevel_mfcc_icov_4_2
lowlevel_mfcc_icov_4_3
lowlevel_mfcc_icov_4_4
lowlevel_mfcc_icov_4_5
lowlevel_mfcc_icov_4_6
lowlevel_mfcc_icov_4_7
lowlevel_mfcc_icov_4_8
lowlevel_mfcc_icov_4_9
lowlevel_mfcc_icov_4_10
lowlevel_mfcc_icov_4_11
lowlevel_mfcc_icov_4_12
lowlevel_mfcc_icov_5_0
lowlevel_mfcc_icov_5_1
lowlevel_mfcc_icov_5_2
l

7) tonal.thpcp: `tonal.thpcp`

In [64]:
# List the flattened keys that contain "tonal_thpcp", then print how many
# there are.
thpcp_keys = [key for key in flatten_data_feats if re.search("tonal_thpcp", key)]
for thpcp_key in thpcp_keys:
    print(thpcp_key)
counter = len(thpcp_keys)
print()
print(counter)

tonal_thpcp_0
tonal_thpcp_1
tonal_thpcp_2
tonal_thpcp_3
tonal_thpcp_4
tonal_thpcp_5
tonal_thpcp_6
tonal_thpcp_7
tonal_thpcp_8
tonal_thpcp_9
tonal_thpcp_10
tonal_thpcp_11
tonal_thpcp_12
tonal_thpcp_13
tonal_thpcp_14
tonal_thpcp_15
tonal_thpcp_16
tonal_thpcp_17
tonal_thpcp_18
tonal_thpcp_19
tonal_thpcp_20
tonal_thpcp_21
tonal_thpcp_22
tonal_thpcp_23
tonal_thpcp_24
tonal_thpcp_25
tonal_thpcp_26
tonal_thpcp_27
tonal_thpcp_28
tonal_thpcp_29
tonal_thpcp_30
tonal_thpcp_31
tonal_thpcp_32
tonal_thpcp_33
tonal_thpcp_34
tonal_thpcp_35

36


8) lowlevel.spectral_energyband_high: `lowlevel.spectral_energyband_high.*`

In [65]:
# List the flattened keys that contain "lowlevel_spectral_energyband_high",
# then print how many there are.
energyband_high_keys = [
    key for key in flatten_data_feats
    if re.search("lowlevel_spectral_energyband_high", key)
]
for energyband_high_key in energyband_high_keys:
    print(energyband_high_key)
counter = len(energyband_high_keys)
print()
print(counter)

lowlevel_spectral_energyband_high_dmean
lowlevel_spectral_energyband_high_dmean2
lowlevel_spectral_energyband_high_dvar
lowlevel_spectral_energyband_high_dvar2
lowlevel_spectral_energyband_high_max
lowlevel_spectral_energyband_high_mean
lowlevel_spectral_energyband_high_median
lowlevel_spectral_energyband_high_min
lowlevel_spectral_energyband_high_var

9


9) lowlevel.silence_rate: `lowlevel.silence_rate*`

In [66]:
# List the flattened keys that contain "lowlevel_silence_rate", then print how
# many there are.
silence_rate_keys = [
    key for key in flatten_data_feats
    if re.search("lowlevel_silence_rate", key)
]
for silence_rate_key in silence_rate_keys:
    print(silence_rate_key)
counter = len(silence_rate_keys)
print()
print(counter)

lowlevel_silence_rate_20dB_dmean
lowlevel_silence_rate_20dB_dmean2
lowlevel_silence_rate_20dB_dvar
lowlevel_silence_rate_20dB_dvar2
lowlevel_silence_rate_20dB_max
lowlevel_silence_rate_20dB_mean
lowlevel_silence_rate_20dB_median
lowlevel_silence_rate_20dB_min
lowlevel_silence_rate_20dB_var
lowlevel_silence_rate_30dB_dmean
lowlevel_silence_rate_30dB_dmean2
lowlevel_silence_rate_30dB_dvar
lowlevel_silence_rate_30dB_dvar2
lowlevel_silence_rate_30dB_max
lowlevel_silence_rate_30dB_mean
lowlevel_silence_rate_30dB_median
lowlevel_silence_rate_30dB_min
lowlevel_silence_rate_30dB_var
lowlevel_silence_rate_60dB_dmean
lowlevel_silence_rate_60dB_dmean2
lowlevel_silence_rate_60dB_dvar
lowlevel_silence_rate_60dB_dvar2
lowlevel_silence_rate_60dB_max
lowlevel_silence_rate_60dB_mean
lowlevel_silence_rate_60dB_median
lowlevel_silence_rate_60dB_min
lowlevel_silence_rate_60dB_var

27


In [68]:
# Print every flattened key that contains "mfcc".
for key in flatten_data_feats.keys():
    if not re.search("mfcc", key):
        continue
    print(key)

lowlevel_mfcc_mean_0
lowlevel_mfcc_mean_1
lowlevel_mfcc_mean_2
lowlevel_mfcc_mean_3
lowlevel_mfcc_mean_4
lowlevel_mfcc_mean_5
lowlevel_mfcc_mean_6
lowlevel_mfcc_mean_7
lowlevel_mfcc_mean_8
lowlevel_mfcc_mean_9
lowlevel_mfcc_mean_10
lowlevel_mfcc_mean_11
lowlevel_mfcc_mean_12
lowlevel_mfcc_cov_0_0
lowlevel_mfcc_cov_0_1
lowlevel_mfcc_cov_0_2
lowlevel_mfcc_cov_0_3
lowlevel_mfcc_cov_0_4
lowlevel_mfcc_cov_0_5
lowlevel_mfcc_cov_0_6
lowlevel_mfcc_cov_0_7
lowlevel_mfcc_cov_0_8
lowlevel_mfcc_cov_0_9
lowlevel_mfcc_cov_0_10
lowlevel_mfcc_cov_0_11
lowlevel_mfcc_cov_0_12
lowlevel_mfcc_cov_1_0
lowlevel_mfcc_cov_1_1
lowlevel_mfcc_cov_1_2
lowlevel_mfcc_cov_1_3
lowlevel_mfcc_cov_1_4
lowlevel_mfcc_cov_1_5
lowlevel_mfcc_cov_1_6
lowlevel_mfcc_cov_1_7
lowlevel_mfcc_cov_1_8
lowlevel_mfcc_cov_1_9
lowlevel_mfcc_cov_1_10
lowlevel_mfcc_cov_1_11
lowlevel_mfcc_cov_1_12
lowlevel_mfcc_cov_2_0
lowlevel_mfcc_cov_2_1
lowlevel_mfcc_cov_2_2
lowlevel_mfcc_cov_2_3
lowlevel_mfcc_cov_2_4
lowlevel_mfcc_cov_2_5
lowlevel_mfcc_