In [15]:
# Step through task2.py piece by piece to understand what it does

import sys
import os

import numpy as np
import argparse
import textwrap


import librosa
import librosa.display
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.axes_grid1 import make_axes_locatable

sys.path.append('/store/projects/ml/mathG/DCASE2017/baseline/DCASE2017-baseline-system/')

from dcase_framework.application_core import BinarySoundEventAppCore
from dcase_framework.parameters import ParameterContainer
from dcase_framework.utils import *
from dcase_framework.features import FeatureContainer, FeatureRepository, FeatureExtractor, FeatureNormalizer, \
    FeatureStacker, FeatureAggregator, FeatureMasker


In [2]:
# Resolve the baseline's "applications" directory: realpath() turns the relative
# path into an absolute one (dropping the trailing slash) and dirname() then
# strips the final "system" component. The relative path is resolved against
# the notebook's current working directory.
_system_dir = os.path.realpath('../../baseline/DCASE2017-baseline-system/applications/system/')
project_base = os.path.dirname(_system_dir)

# path_structure maps each stage name to a list of parameter sections.
# NOTE(review): presumably these sections determine each stage's storage path;
# confirm against ParameterContainer.
_extractor_chain = ['dataset', 'feature_extractor.parameters.*']
_learner_chain = [
    'dataset',
    'feature_extractor',
    'feature_stacker',
    'feature_normalizer',
    'feature_aggregator',
    'learner',
]

params = ParameterContainer(
    project_base=project_base,
    path_structure={
        # list(...) hands every stage its own list object, exactly as the
        # separate literals would.
        'feature_extractor': list(_extractor_chain),
        'feature_normalizer': list(_extractor_chain),
        'learner': list(_learner_chain),
        'recognizer': _learner_chain + ['recognizer'],
    },
)

# Display the freshly constructed container (nothing has been loaded into it yet).
params

{}

In [3]:
# Load the task 2 default parameters from the YAML file located under project_base.
params.load(filename=project_base + '/parameters/task2.defaults.yaml')
# Choose the parameter set by name; this is assigned before process() is called.
params['active_set'] = 'dcase2017_gpu'
params.process() # Converts the parameters into the structure used at run time? (unconfirmed -- TODO verify)


# Configure logging from the 'logging' section of the parameter container.
setup_logging(parameter_container=params['logging'])
#app.show_parameters()

  dict.update(self, yaml.load(infile))


In [4]:
class Task2AppCore(BinarySoundEventAppCore):
    """Task 2 application core.

    Adds nothing of its own: every attribute and method comes from
    BinarySoundEventAppCore.
    """

# Gather the constructor arguments first so each value can be inspected on its
# own, then build the application object from them.
app_settings = dict(
    name='DCASE 2017::Detection of rare sound events / Baseline System',
    params=params,
    system_desc=params.get('description'),
    system_parameter_set_id=params.get('active_set'),
    setup_label='Development setup',
    log_system_progress=params.get_path('general.log_system_progress'),
    show_progress_in_console=params.get_path('general.print_system_progress'),
    use_ascii_progress_bar=params.get_path('general.use_ascii_progress_bar'),
)
app = Task2AppCore(**app_settings)

# app.initialize()

[I] DCASE 2017::Detection of rare sound events / Baseline System
[I] 


In [18]:
#app.initialize()
#app.feature_extraction()
from tqdm import tqdm

# Manual walk-through of the feature-extraction stage; the two framework calls
# above are left disabled.
overwrite = True  # True re-extracts features even when the feature files already exist

files = None  # set to a list of audio files to skip the dataset scan below

#######
# Collect the audio files that features will be extracted from
#######
if not files:
    # Gather the 'file' entry of every train and test item over all event
    # labels and active folds. A set removes duplicates with constant-time
    # membership checks (the list-based `not in` test was quadratic); the
    # result is sorted, so the final list is identical.
    files = sorted({
        item['file']
        for event_label in app.dataset.event_labels
        for fold in app._get_active_folds()
        for subset in (app.dataset.train, app.dataset.test)
        for item in subset(fold, event_label=event_label)
    })


feature_files = []
feature_extractor = app.FeatureExtractor(overwrite=overwrite, store=True)

progress = tqdm(files,
                desc='           {0:<15s}'.format('Extracting features '),
                file=sys.stdout,
                leave=False,
                disable=app.disable_progress_bar,
                ascii=app.use_ascii_progress_bar)

for file_id, audio_filename in enumerate(progress):
    # Base name of the audio file, used for both the log line and the feature filename.
    audio_basename = os.path.basename(audio_filename)

    if app.log_system_progress:
        app.logger.info('  {title:<15s} [{file_id:d}/{total:d}] {file:<30s}'.format(
            title='Extracting features ',
            file_id=file_id,
            total=len(files),
            file=audio_basename)
        )

    # Get feature filename
    current_feature_files = app._get_feature_filename(
        audio_file=audio_basename,
        path=app.params.get_path('path.feature_extractor')
    )

    # Extract when any feature file is missing, or always when overwrite is set.
    # feature_repository is reassigned on every extraction, so after the loop it
    # holds the features of the last extracted file; later cells inspect it.
    if not filelist_exists(current_feature_files) or overwrite:
        feature_repository = feature_extractor.extract(
            audio_file=app.dataset.relative_to_absolute_path(audio_filename),
            extractor_params=DottedDict(app.params.get_path('feature_extractor.parameters')),
            storage_paths=current_feature_files
        )

    feature_files.append(current_feature_files)

# Last expression of the cell: display the extractor's default parameters.
FeatureExtractor().get_default_parameters()




                                                                                    

{'mfcc': {'mono': True,
  'window': 'hamming_asymmetric',
  'spectrogram_type': 'magnitude',
  'n_mfcc': 20,
  'n_mels': 40,
  'n_fft': 2048,
  'fmin': 0,
  'fmax': 22050,
  'htk': False,
  'fs': 44100,
  'win_length_samples': 1764,
  'hop_length_samples': 882},
 'mfcc_delta': {'width': 9,
  'dependency_method': 'mfcc',
  'fs': 44100,
  'win_length_samples': 1764,
  'hop_length_samples': 882,
  'dependency_parameters': {'mono': True,
   'window': 'hamming_asymmetric',
   'spectrogram_type': 'magnitude',
   'n_mfcc': 20,
   'n_mels': 40,
   'n_fft': 2048,
   'fmin': 0,
   'fmax': 22050,
   'htk': False,
   'fs': 44100,
   'win_length_samples': 1764,
   'hop_length_samples': 882}},
 'mfcc_acceleration': {'width': 9,
  'dependency_method': 'mfcc',
  'fs': 44100,
  'win_length_samples': 1764,
  'hop_length_samples': 882,
  'dependency_parameters': {'mono': True,
   'window': 'hamming_asymmetric',
   'spectrogram_type': 'magnitude',
   'n_mfcc': 20,
   'n_mels': 40,
   'n_fft': 2048,
   'fm

In [27]:
# Shape of the first array stored under 'feat' for the 'mel' features of the
# repository left over from the extraction loop.
# NOTE(review): presumably (frames, mel bands) -- the second dimension matches
# n_mels = 40 in the repository metadata; confirm.
feature_repository['mel']['feat'][0].shape

(1501, 40)

In [32]:
# Investigate what FeatureStacker does.

# Build a stacker whose recipe names only the 'mel' method and apply it to the
# repository left over from the extraction loop.
feature_stacker = FeatureStacker(recipe=[{'method': 'mel'}])
feature_matrix = feature_stacker.feature_vector(feature_repository=feature_repository)

# Display the result; the output shows a container with 'feat', 'stat' and 'meta' entries.
feature_matrix

{'feat': [array([[ -5.002877 ,  -7.204636 ,  -7.887638 , ..., -11.675259 ,
          -11.7941675, -12.186553 ],
         [ -4.816158 ,  -6.169664 ,  -6.6384535, ..., -11.342943 ,
          -11.678137 , -12.109688 ],
         [ -4.704848 ,  -5.784425 ,  -6.270282 , ..., -11.4215975,
          -11.667576 , -12.06991  ],
         ...,
         [ -5.7667813,  -5.778595 ,  -6.9095564, ..., -11.737999 ,
          -11.727472 , -12.06486  ],
         [ -5.7177258,  -6.202236 ,  -6.744507 , ..., -11.686974 ,
          -11.807487 , -12.002305 ],
         [ -6.074368 ,  -6.2238717,  -7.357516 , ..., -11.659121 ,
          -11.758827 , -12.153561 ]], dtype=float32)],
 'stat': None,
 'meta': {'parameters': {'fs': 44100,
   'win_length_seconds': 0.04,
   'win_length_samples': 1764,
   'hop_length_seconds': 0.02,
   'hop_length_samples': 882},
  'datetime': '2019-12-29 07:54:54',
  'audio_file': '/store/projects/ml/mathG/DCASE2017/baseline/DCASE2017-baseline-system/applications/data/TUT-rare-sound-ev

In [33]:
# Display the feature repository itself; the output shows it keyed by method
# name ('mel'), each entry holding 'feat', 'stat' and 'meta'.
feature_repository


{'mel': {'feat': [array([[ -5.002877 ,  -7.204636 ,  -7.887638 , ..., -11.675259 ,
           -11.7941675, -12.186553 ],
          [ -4.816158 ,  -6.169664 ,  -6.6384535, ..., -11.342943 ,
           -11.678137 , -12.109688 ],
          [ -4.704848 ,  -5.784425 ,  -6.270282 , ..., -11.4215975,
           -11.667576 , -12.06991  ],
          ...,
          [ -5.7667813,  -5.778595 ,  -6.9095564, ..., -11.737999 ,
           -11.727472 , -12.06486  ],
          [ -5.7177258,  -6.202236 ,  -6.744507 , ..., -11.686974 ,
           -11.807487 , -12.002305 ],
          [ -6.074368 ,  -6.2238717,  -7.357516 , ..., -11.659121 ,
           -11.758827 , -12.153561 ]], dtype=float32)],
  'stat': None,
  'meta': {'parameters': {'mono': True,
    'window': 'hamming_asymmetric',
    'spectrogram_type': 'magnitude',
    'n_mels': 40,
    'normalize_mel_bands': False,
    'n_fft': 2048,
    'fmin': 0,
    'fmax': 22050,
    'htk': False,
    'log': True,
    'fs': 44100,
    'win_length_samples': 1764