In [None]:
import os

# Configure the process environment BEFORE TensorFlow / TFDS are imported, so
# the settings are guaranteed to be in place whenever those libraries read them
# (device visibility is read when TensorFlow initialises its devices, and some
# tensorflow_datasets versions resolve TFDS_DATA_DIR at import time).
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # disable GPU devices
os.environ["TFDS_DATA_DIR"] = os.path.expanduser("~/tensorflow_datasets")  # default location of tfds database

import tensorflow as tf
import tensorflow_datasets as tfds

import librosa
import librosa.display

import numpy as np
from matplotlib import pyplot as plt

from pathlib import Path

from IPython.display import Audio

import dpmhm
from dpmhm.datasets import transformer, preprocessing, feature, utils

In [None]:
# Root directory of the TFDS database, with '~' expanded to the user's home.
TFDS_DATA_DIR = os.path.expanduser(
    '~/tensorflow_datasets/'
)

In [None]:
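# Optional one-off shell steps (kept commented out): unpack the PHMAP archive
# and move it under ~/tmp/, presumably to match the manual_dir used by the
# install call below.
# NOTE: each `!` command runs in its own subshell, so `!cd` does not carry over
# to the following lines; use `%cd` instead if these steps are re-enabled.
# !cd ~/Téléchargements
# !pwd
# !ls
# !unzip /phmap.zip
# !mv phmap ../tmp/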

In [None]:
# Install the PHMAP2021 dataset into the TFDS database directory, using the
# manually downloaded files found in `manual_dir`.
# The return value is bound to `_` so the cell displays nothing.
phmap_manual_dir = os.path.expanduser('~/tmp/phmap/')
_ = dpmhm.datasets.install(
    'PHMAP2021',
    data_dir=TFDS_DATA_DIR,
    manual_dir=phmap_manual_dir,
)

In [None]:
dataset_name = 'phmap2021'

# Load the dataset (all splits, as a dict keyed by split name) together with
# its metadata object.
# Options left disabled on purpose:
#   split=['train[:75%]', 'train[75%:]']   or   split=['all']
#   batch_size=-1   # load the whole dataset into a single batch
load_options = dict(data_dir=TFDS_DATA_DIR, with_info=True)
ds_all, ds_info = tfds.load(dataset_name, **load_options)

print(ds_all.keys())  # show the split of the raw dataset

# Work on the 'train' split from here on.
ds0 = ds_all['train']

In [None]:
# Number of records in the 'train' split, converted to a NumPy scalar and
# displayed as the cell output.
ds0.cardinality().numpy()

In [None]:
# Materialise the 'train' split in memory as a list of NumPy-converted records.
n_records = ds0.cardinality().numpy()
eles = list(ds0.take(n_records).as_numpy_iterator())

# Optional inspection of per-record metadata:
# for x, el in enumerate(eles):
#     print(el['metadata']['LoadForce'],el['metadata']['FileName'])

In [None]:
# Inspect channel 'sig1' of the record at index 1: waveform plot + audio player.
record = eles[1]
fn = record['metadata']['FileName'].decode()
sr = record['sampling_rate']
x = record['signal']['sig1']
label = record['metadata']['Label'].decode()

time_axis = np.arange(len(x)) / sr  # sample index divided by the sampling rate
fig, ax = plt.subplots()
ax.plot(time_axis, x)
ax.set_xlabel('Time (s)')
ax.set_title(f'Fichier : {fn} Label : {label}')

# Last expression of the cell: renders an audio player for the signal.
Audio(x, rate=sr)

In [None]:
# Inspect channel 'sig2' of the record at index 1: waveform plot + audio player.
record = eles[1]
fn = record['metadata']['FileName'].decode()
sr = record['sampling_rate']
x = record['signal']['sig2']
label = record['metadata']['Label'].decode()

time_axis = np.arange(len(x)) / sr  # sample index divided by the sampling rate
fig, ax = plt.subplots()
ax.plot(time_axis, x)
ax.set_xlabel('Time (s)')
ax.set_title(f'Fichier : {fn} Label : {label}')

# Last expression of the cell: renders an audio player for the signal.
Audio(x, rate=sr)

In [None]:
# Filter on the channels
channels = ['sig1','sig2']

# Label ramification
keys = [] 

# Selection of domain
# filters = {'SensorType': [b'pin',b'po']}  # source domain
filters = {}  # no selection

# Resampling rate in Hz
resampling_rate = 50000  # no resampling

# Size of the sliding window (after resampling)
elem=list(ds0.take(5).as_numpy_iterator())
window_size = resampling_rate  # 1 second
# window_size = len(elem[0]['signal']['sig1'])

# Number of samples to skip between sucessive window
hop_size = window_size//2

from dpmhm.datasets import transformer

compactor = transformer.DatasetCompactor(ds0, channels=channels, keys=keys, filters=filters,
                                         resampling_rate=resampling_rate, 
                                         window_size=window_size, hop_size=hop_size)

In [None]:
# Dataset produced by the compactor.
ds1 = compactor.dataset

# Compare the 'signal' element spec before and after compaction.
for _tag, _ds in (("Original:", ds0), ("Compacted:", ds1)):
    print(_tag, _ds.element_spec['signal'])

In [None]:
# First two records of the raw and of the compacted dataset, as NumPy records.
eles0, eles1 = (
    list(_ds.take(2).as_numpy_iterator()) for _ds in (ds0, ds1)
)

# Last expression of the cell: display the full element spec of the compacted dataset.
ds1.element_spec

In [None]:
from dpmhm.datasets import feature

# Feature extractor

def _func(x, sr):
    """Return element [0] of the 'spectrogram' output of `feature.spectral_features`.

    Args:
        x: signal passed through to `feature.spectral_features`.
        sr: sampling rate passed through to `feature.spectral_features`.

    The analysis window and hop are derived from the notebook-level
    `window_size` and `resampling_rate`, which are read at call time.
    """
    frame_duration = window_size/resampling_rate
    hop_duration = window_size/(resampling_rate*2)
    outputs = feature.spectral_features(
        x, sr, 'spectrogram',
        time_window=frame_duration,
        hop_step=hop_duration,
        normalize=False,
        to_db=True,
    )
    return outputs[0]

# Alternative feature: mel-spectrogram
# _func = lambda x, sr: feature.spectral_features(x, sr, 'melspectrogram', 
#                                                 time_window=0.025, hop_step=0.0125, normalize=False,
#                                                 feature_kwargs={'n_mels':128})[0]

extractor = transformer.FeatureExtractor(compactor.dataset, _func)

In [None]:
# Dataset produced by the feature extractor, and its element spec.
ds2 = extractor.dataset
print(ds2.element_spec)

# First two feature records, as NumPy records.
ds2_head = ds2.take(2)
eles2 = list(ds2_head.as_numpy_iterator())

In [None]:
# Shape of the feature array of the first extracted record.
X = eles2[0]
feature_array = X['feature']
print(feature_array.shape)

# Show the slice at index 0 of the feature array (presumably the first
# channel — TODO confirm) as an image.
plt.matshow(feature_array[0], origin='lower')
plt.xlabel('Time bin')
plt.ylabel('Frequency bin')