# Audio Data Block

## Setup

In [1]:
#Export

#Internal dependencies
from exp.nb_AudioCommon import * 

In [2]:
#Export

#External dependencies
from fastai.utils import *
from fastai.vision import *
from IPython.display import Audio
import torchaudio
from torchaudio import transforms

## Data Block classes

### AudioItem
This is the base class of fast.ai Audio. The key point is that its `data` property isn't a 'raw' tensor; instead, it holds a reference to an `AudioData` object.

In [3]:
#Export        
class AudioItem(ItemBase):
    "Base item of fast.ai Audio: `data` holds an `AudioData` (read here through `.sig` and `.sr`), not a raw tensor."
    def __init__(self, data:AudioData, **kwargs):
        # `data` is stored exactly as given; no reshaping or flattening happens in this class.
        self.data = data
        self.kwargs = kwargs

    def __str__(self):
        "One-line summary; falls back to type and shape when `data` is not an `AudioData`."
        if isinstance(self.data, AudioData): return f'{self.__class__.__name__}: {self.duration}sec ({len(self)} @ {self.data.sr}hz).'
        else: return f'{type(self.data)}: {self.data.shape}'

    def __len__(self):
        "Size of the first dimension of the signal."
        return self.data.sig.shape[0]

    def _repr_html_(self):
        "Notebook repr: the text summary, followed by an audio player when `data` is an `AudioData`."
        if not isinstance(self.data, AudioData):
            # `apply_tfms` may have replaced `data` with something that has no `.sig`/`.sr`
            # (the same case `__str__` handles), so only the escaped text summary is shown.
            import html
            return html.escape(str(self))
        return f'{self.__str__()}<br />{self.ipy_audio._repr_html_()}'
    
    def show(self, title:Optional[str]=None, **kwargs):
        "Play the sound, printing `title` first when given. Extra `kwargs` are accepted and ignored."
        self.hear(title=title)

    def hear(self, title=None, **kwargs):
        "Print `title` (if not None) and display the audio player."
        # `kwargs` are accepted and ignored: `AudioList.hear_xys` forwards its keyword
        # arguments here, which would otherwise raise a `TypeError`.
        if title is not None: print(title)
        display(self.ipy_audio)

    def apply_tfms(self, tfms, **kwargs):
        "Call each transform in `tfms` in order on `self.data`, storing the result back, and return `self`."
        # fastai's `LabelList.__getitem__` calls `x.apply_tfms(self.tfms, **self.tfmargs)`,
        # so extra keyword arguments must be tolerated and `tfms` may be None.
        if tfms is None: tfms = []
        for tfm in tfms:
            self.data = tfm(self.data)
        return self
        
    @property
    def shape(self):
        "Shape of the signal."
        return self.data.sig.shape
    
    @property
    def ipy_audio(self):
        "An `IPython.display.Audio` player built from the signal and its sample rate."
        return Audio(data=self.data.sig, rate=self.data.sr)

    @property
    def duration(self):
        "Length of the signal divided by the sample rate."
        return len(self.data.sig)/self.data.sr

### AudioDataBunch

In [4]:
#Export
class AudioDataBunch(DataBunch):
    "`DataBunch` with a helper for listening to a few examples."
    def hear_ex(self, rows:int=3, ds_type:DatasetType=DatasetType.Valid, **kwargs):
        "Play the first `rows` items of the `ds_type` dataset, each one titled with its label."
        samples = self.dl(ds_type).dataset[:rows]
        play_pairs = self.train_ds.hear_xys
        play_pairs(samples.x, samples.y, **kwargs)

### AudioList
This class holds a list of `AudioItem`s.

In [5]:
#Export
class AudioList(ItemList):
    "`ItemList` whose items are audio file paths or `(data, sr)` pairs, each returned as an `AudioItem`."
    _bunch = AudioDataBunch
    
    # TODO: __REPR__    
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
            
    def get(self, i):
        """Return item `i` wrapped in an `AudioItem`.

        A `Path` or `str` item is loaded with `AudioData.load`; a tuple or
        `np.ndarray` item is read as `(data, sr)` and passed to `AudioData`.

        Raises `NotImplementedError` for any other item type. The previous bare
        `raise` surfaced as `RuntimeError: No active exception to reraise`;
        `NotImplementedError` subclasses `RuntimeError`, so existing handlers still match.
        """
        item = self.items[i]
        if isinstance(item, (Path, str)):
            return AudioItem(AudioData.load(str(item)))
        if isinstance(item, (tuple, np.ndarray)): #data,sr 
            return AudioItem(AudioData(item[0],item[1]))
        raise NotImplementedError(f'Format not supported! Item {i} has type {type(item)}.')

    def reconstruct(self, t:Tensor):
        "Wrap `t` in an `Image` after swapping its dimensions 1 and 2."
        # NOTE(review): presumably meant for spectrogram output; a 1-D signal tensor has no dims 1 and 2 - confirm.
        return Image(t.transpose(1,2))

    def hear_xys(self, xs, ys, **kwargs):
        "Play every item of `xs`, using the matching element of `ys` as its title."
        for x, y in zip(xs, ys): x.hear(title=y, **kwargs)
            
    # TODO: example with from_folder
    @classmethod
    def from_folder(cls, path:PathOrStr='.', extensions:Collection[str]=None, **kwargs)->ItemList:
        "Same as the parent `from_folder`, with `extensions` defaulting to `AUDIO_EXTENSIONS` when None."
        extensions = ifnone(extensions, AUDIO_EXTENSIONS)
        return super().from_folder(path=path, extensions=extensions, **kwargs)

### Audio Transforms - should this be here or in another place?

In [6]:
#Export
def get_audio_transforms(spectro:bool=False,
                         white_noise:bool=True,
                         modulate_volume:bool=True,
                         random_cutout:bool=True,
                         pad_with_silence:bool=True,
                         pitch_warp:bool=True,
                         down_and_up:bool=True,
                         mx_to_pad:int=1000,
                         xtra_tfms:Optional[Collection[Transform]]=None,
                         **kwargs)->Collection[Transform]:
    "Build the `(train, valid)` pair of audio transform lists selected by the boolean flags."
    # Optional augmentations in application order; every enabled one is bound to the shared `kwargs`.
    augmentations = (
        (white_noise,      tfm_add_white_noise),
        (modulate_volume,  tfm_modulate_volume),
        (random_cutout,    tfm_random_cutout),
        (pad_with_silence, tfm_pad_with_silence),
        (pitch_warp,       tfm_pitch_warp),
        (down_and_up,      tfm_down_and_up),
    )
    train_tfms = [partial(tfm, **kwargs) for enabled, tfm in augmentations if enabled]
    train_tfms.append(partial(tfm_pad_to_max, mx=mx_to_pad))
    # Closing step shared by both lists: spectrogram when `spectro` is set, signal extraction otherwise.
    last_tfm = partial(tfm_spectro, **kwargs) if spectro else tfm_extract_signal
    train_tfms.append(last_tfm)
    # Validation only pads to `mx_to_pad` and applies the closing step (no augmentation).
    valid_tfms = [partial(tfm_pad_to_max, mx=mx_to_pad), last_tfm]
    return (train_tfms + listify(xtra_tfms), valid_tfms)

## Testing

### Sample data for test

In [7]:
# Sample data shared by the test cells below: `path` is the dataset folder and
# `good_sample` is one entry from it.
from fastai.basics import url2name, datapath4file, untar_data
data_url = 'http://www.openslr.org/resources/45/ST-AEDS-20180100_1-OS'
# Local location for the dataset, derived from the name part of the URL.
path = datapath4file(url2name(data_url))
# Fetch and extract the archive into `path` (needs network access the first time, presumably skipped once present - confirm).
untar_data(data_url, dest = path) 
# NOTE(review): `ls()` order is presumably filesystem-dependent, so index 256 may pick a
# different entry (possibly not a .wav file) on another machine - confirm.
good_sample = path.ls()[256] # arbitrary choice of file

In [8]:
def test_AudioItem_create_from_data(f):
    "Build an `AudioItem` from the signal and sample rate read by `torchaudio.load` and sanity-check it."
    wave, rate = torchaudio.load(f)
    item = AudioItem(AudioData(wave, rate))
    # The wrapped signal must be one-dimensional, non-trivial in length, and sampled at 16 kHz.
    assert len(item.data.sig.shape) == 1, 'Single dimension data'
    assert 100 < item.data.sig.shape[0], 'Has data'
    assert item.data.sr == 16000
    display(item)

test_AudioItem_create_from_data(good_sample)

In [9]:
def test_AudioItem_create_from_audio_file(f):
    "Build an `AudioItem` straight from a file via `AudioData.load` and sanity-check it."
    item = AudioItem(AudioData.load(f))
    # The wrapped signal must be one-dimensional, non-trivial in length, and sampled at 16 kHz.
    assert len(item.data.sig.shape) == 1, 'Single dimension data'
    assert 100 < item.data.sig.shape[0], 'Has data'
    assert item.data.sr == 16000
    display(item)

test_AudioItem_create_from_audio_file(good_sample)

In [10]:
import glob

In [11]:
def test_AudioList_from_df_file_names():
    "Create an `AudioList` from a DataFrame column of file names and fetch one item from it."
    # Up to ten .wav paths found anywhere below `path`, in a single `FileName` column
    wav_pattern = str(path/'**/*.wav')
    wav_files = glob.glob(wav_pattern, recursive=True)[:10]
    df = pd.DataFrame(wav_files)
    df.columns = ['FileName']
    display(df.head())

    audio_list = AudioList.from_df(df, path, cols=['FileName'])

    # Fetch one entry and show what was loaded for it
    idx = 5
    print(f'FileName: {df.FileName[idx]}')
    item = audio_list.get(idx)
    print(item.data.sig.shape, item.data.sr)
    display(item)

test_AudioList_from_df_file_names()

Unnamed: 0,FileName
0,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...
1,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...
2,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...
3,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...
4,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...


FileName: /home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/m0005_us_m0005_00282.wav
torch.Size([46720]) 16000


In [12]:
def test_AudioList_from_folder():
    "Create an `AudioList` from the sample-data folder and fetch one item from it."
    audio_list = AudioList.from_folder(path)

    # Fetch one entry and show what was loaded for it
    idx = 4
    item = audio_list.get(idx)
    print(item.data.sig.shape, item.data.sr)
    display(item)

test_AudioList_from_folder()

torch.Size([60800]) 16000


In [13]:
def test_AudioList_from_df_data_and_sr():
    "Create an `AudioList` from a DataFrame column of `torchaudio.load` results and fetch one item from it."
    # Up to ten .wav paths found anywhere below `path`, in a `FileName` column
    wav_pattern = str(path/'**/*.wav')
    wav_files = glob.glob(wav_pattern, recursive=True)[:10]
    df = pd.DataFrame(wav_files)
    df.columns = ['FileName']
    # Each `SampleAndSr` cell holds whatever `torchaudio.load` returns for that row's file
    df['SampleAndSr'] = df['FileName'].apply(torchaudio.load)

    display(df.head())

    audio_list = AudioList.from_df(df, path, cols=['SampleAndSr'])

    # Fetch one entry and show what was loaded for it
    idx = 6
    print(f'FileName: {df.FileName[idx]}')
    item = audio_list.get(idx)
    print(item.data.sig.shape, item.data.sr)
    display(item)

test_AudioList_from_df_data_and_sr()

Unnamed: 0,FileName,SampleAndSr
0,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...,"([[tensor(0.), tensor(0.), tensor(0.), tensor(..."
1,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...,"([[tensor(0.), tensor(0.), tensor(0.), tensor(..."
2,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...,"([[tensor(0.), tensor(0.), tensor(0.), tensor(..."
3,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...,"([[tensor(0.), tensor(0.), tensor(0.), tensor(..."
4,/home/jupyter/.fastai/data/ST-AEDS-20180100_1-...,"([[tensor(0.), tensor(0.), tensor(0.), tensor(..."


FileName: /home/jupyter/.fastai/data/ST-AEDS-20180100_1-OS/f0005_us_f0005_00432.wav
torch.Size([65280]) 16000


## Sample AudioDataBunch Usage

<span style="color:red">CURRENTLY BROKEN</span>

In [14]:

# def process_phn_file(p_file, sig, sr, delimiter=' '):
#     df = pd.read_csv(p_file, delimiter=delimiter, header=None)
#     df.columns = ['Start', 'End', 'Phn']
#     df['SampleAndSr'] = df.apply(lambda x : (sig[-1][x['Start']: x['End']], sr), axis=1)
#     return df

# def create_phn_df(path, count=100):
#     phns = []
#     final = pd.DataFrame()

#     for phn_file in glob.glob(str(path/'**/*.PHN'), recursive=True)[:count]:
#         sig,sr = torchaudio.load(str(phn_file.replace('PHN', 'WAV')))
#         df = process_phn_file(phn_file, sig, sr, delimiter=' ')
#         df['Source'] = phn_file
#         final = final.append(df, ignore_index=True)
#     return df

# df = create_phn_df(path/'TRAIN')
# df.head()

In [15]:
# step0 = AudioList.from_df(df, path, cols=['SampleAndSr']); print(type(step0))

In [16]:
# step0

In [17]:
# step1 = step0.split_by_rand_pct(0.1, seed=1);
# print(f'Result type: {type(step1)}, Type of Items: {type(step1.lists[0])}')

In [18]:
# step2 = step1.label_from_df('Phn');
# print(f'Result type: {type(step2)}')

In [19]:
# 

In [20]:
# tfms = get_audio_transforms()
# step3 = step2.transform(tfms);
# print(type(step3))

In [21]:
# step4 = step3.databunch(bs=batch_size);
# print(type(step4))

In [22]:
# step4 = step3.databunch(bs=batch_size);
# step4

In [23]:
# max_len = 1000
# datablock = step4
# batch = datablock.one_batch()
# print(len(batch[0]), batch[0].shape)
# assert batch_size == len(batch[0])
# assert max_len == batch[0].shape[1]

In [24]:
# datablock.show_batch()

# Export

In [25]:
# Convert this notebook into the importable module nb_DataBlock.py
# (presumably only the cells marked `#Export` are written out - confirm against notebook2script.py).
!python notebook2script.py DataBlock.ipynb

Converted DataBlock.ipynb to nb_DataBlock.py
