In [2]:
import math
import subprocess

import numpy as np

import librosa as lr
from librosa import display

import torch
import torch.nn as nn
import torch.nn.functional as F

from matplotlib import pyplot as plt
import IPython

from parallel_wavenet import *
from wavenet_dataset import *
from constant_q_transform import *
from parallel_wavenet_trainer import *

In [None]:
# Instantiate WaveNetModel with its default constructor arguments and report
# the input/output lengths it exposes.
model = WaveNetModel()
print(f"input_length: {model.input_length!s}")
print(f"output length: {model.output_length!s}")

In [None]:
# Build the dataset from the 'audio_clips' directory, with item and target
# lengths taken from the model created earlier.
dataset = ParallelWavenetDataset(
    'audio_clips',
    item_length=model.input_length,
    target_length=model.output_length,
    test_stride=0,
)
print(f"dataset length: {len(dataset)}")

In [None]:
# Fetch item 10 from the dataset; it unpacks into an input example and its target.
example, target = dataset[10]

In [None]:
# Audio player for the example (size-1 dimensions squeezed out) at a 16000 Hz sample rate.
IPython.display.Audio(example.squeeze(), rate=16000)

In [None]:
# Run the model on the single example and display the shape of what it returns.
output = model(example)
output.shape

In [None]:
# Shape of the target that the dataset returned alongside the example.
target.shape

In [None]:
# Constant-Q transform module (from constant_q_transform): sr=16000, fmin=30,
# 250 bins at 32 bins per octave.
# NOTE(review): fmin is presumably in Hz — confirm against the CQT implementation.
cqt_module = CQT(sr=16000, fmin=30, n_bins=250, bins_per_octave=32)

In [None]:
# Scalogram = squared magnitude of the constant-Q transform of the example,
# with size-1 dimensions squeezed out.
cqt = cqt_module(example)
scalogram = abs(cqt).squeeze() ** 2

# Show the log-scaled scalogram as an image with row 0 at the bottom.
fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(np.log(scalogram.detach()), origin='lower', aspect=1.)
plt.show()

In [None]:
# Shape of the squared-magnitude CQT (scalogram) of the example.
scalogram.shape

In [None]:
# Configure and build the conditioned WaveNet.
# NOTE: conditioning_model_settings is the same object as
# conditioning_wavenet_default_settings (no copy is made), so the assignments
# below also modify the imported defaults for every later cell.
conditioning_model_settings = conditioning_wavenet_default_settings
conditioning_model_settings['conditioning_period'] = 128
conditioning_model_settings['conditioning_channels'] = [250, 16]
conditioning_model = WaveNetModelWithConditioning(conditioning_model_settings)

In [None]:
# Recompute the scalogram (squared CQT magnitude) of the example and feed the
# conditioning model a tuple of (example, scalogram with a new leading dimension).
# NOTE(review): the dimension added by unsqueeze(0) is presumably the batch axis — confirm.
cqt = cqt_module(example)
scalogram = abs(cqt).squeeze()**2
model_output = conditioning_model((example, scalogram.unsqueeze(0)))

In [None]:
# Shape of the conditioning model's output.
model_output.shape

In [None]:
# Build a 1-D signal: example[0, 0] with its last `output_length` entries
# dropped, followed by column 0 of the model output.
output_signal = torch.cat(
    (
        example[0, 0, :-conditioning_model.output_length],
        model_output[:, 0],
    ),
    dim=0,
)

In [None]:
# Audio player for the assembled signal (detached from the autograd graph) at a 16000 Hz sample rate.
IPython.display.Audio(output_signal.detach(), rate=16000)

In [None]:
# Scalogram of the assembled signal: view it as shape (1, 1, N), apply the
# CQT module, and take the squared magnitude.
output_cqt = cqt_module(output_signal.view(1, 1, -1))
output_scalogram = abs(output_cqt).squeeze() ** 2

# Show the log-scaled scalogram as an image with row 0 at the bottom.
fig, ax = plt.subplots(figsize=(8, 8))
ax.imshow(np.log(output_scalogram.detach()), origin='lower', aspect=1.)
plt.show()

In [None]:
# Build a conditioned WaveNet with its own settings and run a short training call.
# Work on a copy of the defaults so the overrides below do not leak into
# conditioning_wavenet_default_settings (and, through it, into other cells).
wavenet_settings = conditioning_wavenet_default_settings.copy()
wavenet_settings['conditioning_channels'] = [256, 16]
wavenet_settings['skip_channels'] = 32
test_model = WaveNetModelWithConditioning(wavenet_settings)

# Dataset items are sized to this model's input/output lengths.
dataset = ParallelWavenetDataset(
    'audio_clips',
    item_length=test_model.input_length,
    target_length=test_model.output_length,
    test_stride=0,
)

# CQT() is constructed with the transform's default parameters.
trainer = ParallelWavenetTrainer(test_model, dataset, CQT())
# NOTE(review): the meaning of the positional arguments (4, 1) is defined by
# ParallelWavenetTrainer.train, which is not visible here — confirm.
trainer.train(4, 1)

In [3]:
# Load the house-music dataset with fixed item and target lengths of 100.
# NOTE(review): the path is an absolute location on an external volume, so
# this cell only runs on a machine where that volume is mounted.
dataset = ParallelWavenetDataset(
    '/Volumes/Elements/Projekte/scalogram-wavenet/house_dataset',
    item_length=100,
    target_length=100,
    test_stride=0,
)

processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/BEC010300111_Kate Ryan_Hurry Up.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/CAN110601056_BT_Swing Away.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEBY30730703_Cosmic Baby_La Ciudad De Los Dioses.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEE190500058_WestBam_Like That.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEE190500060_WestBam_Total Extrem.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEN060700147_ATB_Alcarda.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEW760400003_Paul van Dyk_Crush - PvD Remix.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEW760700136_Paul van Dyk_We Are Alive - Christopher Just Remix.m4a
processed /Volumes/Elements/Projekte/scalogram-wavenet/house_dataset/DEY470919860_Quazar_Travel Light.m4a
proc

In [None]:
# Run the external command `tst asd` and keep its exit status in `rc`.
# `subprocess` is imported in the notebook's import cell at the top.
# NOTE(review): this looks like a scratch experiment; subprocess.call raises
# FileNotFoundError when no `tst` executable is found — confirm the cell is
# still needed.
rc = subprocess.call(["tst", "asd"])

In [None]:
# Display the exit status stored in `rc` by the subprocess call.
rc