In [1]:
import os
from pathlib import Path


import pandas

import numpy as np
import soundfile as sf
import torch
# from torch_stoi import NegSTOILoss
import matplotlib.pyplot as plt

from encoder.params_data import *


from encoder import inference as encoder
from synthesizer.inference import Synthesizer
from utils.default_models import ensure_default_models
from vocoder import inference as vocoder

from loss_functions import *
from utils_pgd import *

Install h5py to use hdf5 features: http://docs.h5py.org/
  warn(h5py_msg)


In [2]:
# Report which compute device is available (debugging aid only).
if not torch.cuda.is_available():
    print("Using CPU for inference.\n")
else:
    device_id = torch.cuda.current_device()
    gpu_properties = torch.cuda.get_device_properties(device_id)
    # Values are gathered in the same order the format string consumes them.
    gpu_report = (
        torch.cuda.device_count(),
        device_id,
        gpu_properties.name,
        gpu_properties.major,
        gpu_properties.minor,
        gpu_properties.total_memory / 1e9,
    )
    print("Found %d GPUs available. Using GPU %d (%s) of compute capability %d.%d with "
        "%.1fGb total memory.\n" % gpu_report)

# Load the three models one after another: the default checkpoints are
# ensured first, then the encoder, the synthesizer and the vocoder are loaded.
print("Preparing the encoder, the synthesizer and the vocoder...")

ensure_default_models(Path("saved_models"))
encoder.load_model(Path("saved_models/default/encoder.pt"))
synthesizer = Synthesizer(Path('saved_models/default/synthesizer.pt'))
vocoder.load_model(Path('saved_models/default/vocoder.pt'))

Using CPU for inference.

Preparing the encoder, the synthesizer and the vocoder...
Loaded encoder "encoder.pt" trained to step 1564501
Synthesizer using device: cpu
Building Wave-RNN
Trainable Parameters: 4.481M
Loading model weights at saved_models\default\vocoder.pt


In [3]:
def genAttacks(loss_func, fwd_pass, input_path_pfx, output_path_pfx, pre,
               nb_iter=200, sample_rate=16000):
    """Run `pgd` on every labelled clip found on disk and plot the mean loss curve.

    For each row of ``pre + "clean/labels.csv"`` the clip
    ``{input_path_pfx}/{ID}.wav`` is attacked (rows whose file does not exist
    are skipped), the tensor returned by `pgd` as ``max_noise`` is written to
    ``{output_path_pfx}/{ID}.wav``, and the per-iteration losses are collected.
    The losses are then averaged over clips, plotted, and saved as
    ``{loss_func.__name__}.png`` in the current working directory.

    Args:
        loss_func: Loss callable handed to `pgd`; its ``__name__`` labels the
            y-axis and names the saved PNG.
        fwd_pass: Forward-pass callable handed to `pgd`.
        input_path_pfx: Directory containing the input ``<ID>.wav`` files.
        output_path_pfx: Directory receiving the attacked ``<ID>.wav`` files;
            created if it does not exist.
        pre: Path prefix under which ``clean/labels.csv`` is read. The CSV
            must provide the columns ``ID`` and ``wrd``.
        nb_iter: Number of iterations passed to `pgd` (default 200).
        sample_rate: Sample rate used when writing the output wav files
            (default 16000).

    Returns:
        None.
    """
    tokens = pandas.read_csv(pre + "clean/labels.csv")

    # sf.write does not create missing directories, so make sure it exists.
    os.makedirs(output_path_pfx, exist_ok=True)

    losses_arr = []

    for _, row in tokens.iterrows():
        clip_id = row['ID']
        target_path = f'{input_path_pfx}/{clip_id}.wav'
        target_text = row['wrd']

        # Labels may list clips that are not present in this input directory.
        if not os.path.exists(target_path):
            continue

        wav_init = getPreProcessedInput(target_path)

        # Only the losses and `max_noise` are used; the first two return
        # values are discarded instead of being accumulated in memory.
        _, _, losses, max_noise = pgd(target_text, wav_init, fwd_pass, loss_func, nb_iter=nb_iter)

        losses_arr.append(losses)

        filename = f'{output_path_pfx}/{clip_id}.wav'
        # .cpu() is a no-op for CPU tensors and required before .numpy() on CUDA.
        wav_out = max_noise.clone().squeeze().detach().cpu().numpy().astype(np.float32)
        sf.write(filename, wav_out, sample_rate)

    if not losses_arr:
        # Nothing was attacked: averaging an empty array would only yield NaN.
        print(f"genAttacks: no input files found under {input_path_pfx}; nothing to plot.")
        return

    # Average the per-iteration loss over all attacked clips.
    # Assumes every `losses` sequence has the same length — TODO confirm against pgd.
    arr = torch.tensor(losses_arr).detach().cpu().numpy()
    arr = np.mean(arr, axis=0)

    # Use a fresh figure per call so repeated calls do not draw on top of each
    # other. The figure is left open so the notebook still renders it inline.
    fig, ax = plt.subplots()
    ax.plot(arr)
    ax.set_xlabel('Num Iters')
    ax.set_ylabel(f'Loss ({loss_func.__name__})')
    fig.savefig(f'{loss_func.__name__}.png')

In [4]:
def compute_forward_pass_spectogram(input_sample,input_speech_text):
    """Call `FwdPass` with ``only_spectrogram=True`` and return its result after ``.float()``."""
    return FwdPass(
        input_sample,
        input_speech_text,
        only_spectrogram=True,
    ).float()

def compute_forward_pass_waveform(input_sample,input_speech_text):
    """Call `FwdPass` with ``only_spectrogram=False`` and return its result after ``.float()``."""
    return FwdPass(
        input_sample,
        input_speech_text,
        only_spectrogram=False,
    ).float()

### Generating attacks on the spectrogram

In [None]:
# Root of the project on the mounted Google Drive (Colab-specific path).
pre = "/content/gdrive/MyDrive/Colab_Notebooks/mlsp-speech-noiser-2-master/"

# Each loss writes into the directory named after it. The two output
# directories were previously swapped (l2loss -> attacks/entropy,
# crossEntropy -> attacks/l2), which mislabelled the generated attacks.
genAttacks(l2loss, compute_forward_pass_spectogram, pre + 'clean/test',pre + 'attacks/l2', pre)
genAttacks(crossEntropy, compute_forward_pass_spectogram, pre + 'clean/test',pre + 'attacks/entropy', pre)