# Dependencies and Imports

In [None]:
#@title Install dependencies

!pip install -q torchaudio omegaconf

import torch
from pprint import pprint
from omegaconf import OmegaConf
from IPython.display import Audio, display

# Download the current models.yml from the silero-models repo and parse it
MODELS_YML_URL = 'https://raw.githubusercontent.com/snakers4/silero-models/master/models.yml'
MODELS_YML_PATH = 'latest_silero_models.yml'

torch.hub.download_url_to_file(MODELS_YML_URL, MODELS_YML_PATH, progress=False)
models = OmegaConf.load(MODELS_YML_PATH)

# Colab Demo

In [None]:
# Show the languages and speakers listed under `tts_models` in the loaded models.yml
tts_catalogue = models.tts_models
available_languages = list(tts_catalogue.keys())
print(f'Available languages {available_languages}')

for lang in available_languages:
    speakers = [name for name in tts_catalogue.get(lang).keys()]
    print(f'Available speakers for {lang}: {speakers}')

In [None]:
import torch

# Language / speaker pair to synthesize with, and the device to run on
language, speaker = 'ru', 'kseniya_16khz'
device = torch.device('cpu')

# Load the `silero_tts` entry point through torch.hub, then unpack the five
# values it returns in a separate step
hub_bundle = torch.hub.load(repo_or_dir='snakers4/silero-models',
                            model='silero_tts',
                            language=language,
                            speaker=speaker)
model, symbols, sample_rate, example_text, apply_tts = hub_bundle
model = model.to(device)  # gpu or cpu

In [None]:
# apply_tts takes a batch of texts, so wrap the single example in a list
example_texts = [example_text]
audio = apply_tts(
    texts=example_texts,
    model=model,
    sample_rate=sample_rate,
    symbols=symbols,
    device=device,
)

# Print the synthesized sentence and play back the first result
print(example_text)
first_clip = audio[0]
display(Audio(first_clip, rate=sample_rate))

## Enhance synthesis with logmmse

In [None]:
!pip install -q logmmse

You can try to enhance the synthesized audio with the logmmse algorithm, though it may require parameter tuning for a particular speaker.

In [None]:
import numpy as np
from logmmse import logmmse

# Convert the first synthesized clip to a numpy array before passing it to logmmse
noisy_clip = np.array(audio[0])

# output_file=None: no output path is given; the returned value is played back below
enhanced = logmmse(
    noisy_clip,
    sample_rate,
    output_file=None,
    initial_noise=1,
    window_size=160,
    noise_threshold=0.15,
)
display(Audio(enhanced, rate=sample_rate))

# Minimal Example to Run Locally 

We have received a lot of questions regarding the packaging requirements and utils from the `silero-models` repo from people trying to run models locally standalone (on their desktop for example). The project is packaged using `torch.hub` utils which basically are in the `hubconf.py` [file](https://github.com/snakers4/silero-models/blob/master/hubconf.py) and `tts_utils.py` [file](https://github.com/snakers4/silero-models/blob/master/tts_utils.py).

For some reason this is very difficult to understand for some users. Also, `hubconf.py` contains some dependencies that are not necessarily required when running TTS, for example, on your desktop, i.e. `torchaudio` and `omegaconf`.

The following example is a minimal standalone example for such a use case. It has very few external dependencies (essentially just `torch`; the rest is just the Python standard library). It basically does the following:

- Downloads one of the 16 kHz models (I just chose one at random) listed in the models.yml [file](https://github.com/snakers4/silero-models/blob/master/models.yml) and keeps the local copy as a cache;
- The `symbols` are taken from the same  models.yml file;
- `apply_tts` is just one of the utils we provide in the project;
- The rest is self-explanatory;

In order to use this example, you will need to handle the resulting audio yourself.

In [None]:
import os
import wave
import torch
import contextlib


# Constants for the standalone example
local_file = 'model.jit'  # path where the downloaded model is cached
symbols = '_~абвгдеёжзийклмнопрстуфхцчшщъыьэюя +.,!?…:;–'  # symbol set passed to apply_tts

# Torch runtime setup: run on CPU with autograd switched off
device = torch.device('cpu')
torch.set_grad_enabled(False)
torch.set_num_threads(4)  # safe optimal value, i.e. 2 CPU cores, does not work properly in colab


# Download the model only once; an existing local copy is reused as a cache
if not os.path.isfile(local_file):
  torch.hub.download_url_to_file('https://models.silero.ai/models/tts/ru/v1_kseniya_16000.jit',
                                 local_file)

# Same caching for the helper module that provides `apply_tts`
if not os.path.isfile('tts_utils.py'):
  torch.hub.download_url_to_file('https://raw.githubusercontent.com/snakers4/silero-models/master/tts_utils.py',
                                 'tts_utils.py')

# Import unconditionally: the import must also run when tts_utils.py is already
# present on disk, otherwise `apply_tts` is undefined on a fresh kernel
from tts_utils import apply_tts  # modify these utils and use them in your project
  

# Load the cached TorchScript model from the same path the download step wrote to
model = torch.jit.load(local_file,
                       map_location=device)
model.eval()
model = model.to(device)

# Texts to synthesize in one batch
example_batch = ['В н+едрах т+ундры в+ыдры в г+етрах т+ырят в в+ёдра +ядра к+едров.',
                 'К+отики - +это ж+идкость!',
                 'М+ама М+илу м+ыла с м+ылом.']
sample_rate = 16000  # the downloaded model is one of the 16 kHz ones

# `audio` is indexed per input text further down when the wav files are written
audio = apply_tts(texts=example_batch,
                  model=model,
                  sample_rate=sample_rate,
                  symbols=symbols,
                  device=device)

def write_wave(path, audio, sample_rate):
    """Save PCM audio as a mono, 16-bit .wav file.

    Args:
        path: Destination file path.
        audio: PCM frame data, passed to ``writeframes`` unchanged.
        sample_rate: Frame rate written into the wav header.
    """
    with wave.open(path, 'wb') as wav_out:
        # (nchannels, sampwidth, framerate, nframes, comptype, compname)
        wav_out.setparams((1, 2, sample_rate, 0, 'NONE', 'not compressed'))
        wav_out.writeframes(audio)

# Write every synthesized clip to test_000.wav, test_001.wav, ...
for i, clip in enumerate(audio):
  # Clamp before scaling so samples outside [-1, 1] saturate instead of
  # overflowing in the int16 cast (assumes `clip` is a torch tensor, as the
  # `.numpy()` call implies)
  pcm = (clip.clamp(-1, 1) * 32767).numpy().astype('int16')
  write_wave(path=f'test_{i:03d}.wav',
             audio=pcm,
             sample_rate=sample_rate)  # use the same rate the audio was synthesized with

In [None]:
from IPython.display import Audio, display

# Play back the three wav files written by the previous cell
for wav_name in ('test_000.wav', 'test_001.wav', 'test_002.wav'):
    display(Audio(wav_name, rate=16000))