## Setup

In [None]:
# Clone the ForwardTacotron repository into ./ForwardTacotron
# (the install cell below changes into that directory).
!git clone https://github.com/as-ideas/ForwardTacotron.git

In [None]:
# Install requirements
%cd ForwardTacotron/
!apt-get install espeak
!pip install -r requirements.txt

In [None]:
# Get pretrained models
!wget https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/ForwardTacotron/forward_step90k.pt
!wget https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/ForwardTacotron/fastpitch_step200k.pt

In [None]:
# Load pretrained models
from notebook_utils.synthesize import Synthesizer
import IPython.display as ipd
# Build one synthesizer per checkpoint. The file names match the two
# checkpoints fetched by the wget cell above, so they are looked up relative
# to the current working directory.
synth_forward = Synthesizer(tts_path='forward_step90k.pt')
synth_fastpitch = Synthesizer(tts_path='fastpitch_step200k.pt')

## Synthesize

In [None]:
def extract_paragraph(file_path, paragraph_number):
    """Return the ``paragraph_number``-th paragraph (1-based) of a text file.

    The file is decoded as latin-1 and split into paragraphs on blank lines
    (two consecutive newline characters).

    Args:
        file_path: Path of the text file to read.
        paragraph_number: 1-based position of the wanted paragraph.

    Returns:
        The paragraph text, or the literal string ``"Paragraph not found"``
        when ``paragraph_number`` is outside ``1..number_of_paragraphs``.
    """
    with open(file_path, 'r', encoding='latin-1') as file:
        paragraphs = file.read().split('\n\n')

    # The lower bound matters: without it, 0 or a negative number would be
    # turned into a negative list index and silently return a paragraph
    # counted from the end of the file.
    if 1 <= paragraph_number <= len(paragraphs):
        return paragraphs[paragraph_number - 1]
    return "Paragraph not found"

In [None]:
# Read the first paragraph of the input text (paragraph numbers are 1-based).
# NOTE(review): no cell visible here creates /content/panchatantra.txt --
# presumably it has to be uploaded to the runtime beforehand; confirm.
# NOTE: extract_paragraph returns the literal string "Paragraph not found"
# instead of raising when the requested paragraph does not exist.
input_text = extract_paragraph('/content/panchatantra.txt', 1)

In [None]:
import numpy as np
import re

# Split the input text into sentences: break on a whitespace character that
# follows '.' or '?', unless the text right before it looks like an
# abbreviation ("x.y." style, or a capital + lowercase letter + '.').
# Blank pieces (e.g. produced by trailing whitespace after the last sentence)
# are dropped so that no empty text is sent to the synthesizer.
sentences = [
    sentence
    for sentence in re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', input_text)
    if sentence.strip()
]
if not sentences:
    raise ValueError("input_text contains no sentences to synthesize")

# Calculate the number of sentences per part (aiming for about 15 parts).
# Clamp to at least 1: with fewer than 15 sentences the integer division
# yields 0, and range() raises ValueError for a step of 0.
total_sentences = len(sentences)
sentences_per_part = max(1, total_sentences // 15)

# Split the sentences into consecutive parts of that size
sentence_parts = [
    sentences[i:i + sentences_per_part]
    for i in range(0, total_sentences, sentences_per_part)
]

# Initialize a list to store the audio segments
audio_segments = []

# Iterate over each part and synthesize the audio.
# pitch_func multiplies whatever values the synthesizer hands it by 1.5
# (presumably the predicted pitch -- confirm against Synthesizer).
pitch_func = lambda x: x * 1.5
for part in sentence_parts:
    part_text = ' '.join(part)  # Join the sentences into a single text for synthesis
    wav = synth_fastpitch(part_text, voc_model='melgan', alpha=1, pitch_function=pitch_func)
    audio_segments.append(wav)

# Concatenate the audio segments in their original order
final_audio = np.concatenate(audio_segments)

# Play the final audio (kept as the last expression so the player is displayed)
ipd.Audio(final_audio, rate=synth_fastpitch.dsp.sample_rate)