# Generate speech with an XTTS v2 checkpoint

### Init model

In [None]:
import os
import torch
import torchaudio
from TTS.tts.configs.xtts_config import XttsConfig
from TTS.tts.models.xtts import Xtts
from subprocess import getoutput
from IPython.display import Audio


# Path to the XTTS config.json stored next to the checkpoint
CONFIG_PATH = "xttsv2_checkpoint/tyler1_xttsv2-February-28-2024_08+23AM-2b31060/config.json"
# Vocab file that was used to train the model
TOKENIZER_PATH = "xttsv2_checkpoint/XTTS_v2.0_original_model_files/vocab.json"
# Checkpoint to run inference with
XTTS_CHECKPOINT = "xttsv2_checkpoint/tyler1_xttsv2-February-28-2024_08+23AM-2b31060/best_model.pth"

# Directory that holds the speaker reference clips
WAVS_DIR = "data/wavs/"

# Speaker reference clips.
# - Match on the ".wav" extension; a substring test would also pick up files
#   such as "wav_notes.txt".
# - Sort the result: os.listdir returns entries in arbitrary order, which
#   would make the reference list differ between runs/machines.
SPEAKER_REFERENCE = sorted(
    WAVS_DIR + name
    for name in os.listdir(WAVS_DIR)
    if name.lower().endswith(".wav")
)
if not SPEAKER_REFERENCE:
    # Fail here with a clear message instead of deep inside the model call.
    raise FileNotFoundError(f"No .wav speaker reference files found in {WAVS_DIR!r}")

# List of all wavs for speaker reference. Kept under its original name for any
# later cell that reads it; built in Python instead of shelling out to `ls`,
# which is not portable and yields an error string when nothing matches.
wavs = list(SPEAKER_REFERENCE)

# output wav path
OUTPUT_WAV_PATH = "generated_audio/xtts_output_es.wav"


# Build the model from its config, then load the fine-tuned weights and vocab.
config = XttsConfig()
config.load_json(CONFIG_PATH)
model = Xtts.init_from_config(config)
model.load_checkpoint(config, checkpoint_path=XTTS_CHECKPOINT, vocab_path=TOKENIZER_PATH, use_deepspeed=False)

# Move the model to the GPU when one is present; an unconditional .cuda()
# raises on machines without CUDA.
if torch.cuda.is_available():
    model.cuda()
else:
    print("CUDA is not available; keeping the model on the CPU (inference will be slow).")

# Conditioning inputs computed from the reference clips; they are passed to
# model.inference in the output cell.
gpt_cond_latent, speaker_embedding = model.get_conditioning_latents(audio_path=SPEAKER_REFERENCE)

### Run inference and save the output

In [None]:
import time

# output wav path (replaces the value assigned in the init cell)
OUTPUT_WAV_PATH = "generated_audio/xtts_output_de.wav"

# Create the target folder up front so the save below does not depend on it
# already existing -- otherwise a missing directory would throw away the
# result of a slow synthesis run.
os.makedirs(os.path.dirname(OUTPUT_WAV_PATH) or ".", exist_ok=True)

# perf_counter is the clock intended for measuring elapsed time; time.time()
# follows the wall clock and can jump if the system time is adjusted.
start = time.perf_counter()
out = model.inference(
    # Positional arguments: text to speak, then its language code.
    # The commented-out pairs are alternative prompts for other languages.
    # "So he starts off level one just doing this. Like oh he's gonna like bro he losing, doesn't hit a single thing. Look at it like- what the fuck. Like bro okay whatever. Watch this top dive bro. I solo made Vayne one HP, right? Just wait out and fucking ghost, you twat.",
    # "en",
    # "Esta es mi voz en español. ¡Es Tyler1, bebé! Joder, si, hablo español.",
    # "es",
    "Hallo Isi, hier ist Tyler1 und das ist meine Stimme auf Deutsch. Willlkommen in Manila. Ich hoffe du hast eine gute Zeit und wirst viel Spass haben. Vergiss nicht deinen Lümmel einzutüte",
    "de",
    # "C'est ma voix en français. C'est Tyler 1 bébé ! Putain ouais je parle français. J'ai mangé un croissant et une baguette today",
    # "fr",
    gpt_cond_latent,
    speaker_embedding,
    temperature=0.7, # Add custom parameters here
)
# unsqueeze(0) adds a leading dimension before saving; 24000 is the sample
# rate written to the file (presumably the model's output rate -- TODO confirm
# against the loaded config).
torchaudio.save(OUTPUT_WAV_PATH, torch.tensor(out["wav"]).unsqueeze(0), 24000)

# Elapsed seconds for synthesis plus writing the file.
end = time.perf_counter()
print(end-start)

# Last expression of the cell: renders an audio player for the saved file.
Audio(OUTPUT_WAV_PATH)

# Push to S3

In [4]:
import boto3

# Local file to upload, the bucket it goes to, and the object key it is stored under.
file_path = "generated_audio/bass_boosted/xtts_output-bass_boost.wav"
bucket_name = "audio-messages-bucket"
key = "username/tyler1/game_id/timestamp.wav"

# Address of the object under the CloudFront domain
# (presumably this distribution fronts the bucket above -- confirm).
url = f"https://d1d78cjctwypjb.cloudfront.net/{key}"

In [5]:
import boto3
from botocore.exceptions import NoCredentialsError

def upload_file_to_s3(file_path, bucket_name, s3_key=None):
    """
    Upload a file to an S3 bucket

    Failures are reported by printing a message and returning False; upload
    errors are not re-raised to the caller.

    :param file_path: Path to file to upload
    :param bucket_name: Name of the bucket to upload to
    :param s3_key: S3 object name. If omitted or None, the base name of
        file_path is used
    :return: True if file was uploaded, else False
    """
    # Imported locally so this cell does not depend on another cell having
    # imported os first.
    import os

    # If S3 object name is not specified, use the file name
    if s3_key is None:
        s3_key = os.path.basename(file_path)

    # Create an S3 client
    s3_client = boto3.client('s3')
    try:
        # Upload the file
        s3_client.upload_file(file_path, bucket_name, s3_key)
    except NoCredentialsError:
        print("Credentials not available or invalid.")
        return False
    except Exception as e:
        # Deliberate catch-all: any other upload failure is reported and
        # turned into a False return.
        print(f"Failed to upload file: {e}")
        return False

    print(f"File {file_path} uploaded to {bucket_name}/{s3_key}.")
    return True

In [6]:
# Upload the configured file; the cell displays the returned success flag.
upload_file_to_s3(file_path=file_path, bucket_name=bucket_name, s3_key=key)

File generated_audio/bass_boosted/xtts_output-bass_boost.wav uploaded to audio-messages-bucket/username/tyler1/game_id/timestamp.wav.


True