In [None]:
# === TRAINING MODE CONFIGURATION ===
DEBUG_MODE = True # @param {type:"boolean"}
USE_DRIVE = True # @param {type:"boolean"}
MOUNT_DRIVE = True # @param {type:"boolean"}

# Select the run profile: (epochs, sample cap, config name) per mode.
if not DEBUG_MODE:
    print("üöÄ MODO PRODU√á√ÉO: Treinamento completo")
    EPOCHS, MAX_SAMPLES, CONFIG_NAME = 200, None, "vits2_english_production"
else:
    print("üêõ MODO DEBUG: Treinamento r√°pido para teste")
    EPOCHS, MAX_SAMPLES, CONFIG_NAME = 3, 100, "vits2_english_debug"

print(f"üìä Configura√ß√£o: {CONFIG_NAME}")
print(f"üîÑ √âpocas: {EPOCHS}")
# A falsy MAX_SAMPLES (None) is reported as "all samples".
print(f"üìà Amostras: {MAX_SAMPLES or 'Todas (~22.910)'}")


In [None]:
# === SYSTEM SETUP AND REPOSITORY CLONING ===
import subprocess
import sys
import os

def run_command(cmd, description):
    """Run a shell command, echoing its combined output line by line.

    Args:
        cmd: Command line handed to the shell (``shell=True``), so shell
            syntax such as ``&&`` works. Only pass trusted strings.
        description: Human-readable label printed before and after the run.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            message contains the command and its exit code.
    """
    print(f"üîÑ {description}")
    # The context manager closes the pipe and reaps the child even if the
    # streaming loop is interrupted. errors="replace" keeps undecodable
    # bytes in the output from aborting the stream with UnicodeDecodeError.
    with subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        universal_newlines=True, errors="replace", bufsize=1
    ) as process:
        # stderr is merged into stdout above, so this loop sees both streams.
        for line in process.stdout:
            print(line.rstrip())

    # Leaving the with-block waits for the process, so returncode is set here.
    if process.returncode != 0:
        raise RuntimeError(
            f"Comando falhou: {cmd} (exit code {process.returncode})"
        )
    print(f"‚úÖ {description} - Conclu√≠do\n")

# Mount Google Drive when requested
if MOUNT_DRIVE:
    from google.colab import drive

    drive.mount('/content/drive')

# Working from Drive is only considered when the mount step above ran.
if MOUNT_DRIVE and USE_DRIVE:
    drive_path = '/content/drive/MyDrive/ValeTTS'
    os.makedirs(drive_path, exist_ok=True)
    os.chdir(drive_path)
    print(f"üìÅ Diret√≥rio de trabalho: {drive_path}")

# Check the GPU
run_command("nvidia-smi", "Verificando GPU dispon√≠vel")

# Clone the repository unless a 'ValeTTS' entry already exists here
repo_dir = 'ValeTTS'
if os.path.exists(repo_dir):
    print("üìÅ Reposit√≥rio j√° existe")
else:
    run_command(
        "git clone https://github.com/wallaceblaia/ValeTTS-Colab.git ValeTTS",
        "Clonando reposit√≥rio ValeTTS",
    )

# Enter the checkout and pull the latest main branch
os.chdir(repo_dir)
run_command("git pull origin main", "Atualizando reposit√≥rio")
print(f"üìç Diret√≥rio atual: {os.getcwd()}")


In [None]:
# === DEPENDENCY INSTALLATION ===
# Each entry is (shell command, progress label); steps run in listed order.
install_steps = [
    # System packages
    (
        "apt-get update && apt-get install -y espeak espeak-data libespeak1 libespeak-dev ffmpeg",
        "Instalando depend√™ncias do sistema",
    ),
    # Python package, editable install from the current directory
    (
        "pip install -e .",
        "Instalando ValeTTS em modo desenvolvimento",
    ),
    # Import check that prints the installed version
    (
        "python -c 'import valetts; print(f\"‚úÖ ValeTTS instalado: {valetts.__version__}\")'",
        "Verificando instala√ß√£o",
    ),
]

for step_cmd, step_label in install_steps:
    run_command(step_cmd, step_label)


In [None]:
# === DATASET DOWNLOAD ===
def describe_dataset(df):
    """Print a short summary of the dataset metadata table.

    Columns that are absent are reported as 'N/A' or left out of the preview
    instead of raising ``KeyError``.

    Args:
        df: pandas DataFrame loaded from ``metadata.csv``.

    Returns:
        The list of column names shown in the preview: the subset of
        ``speaker_id``, ``text`` and ``locale`` present in ``df`` (empty when
        none of them is present, in which case all columns are previewed).
    """
    print(f"üìä Dataset carregado: {len(df)} amostras")
    speaker_count = df['speaker_id'].nunique() if 'speaker_id' in df.columns else 'N/A'
    print(f"üéôÔ∏è Falantes √∫nicos: {speaker_count}")
    print(f"üåç Idiomas: {df['locale'].unique() if 'locale' in df.columns else 'N/A'}")

    # Show a sample of the data, restricted to the columns that exist
    print("\nüìã Amostra dos dados:")
    preview_columns = [
        column for column in ('speaker_id', 'text', 'locale') if column in df.columns
    ]
    preview = df.head(3)[preview_columns] if preview_columns else df.head(3)
    print(preview.to_string())
    return preview_columns


dataset_path = "data/generated/Dataset-Unificado"
if not os.path.exists(dataset_path):
    print("üì• Dataset n√£o encontrado localmente")
    print("üí° Certifique-se de ter o dataset dispon√≠vel em:")
    print("   - Google Drive: /content/drive/MyDrive/ValeTTS/data/generated/Dataset-Unificado")
    print("   - Ou fa√ßa upload manual do dataset")

    # Look for a copy on the mounted Drive and link it into the local tree
    drive_dataset = "/content/drive/MyDrive/ValeTTS/data/generated/Dataset-Unificado"
    if os.path.exists(drive_dataset):
        print(f"‚úÖ Dataset encontrado no Drive: {drive_dataset}")
        print("üîó Criando link simb√≥lico...")
        os.makedirs("data/generated", exist_ok=True)
        os.symlink(drive_dataset, dataset_path)
    else:
        print("‚ö†Ô∏è Dataset n√£o encontrado!")
        print("üìã Use um dos m√©todos abaixo:")
        print("1. Upload manual via interface do Colab")
        print("2. Download direto (substitua o link):")
        print("   !gdown --folder 'LINK_DO_GOOGLE_DRIVE_AQUI'")
else:
    print("‚úÖ Dataset j√° existe")

# Verify the dataset by loading its metadata table
metadata_file = f"{dataset_path}/metadata.csv"
if os.path.exists(metadata_file):
    import pandas as pd
    df = pd.read_csv(metadata_file)
    describe_dataset(df)
else:
    print("‚ùå Arquivo metadata.csv n√£o encontrado!")
    print("üí° Verifique se o dataset foi baixado corretamente")


In [None]:
# === DYNAMIC MODEL CONFIGURATION ===
import yaml

# Base model dimensions. Debug uses reduced widths, production the full ones.
# (text-encoder/latent width, generator channel count) per mode:
encoder_width, generator_channels = (128, 256) if DEBUG_MODE else (192, 512)

model_config = {
    "text_encoder_hidden_dim": encoder_width,
    "latent_dim": encoder_width,
    # Speaker embedding and decoder widths are kept equal to
    # generator_initial_channels in both modes.
    "speaker_embedding_dim": generator_channels,
    "generator_initial_channels": generator_channels,
    "decoder_hidden_dim": generator_channels,
}

# Full configuration that is later serialized to YAML, built section by section.
run_suffix = CONFIG_NAME.split('_')[-1]
experiment_name = f"vits2_english_{run_suffix}"
monitored_metric = "epoch/val_loss_total"

# Fixed model settings first, then the mode-dependent dimensions.
model_section = {
    "name": "VITS2",
    "mel_channels": 80,
    "n_speakers": 52,
    "text_processor": "english",
    "inference_only": False,
}
model_section.update(model_config)

training_section = {
    "learning_rate": 2.0e-4,
    "batch_size": 16,
    "max_epochs": EPOCHS,
    "accumulate_grad_batches": 1,
    "max_grad_norm": 1.0,
    "mel_loss_weight": 45.0,
    "kl_loss_weight": 1.0,
    "adv_loss_weight": 1.0,
    "fm_loss_weight": 2.0,
    "duration_loss_weight": 1.0,
    "use_amp": True,
    "gradient_clip_val": 1.0,
    "discriminator_update_frequency": 1,
    "scheduler": {
        "name": "ReduceLROnPlateau",
        "mode": "min",
        "factor": 0.5,
        "patience": 5 if DEBUG_MODE else 15,
        "min_lr": 1.0e-6,
    },
}

data_section = {
    "dataset_format": "valetts",
    "data_dir": "data/generated/Dataset-Unificado",
    "metadata_file": "data/generated/Dataset-Unificado/metadata.csv",
    "language": "en-us",
    "locale_column": "locale",
    "text_processor": {
        "use_phonemes": True,
        "normalize_numbers": True,
        "normalize_whitespace": True,
        "lowercase": True,
    },
    "sample_rate": 22050,
    "n_mels": 80,
    "n_fft": 1024,
    "hop_length": 256,
    "win_length": 1024,
    "num_workers": 4,
    "pin_memory": True,
    "persistent_workers": True,
    "use_augmentation": True,
    "volume_range": [0.9, 1.1],
    "pitch_range": [-1, 1],
}
# The sample cap is only written to the config in debug mode.
if DEBUG_MODE:
    data_section["max_samples_debug"] = MAX_SAMPLES

logging_section = {
    "log_dir": "logs",
    "experiment_name": experiment_name,
    "checkpoint": {
        "dirpath": f"checkpoints/{experiment_name}",
        # Doubled braces keep the {epoch...} placeholders literal in the value.
        "filename": f"vits2_english-{{epoch:03d}}-{{epoch/val_loss_total:.3f}}",
        "monitor": monitored_metric,
        "mode": "min",
        "save_top_k": 3 if DEBUG_MODE else 5,
        "save_last": True,
        "every_n_epochs": 1 if DEBUG_MODE else 10,
    },
    "early_stopping": {
        "monitor": monitored_metric,
        "mode": "min",
        "patience": 10 if DEBUG_MODE else 30,
        "min_delta": 0.001,
    },
    "tensorboard": {
        "save_dir": "logs/tensorboard",
        "name": experiment_name,
    },
}

config = {
    "model": model_section,
    "training": training_section,
    "data": data_section,
    "logging": logging_section,
    "hardware": {
        "accelerator": "gpu",
        "devices": 1,
        "precision": "16-mixed",
        "strategy": "auto",
    },
    "validation": {
        "val_check_interval": 1.0,
        "generate_samples": True,
        "sample_every_n_epochs": 1 if DEBUG_MODE else 10,
        "limit_val_batches": 1.0,
    },
    "dataset_config": {
        "expected_locale": "en",
        "validate_files": True,
        "cache_preprocessing": True,
    },
    "llm_monitor": {
        "enabled": False,
    },
}

# Make sure every output directory exists before anything is written
for required_dir in (
    "configs/training",
    config["logging"]["checkpoint"]["dirpath"],
    "logs/tensorboard",
):
    os.makedirs(required_dir, exist_ok=True)

# Save the configuration as YAML under configs/training/
config_path = f"configs/training/{CONFIG_NAME}.yaml"
with open(config_path, 'w') as config_file:
    yaml.dump(config, config_file, default_flow_style=False, indent=2)

# Summarize the configuration that was just written
model_cfg = config['model']

print(f"‚úÖ Configura√ß√£o criada: {config_path}")
print(f"üéØ Modo: {CONFIG_NAME.upper()}")
print(f"üìä √âpocas: {EPOCHS}")
print(f"üé§ Falantes: {model_cfg['n_speakers']}")
print(f"üíæ Dimens√µes do modelo (compat√≠veis):")
print(f"   - Hidden: {model_cfg['text_encoder_hidden_dim']}")
print(f"   - Latent: {model_cfg['latent_dim']}")
print(f"   - Speaker: {model_cfg['speaker_embedding_dim']}")
print(f"   - Generator: {model_cfg['generator_initial_channels']}")
print(f"   - Decoder: {model_cfg['decoder_hidden_dim']}")

# Check dimension compatibility. The speaker embedding and decoder widths are
# both configured to equal generator_initial_channels, so all three values are
# compared; comparing only speaker vs. decoder would miss a generator mismatch.
model_dims = config['model']
if (
    model_dims['speaker_embedding_dim']
    == model_dims['decoder_hidden_dim']
    == model_dims['generator_initial_channels']
):
    print("‚úÖ Dimens√µes compat√≠veis - Sem erro de tensor!")
else:
    print("‚ùå AVISO: Dimens√µes incompat√≠veis detectadas!")

In [None]:
# === START TRAINING ===
mode_label = CONFIG_NAME.upper()
banner = "\n" + "="*50

print(f"üöÄ Iniciando treinamento VITS2 - Modo: {mode_label}")
print(f"üìÅ Configura√ß√£o: {config_path}")
print(f"‚è±Ô∏è Estimativa: {'~5 min' if DEBUG_MODE else '~8-12 horas'}")
print(banner)

# Launch the training script with the generated config
cmd = f"python scripts/train_vits2.py --config {config_path} --disable-llm"
run_command(cmd, f"Treinamento VITS2 {mode_label}")

print(banner)
print("üéâ TREINAMENTO CONCLU√çDO COM SUCESSO!")
checkpoint_root = config['logging']['checkpoint']['dirpath']
print(f"üìÅ Checkpoints salvos em: {checkpoint_root}/")
print(f"üìä Logs dispon√≠veis em: logs/tensorboard/")


In [None]:
# === DOWNLOAD RESULTS ===
import zipfile
from datetime import datetime
import glob

# Build a timestamped ZIP archive with the training results
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
zip_filename = f"valetts_vits2_english_{CONFIG_NAME.split('_')[-1]}_{timestamp}.zip"

print(f"üì¶ Criando arquivo ZIP: {zip_filename}")


def _zip_tree(archive, folder, suffix):
    """Add every file under *folder* whose name ends with *suffix* to *archive*.

    Entries are stored under their path relative to the current directory.
    Nothing is added when *folder* does not exist.
    """
    if not os.path.exists(folder):
        return
    for parent, _subdirs, names in os.walk(folder):
        for name in names:
            if name.endswith(suffix):
                member = os.path.join(parent, name)
                archive.write(member, os.path.relpath(member, '.'))


with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Checkpoints
    checkpoint_dir = config["logging"]["checkpoint"]["dirpath"]
    _zip_tree(zipf, checkpoint_dir, '.ckpt')

    # Configuration file
    zipf.write(config_path, config_path)

    # Generated audio samples
    _zip_tree(zipf, "samples", '.wav')

    # Main training log, when present
    if os.path.exists("logs/training.log"):
        zipf.write("logs/training.log", "logs/training.log")

print(f"‚úÖ Arquivo criado: {zip_filename}")
print(f"üíæ Tamanho: {os.path.getsize(zip_filename) / 1024 / 1024:.1f} MB")

# Offer the archive as a browser download when running inside Colab
in_colab = 'google.colab' in sys.modules
if in_colab:
    from google.colab import files

    print("‚¨áÔ∏è Iniciando download...")
    files.download(zip_filename)
    print("‚úÖ Download conclu√≠do!")

# Final summary of the run
status_label = 'Teste conclu√≠do' if DEBUG_MODE else 'Produ√ß√£o conclu√≠da'
print("\nüéØ TREINAMENTO FINALIZADO!")
print(f"üìä Modo: {CONFIG_NAME.upper()}")
print(f"‚è±Ô∏è Status: {status_label}")
print(f"üì¶ Resultados salvos em: {zip_filename}")

# Find the checkpoint files and list the available ones
checkpoint_dir = config["logging"]["checkpoint"]["dirpath"]
checkpoints = glob.glob(f"{checkpoint_dir}/*.ckpt")
if not checkpoints:
    print("\n‚ö†Ô∏è Nenhum checkpoint encontrado")
else:
    print(f"\nüìÅ Checkpoints dispon√≠veis ({len(checkpoints)}):")
    # Only the last three paths in name-sorted order are shown
    newest_by_name = sorted(checkpoints)[-3:]
    for ckpt in newest_by_name:
        size_mb = os.path.getsize(ckpt) / 1024 / 1024
        print(f"   üì¶ {os.path.basename(ckpt)} ({size_mb:.1f} MB)")
