In [ ]:
# === CONFIGURAÇÃO DA API OPENROUTER (OPCIONAL) ===
# Monitor LLM para otimização inteligente do treinamento

import os
import getpass

# Configuração da API OpenRouter
ENABLE_LLM_MONITOR = True # @param {type:"boolean"}

# Optional LLM monitor setup.
# Asks for an OpenRouter API key, probes the API with it and, only when the
# probe succeeds, exports the key through the OPENROUTER_API_KEY environment
# variable. Every failure path turns ENABLE_LLM_MONITOR off instead of
# aborting the notebook.
if ENABLE_LLM_MONITOR:
    print("ü§ñ Configurando Monitor LLM com OpenRouter...")
    print("üìã O monitor LLM analisa o progresso do treinamento e sugere ajustes autom√°ticos")
    print("üí° Opcional: deixe em branco para desabilitar")

    # getpass prompts without echoing, so the key does not end up in the cell output.
    OPENROUTER_API_KEY = getpass.getpass("üîë Digite sua API Key do OpenRouter (ou Enter para pular): ")
    api_key = OPENROUTER_API_KEY.strip() if OPENROUTER_API_KEY else ""

    if api_key:
        try:
            import requests
            test_url = "https://openrouter.ai/api/v1/models"
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json"
            }

            # NOTE(review): if this endpoint also answers 200 to unauthenticated
            # requests, a 200 here does not prove the key is valid - confirm
            # against the OpenRouter API reference.
            response = requests.get(test_url, headers=headers, timeout=10)
            if response.status_code == 200:
                # Parse the body first: a malformed response must not leave the
                # key exported while the monitor ends up disabled.
                models = response.json().get('data', [])
                claude_models = [m for m in models if 'claude' in m.get('id', '').lower()]

                # Export the key only once it has been accepted, so a rejected
                # key is never visible to later cells or to the training process.
                os.environ['OPENROUTER_API_KEY'] = api_key

                print("‚úÖ API Key v√°lida - Monitor LLM habilitado")
                print("üìä Funcionalidades ativas:")
                print("   - An√°lise autom√°tica a cada 10 √©pocas")
                print("   - Sugest√µes de ajuste de learning rate")
                print("   - Detec√ß√£o de problemas de converg√™ncia")
                print("   - Relat√≥rios detalhados de progresso")

                # Show a suggested model when the listing contains one.
                if claude_models:
                    print(f"üß† Modelo recomendado: {claude_models[0].get('id', 'claude-3-5-sonnet')}")
            else:
                print(f"‚ö†Ô∏è Erro na valida√ß√£o da API: {response.status_code}")
                print("üîß Monitor LLM ser√° desabilitado para este treinamento")
                ENABLE_LLM_MONITOR = False

        except Exception as e:
            # Deliberately broad: a missing `requests`, a network error or a
            # bad payload all simply disable the optional monitor.
            print(f"‚ö†Ô∏è Erro ao testar API: {e}")
            print("üîß Monitor LLM ser√° desabilitado para este treinamento")
            ENABLE_LLM_MONITOR = False
    else:
        print("‚ÑπÔ∏è API Key n√£o fornecida - Monitor LLM desabilitado")
        ENABLE_LLM_MONITOR = False
else:
    print("üîß Monitor LLM desabilitado por configura√ß√£o")
    ENABLE_LLM_MONITOR = False

# Snapshot of the LLM monitor settings, kept for the cells that follow.
LLM_CONFIG = dict(
    enabled=ENABLE_LLM_MONITOR,
    provider='openrouter',
    model='anthropic/claude-3-5-sonnet-20241022',
    monitor_every_epochs=10,
    # True whenever the environment variable currently holds a non-empty value.
    api_key_configured=bool(os.environ.get('OPENROUTER_API_KEY')),
)

print(f"\nüéØ Configura√ß√£o LLM: {'‚úÖ Ativo' if ENABLE_LLM_MONITOR else '‚ùå Desabilitado'}")

# When the monitor ended up disabled, tell the user how to turn it on.
if not ENABLE_LLM_MONITOR:
    print(
        "\nüí° Para habilitar o Monitor LLM:",
        "1. Crie uma conta em https://openrouter.ai",
        "2. Obtenha sua API Key",
        "3. Execute esta c√©lula novamente e forne√ßa a key",
        "4. O monitor ajudar√° a otimizar seu treinamento automaticamente",
        sep="\n",
    )

In [ ]:
# === TRAINING MODE CONFIGURATION ===
# Settings tuned for an A100 with 40 GB of VRAM.
# The trailing "# @param" markers are kept verbatim on the assignment lines.

# DEBUG_MODE: when True, a later cell switches to a short test run
# (3 epochs, 100 samples, batch size capped at 8).
DEBUG_MODE = False # @param {type:"boolean"}
# USE_DRIVE: when True, work inside /content/drive/MyDrive/ValeTTS.
# Only consulted when MOUNT_DRIVE is also True.
USE_DRIVE = True # @param {type:"boolean"}
# MOUNT_DRIVE: when True, Google Drive is mounted at /content/drive.
MOUNT_DRIVE = True # @param {type:"boolean"}

# Detectar GPU automaticamente e ajustar configuração
import subprocess
def get_gpu_info():
    """Return ``(name, memory_total)`` for the first GPU reported by ``nvidia-smi``.

    Runs ``nvidia-smi --query-gpu=name,memory.total --format=csv,noheader,nounits``
    and parses the first output line. ``memory_total`` is the integer that
    ``nvidia-smi`` prints for ``memory.total`` with units stripped.

    Returns:
        tuple[str, int]: the GPU name and its total memory, or
        ``("Unknown", 0)`` when ``nvidia-smi`` is missing, exits with an
        error, or prints something that cannot be parsed.
    """
    fallback = ("Unknown", 0)
    query = ['nvidia-smi', '--query-gpu=name,memory.total', '--format=csv,noheader,nounits']

    try:
        result = subprocess.run(query, capture_output=True, text=True)
    except (OSError, subprocess.SubprocessError):
        # nvidia-smi is not installed or could not be started.
        return fallback

    if result.returncode != 0:
        return fallback

    lines = result.stdout.strip().splitlines()
    if not lines:
        return fallback

    # Split on the LAST comma so the parse does not depend on the exact
    # whitespace after the separator.
    name, sep, memory = lines[0].rpartition(',')
    if not sep:
        return fallback

    try:
        return name.strip(), int(memory.strip())
    except ValueError:
        # e.g. a non-numeric memory field.
        return fallback

gpu_name, gpu_memory = get_gpu_info()
print(f"üéÆ GPU Detectada: {gpu_name}")
print(f"üíæ VRAM: {gpu_memory} MB")

# GPU profiles in priority order; the first entry whose condition holds wins.
# Columns: (condition, banner, batch size, config name, expected speed, estimated time)
_GPU_PROFILES = (
    (
        "A100" in gpu_name and gpu_memory >= 40000,
        "üöÄ MODO A100: Configura√ß√£o de alta performance",
        32, "vits2_english_a100_optimized", "5-8 it/s", "4-6 horas",
    ),
    (
        "V100" in gpu_name,
        "‚ö° MODO V100: Configura√ß√£o otimizada",
        24, "vits2_english_production", "3-5 it/s", "6-8 horas",
    ),
    (
        "T4" in gpu_name,
        "üì± MODO T4: Configura√ß√£o conservativa",
        16, "vits2_english_production", "1-2 it/s", "12-15 horas",
    ),
    (
        # Catch-all used for any other (or undetected) GPU.
        True,
        "‚ö†Ô∏è GPU n√£o detectada, usando configura√ß√£o padr√£o",
        16, "vits2_english_production", "1-2 it/s", "12-15 horas",
    ),
)
_banner, BATCH_SIZE, CONFIG_NAME, EXPECTED_PERFORMANCE, ESTIMATED_TIME = next(
    profile[1:] for profile in _GPU_PROFILES if profile[0]
)
print(_banner)

# Run length: debug mode overrides the profile with a short, small run.
if not DEBUG_MODE:
    print("üöÄ MODO PRODU√á√ÉO: Treinamento completo")
    EPOCHS, MAX_SAMPLES = 200, None
else:
    print("üêõ MODO DEBUG: Treinamento r√°pido para teste")
    EPOCHS, MAX_SAMPLES = 3, 100
    CONFIG_NAME = "vits2_english_debug"
    BATCH_SIZE = min(BATCH_SIZE, 8)  # never above 8 in debug runs

print(f"üìä Configura√ß√£o: {CONFIG_NAME}")
print(f"üîÑ √âpocas: {EPOCHS}")
print(f"üì¶ Batch Size: {BATCH_SIZE}")
print(f"üìà Amostras: {MAX_SAMPLES if MAX_SAMPLES else 'Todas (22.910)'}")
print(f"‚ö° Performance esperada: {EXPECTED_PERFORMANCE}")
print(f"‚è±Ô∏è Tempo estimado: {ESTIMATED_TIME}")

In [None]:
# === CONFIGURAÇÃO DO SISTEMA E CLONAGEM ===
import subprocess
import sys
import os

def run_command(cmd, description):
    """Run a shell command, echoing its output line by line as it is produced.

    stderr is merged into stdout, so everything the command prints appears in
    order in the notebook output.

    Args:
        cmd: Command line as a single string. It is executed with
            ``shell=True``, so it must only ever be built from trusted values.
        description: Short label printed before the command starts and again
            once it has finished successfully.

    Raises:
        RuntimeError: If the command exits with a non-zero status.
    """
    print(f"üîÑ {description}")
    # The context manager closes the pipe and waits for the child even when
    # the echo loop is interrupted (for example by stopping the cell).
    with subprocess.Popen(
        cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
        universal_newlines=True, bufsize=1
    ) as process:
        for line in process.stdout:
            print(line.rstrip(), flush=True)

    # Leaving the `with` block has already waited for the process.
    if process.returncode != 0:
        raise RuntimeError(f"Comando falhou: {cmd}")
    print(f"‚úÖ {description} - Conclu√≠do\n")

# Mount Google Drive when requested.
# The google.colab import is local to this branch, so it is only attempted
# when MOUNT_DRIVE is True.
if MOUNT_DRIVE:
    from google.colab import drive
    drive.mount('/content/drive')

    # USE_DRIVE is only honoured when the Drive was mounted above.
    if USE_DRIVE:
        drive_path = '/content/drive/MyDrive/ValeTTS'
        os.makedirs(drive_path, exist_ok=True)
        # Every relative path used from here on resolves inside the Drive folder.
        os.chdir(drive_path)
        print(f"üìÅ Diret√≥rio de trabalho: {drive_path}")

# Check the GPU. run_command raises RuntimeError when nvidia-smi exits with a
# non-zero status, which stops the cell here.
run_command("nvidia-smi", "Verificando GPU dispon√≠vel")

# Clone the repository unless a 'ValeTTS' entry already exists in the current
# working directory.
if not os.path.exists('ValeTTS'):
    run_command(
        "git clone https://github.com/wallaceblaia/ValeTTS-Colab.git ValeTTS",
        "Clonando reposit√≥rio ValeTTS"
    )
else:
    print("üìÅ Reposit√≥rio j√° existe")

# Enter the checkout and bring it up to date with origin/main.
# NOTE(review): 'ValeTTS' is relative to the current directory, so re-running
# this cell without the chdir above would look for it inside the checkout -
# confirm the intended re-run behaviour.
os.chdir('ValeTTS')
run_command("git pull origin main", "Atualizando reposit√≥rio")
print(f"üìç Diret√≥rio atual: {os.getcwd()}")


In [ ]:
# === INSTALAÇÃO DE DEPENDÊNCIAS ESPECÍFICAS PARA INGLÊS ===

print("üîß Instalando depend√™ncias do sistema para ingl√™s...")

# System packages, installed one command at a time and in this order.
# Each step is labelled with the last word of its command line.
system_deps = [
    "apt-get update",
    "apt-get install -y espeak espeak-data libespeak1 libespeak-dev",
    "apt-get install -y ffmpeg sox libsox-fmt-all",
    "apt-get install -y language-pack-en",  # English language pack
]
for cmd in system_deps:
    run_command(cmd, f"Sistema: {cmd.rsplit(None, 1)[-1]}")

print("üì¶ Instalando depend√™ncias Python...")

# Python packages used by the English text front end.
python_deps = [
    "pip install phonemizer==3.3.0",  # pinned version
    "pip install espeak-python",      # espeak wrapper
    "pip install num2words",          # numbers to words
    "pip install inflect",            # plurals and ordinals
]
for cmd in python_deps:
    run_command(cmd, f"Python: {cmd.rsplit(None, 1)[-1]}")

# Install the checked-out project itself in editable mode.
run_command("pip install -e .", "Instalando ValeTTS em modo desenvolvimento")

print("üß™ Verificando instala√ß√µes...")

# (command, label) pairs. A failing check only prints a warning; it never
# stops the cell.
verification_commands = [
    ("python -c 'import phonemizer; print(f\"‚úÖ Phonemizer: {phonemizer.__version__}\")'", "Phonemizer"),
    ("python -c 'import espeak; print(\"‚úÖ ESpeak Python wrapper instalado\")'", "ESpeak wrapper"),
    ("espeak --version", "ESpeak sistema"),
    ("python -c 'import valetts; print(f\"‚úÖ ValeTTS instalado\")'", "ValeTTS"),
    ("python -c 'from valetts.data.preprocessing.text_en import EnglishTextPreprocessor; print(\"‚úÖ EnglishTextPreprocessor dispon√≠vel\")'", "Processador ingl√™s"),
]
for cmd, desc in verification_commands:
    try:
        run_command(cmd, f"Verificar {desc}")
    except Exception as e:
        print(f"‚ö†Ô∏è Aviso: {desc} - {e}")

# Smoke-test the English text pipeline in a separate Python process.
# The script below is written to a temporary file, executed, and then removed.
print("üß™ Testando processamento de ingl√™s...")
test_code = '''
from valetts.data.preprocessing.text_en import EnglishTextPreprocessor

# Testar processador de ingl√™s
processor = EnglishTextPreprocessor(
    language="en-us",
    use_phonemes=True,
    normalize_numbers=True
)

# Teste b√°sico
test_text = "Hello world! This is a test with numbers 123 and Mr. Smith."
normalized = processor.normalize_text(test_text)
print(f"‚úÖ Texto normalizado: {normalized}")

# Teste phonemes
try:
    phonemes = processor.text_to_phonemes("hello world")
    print(f"‚úÖ Phonemes: {phonemes}")
except Exception as e:
    print(f"‚ö†Ô∏è Phonemes: {e}")

print("‚úÖ Processador de ingl√™s funcionando!")
'''

try:
    # The script contains non-ASCII text; write it as UTF-8, the encoding
    # Python assumes for source files, instead of relying on the locale default.
    with open("test_english.py", "w", encoding="utf-8") as f:
        f.write(test_code)
    run_command("python test_english.py", "Teste processador ingl√™s")
except Exception as e:
    # Best effort: a failing smoke test is reported but does not stop the cell.
    print(f"‚ö†Ô∏è Teste de ingl√™s falhou: {e}")
finally:
    # Remove the temporary script even when the test itself failed;
    # `rm -f` succeeds whether or not the file was actually created.
    run_command("rm -f test_english.py", "Limpeza")

print("‚úÖ Todas as depend√™ncias para ingl√™s instaladas!")
print("üéØ Sistema pronto para treinamento VITS2 em ingl√™s")

In [None]:
# === DOWNLOAD DO DATASET ===
# Locate the dataset: use the local copy when present, otherwise link to the
# copy stored on Google Drive, otherwise print instructions.
dataset_path = "data/generated/Dataset-Unificado"
if not os.path.exists(dataset_path):
    print("üì• Dataset n√£o encontrado localmente")
    print("üí° Certifique-se de ter o dataset dispon√≠vel em:")
    print("   - Google Drive: /content/drive/MyDrive/ValeTTS/data/generated/Dataset-Unificado")
    print("   - Ou fa√ßa upload manual do dataset")

    # Look for the dataset on the mounted Drive.
    drive_dataset = "/content/drive/MyDrive/ValeTTS/data/generated/Dataset-Unificado"
    if os.path.exists(drive_dataset):
        print(f"‚úÖ Dataset encontrado no Drive: {drive_dataset}")
        print("üîó Criando link simb√≥lico...")
        os.makedirs("data/generated", exist_ok=True)
        # A dangling link left by an earlier run makes os.path.exists() return
        # False while os.symlink() would fail with FileExistsError; replace it.
        if os.path.islink(dataset_path):
            os.remove(dataset_path)
        os.symlink(drive_dataset, dataset_path)
    else:
        print("‚ö†Ô∏è Dataset n√£o encontrado!")
        print("üìã Use um dos m√©todos abaixo:")
        print("1. Upload manual via interface do Colab")
        print("2. Download direto (substitua o link):")
        print("   !gdown --folder 'LINK_DO_GOOGLE_DRIVE_AQUI'")
else:
    print("‚úÖ Dataset j√° existe")

# Inspect the dataset metadata when it is available.
metadata_file = f"{dataset_path}/metadata.csv"
if os.path.exists(metadata_file):
    import pandas as pd
    df = pd.read_csv(metadata_file)
    print(f"üìä Dataset carregado: {len(df)} amostras")
    # Optional columns are reported as 'N/A' instead of raising KeyError.
    print(f"üéôÔ∏è Falantes √∫nicos: {df['speaker_id'].nunique() if 'speaker_id' in df.columns else 'N/A'}")
    print(f"üåç Idiomas: {df['locale'].unique() if 'locale' in df.columns else 'N/A'}")

    # Preview the first rows, limited to the expected columns that exist.
    print("\nüìã Amostra dos dados:")
    preview_columns = [c for c in ('speaker_id', 'text', 'locale') if c in df.columns]
    print(df.head(3)[preview_columns].to_string())
else:
    print("‚ùå Arquivo metadata.csv n√£o encontrado!")
    print("üí° Verifique se o dataset foi baixado corretamente")


In [ ]:
# === CONFIGURAÇÃO DINÂMICA DO MODELO PARA INGLÊS ===
import yaml
import pandas as pd

# First, read the dataset statistics that the model configuration depends on.
metadata_file = "data/generated/Dataset-Unificado/metadata.csv"
if os.path.exists(metadata_file):
    df = pd.read_csv(metadata_file)
    total_samples = len(df)
    unique_speakers = df['speaker_id'].nunique()
    # The 'locale' column is optional: treat a missing column as "no locales"
    # instead of failing with KeyError.
    has_locale = 'locale' in df.columns
    locales = df['locale'].unique() if has_locale else []

    print(f"üìä Dataset carregado:")
    print(f"   üìà Total: {total_samples:,} amostras")
    print(f"   üé§ Falantes: {unique_speakers}")
    print(f"   üåç Idiomas: {locales}")

    # Warn when the dataset has no rows tagged as English.
    if 'en' not in locales:
        print("‚ö†Ô∏è AVISO: Dataset n√£o cont√©m ingl√™s!")
        print(f"   Idiomas encontrados: {locales}")
    else:
        english_samples = len(df[df['locale'] == 'en'])
        print(f"   ‚úÖ Amostras em ingl√™s: {english_samples:,}")
else:
    # No metadata available: fall back to hard-coded dataset statistics.
    print("‚ùå Metadata n√£o encontrada!")
    total_samples = 22910  # Fallback
    unique_speakers = 52

# Build the "model" section of the configuration.
# The A100 profile prefers a YAML file shipped with the repository and falls
# back to an inline definition; every other profile is always built inline.
if CONFIG_NAME != "vits2_english_a100_optimized":
    # Layer sizes for the non-A100 profiles, smaller in debug mode.
    if not DEBUG_MODE:
        model_config = {
            "text_encoder_hidden_dim": 192,
            "latent_dim": 192,
            "speaker_embedding_dim": 512,
            "generator_initial_channels": 512,
            "decoder_hidden_dim": 512,
        }
    else:
        model_config = {
            "text_encoder_hidden_dim": 128,
            "latent_dim": 128,
            "speaker_embedding_dim": 256,
            "generator_initial_channels": 256,
            "decoder_hidden_dim": 256,
        }

    config = {
        "model": {
            "name": "VITS2",
            "mel_channels": 80,
            "n_speakers": unique_speakers,
            "text_processor": "english",
            "inference_only": False,
            **model_config
        }
    }
else:
    config_path = f"configs/training/{CONFIG_NAME}.yaml"
    if not os.path.exists(config_path):
        print("‚ö†Ô∏è Configura√ß√£o A100 n√£o encontrada, criando...")
        # Inline A100 definition; each size is halved when DEBUG_MODE is set.
        config = {
            "model": {
                "name": "VITS2",
                "text_encoder_hidden_dim": 256 if DEBUG_MODE else 512,
                "latent_dim": 256 if DEBUG_MODE else 512,
                "mel_channels": 80,
                "n_speakers": unique_speakers,
                "speaker_embedding_dim": 512 if DEBUG_MODE else 1024,
                "generator_initial_channels": 512 if DEBUG_MODE else 1024,
                "decoder_hidden_dim": 512 if DEBUG_MODE else 1024,
                "text_processor": "english",
                "inference_only": False
            }
        }
    else:
        # Use the YAML file from the repository as the starting point.
        with open(config_path, 'r') as f:
            config = yaml.safe_load(f)
        print("‚úÖ Usando configura√ß√£o A100 otimizada!")

# Add the training, data, logging, hardware, validation, dataset and LLM
# sections. dict.update replaces each listed top-level key wholesale, so a
# section with the same name loaded from a YAML file above is overwritten,
# not merged.
config.update({
    "training": {
        "learning_rate": 2.0e-4,
        "batch_size": BATCH_SIZE,
        "max_epochs": EPOCHS,
        "accumulate_grad_batches": 1,
        "max_grad_norm": 1.0,
        "mel_loss_weight": 45.0,
        "kl_loss_weight": 1.0,
        "adv_loss_weight": 1.0,
        "fm_loss_weight": 2.0,
        "duration_loss_weight": 1.0,
        "use_amp": True,
        "gradient_clip_val": 1.0,
        "discriminator_update_frequency": 1,
        "scheduler": {
            "name": "ReduceLROnPlateau",
            "mode": "min",
            "factor": 0.5,
            "patience": 15 if not DEBUG_MODE else 5,
            "min_lr": 1.0e-6
        }
    },
    "data": {
        "dataset_format": "valetts",
        "data_dir": "data/generated/Dataset-Unificado",
        "metadata_file": "data/generated/Dataset-Unificado/metadata.csv",
        "language": "en",  # CRITICAL: use "en" for English
        "locale_column": "locale",
        "text_processor": {
            "class": "EnglishTextPreprocessor",
            "use_phonemes": True,
            "normalize_numbers": True,
            "normalize_whitespace": True,
            "lowercase": True,
            "backend": "espeak",
            "language": "en-us"
        },
        "sample_rate": 22050,
        "n_mels": 80,
        "n_fft": 1024,
        "hop_length": 256,
        "win_length": 1024,
        "num_workers": 8 if "A100" in gpu_name else 4,
        "pin_memory": True,
        "persistent_workers": True,
        "use_augmentation": True,
        "volume_range": [0.9, 1.1],
        "pitch_range": [-1, 1]
    },
    "logging": {
        "log_dir": "logs",
        # The experiment and checkpoint names reuse the last "_"-separated
        # word of CONFIG_NAME.
        "experiment_name": f"vits2_english_{CONFIG_NAME.split('_')[-1]}",
        "checkpoint": {
            "dirpath": f"checkpoints/vits2_english_{CONFIG_NAME.split('_')[-1]}",
            # Doubled braces keep literal {epoch...} placeholders in the
            # resulting string instead of formatting them here.
            "filename": f"vits2_english-{{epoch:03d}}-{{epoch/val_loss_total:.3f}}",
            "monitor": "epoch/val_loss_total",
            "mode": "min",
            "save_top_k": 3 if DEBUG_MODE else 5,
            "save_last": True,
            "every_n_epochs": 1 if DEBUG_MODE else 10
        },
        "early_stopping": {
            "monitor": "epoch/val_loss_total",
            "mode": "min",
            "patience": 10 if DEBUG_MODE else 30,
            "min_delta": 0.001
        },
        "tensorboard": {
            "save_dir": "logs/tensorboard",
            "name": f"vits2_english_{CONFIG_NAME.split('_')[-1]}"
        }
    },
    "hardware": {
        "accelerator": "gpu",
        "devices": 1,
        "precision": "16-mixed",
        "strategy": "auto",
        "benchmark": True if "A100" in gpu_name else False
    },
    "validation": {
        "val_check_interval": 1.0,
        "generate_samples": True,
        "sample_every_n_epochs": 1 if DEBUG_MODE else 10,
        "limit_val_batches": 1.0
    },
    "dataset_config": {
        "expected_locale": "en",  # CRITICAL: correct value for English
        "validate_files": True,
        "cache_preprocessing": True,
        "audio_column": "audio_path",
        "text_column": "text_normalized",
        "speaker_column": "speaker_id",
        "locale_column": "locale"
    },
    # Embedded LLM monitor configuration (the LLM_CONFIG dict built earlier)
    "llm_monitor": LLM_CONFIG
})

# Debug runs also carry the sample cap inside the data section.
if DEBUG_MODE:
    config["data"]["max_samples_debug"] = MAX_SAMPLES

# Make sure every output directory exists before anything is written.
for _out_dir in (
    "configs/training",
    config["logging"]["checkpoint"]["dirpath"],
    "logs/tensorboard",
):
    os.makedirs(_out_dir, exist_ok=True)

# Write the assembled configuration to configs/training/<CONFIG_NAME>.yaml.
config_path = f"configs/training/{CONFIG_NAME}.yaml"
with open(config_path, 'w') as f:
    yaml.dump(config, f, default_flow_style=False, indent=2)

# Summary of what was written.
print(
    f"‚úÖ Configura√ß√£o criada: {config_path}",
    f"üéØ Modo: {CONFIG_NAME.upper()}",
    f"üìä √âpocas: {EPOCHS}",
    f"üì¶ Batch Size: {BATCH_SIZE}",
    f"üé§ Falantes: {config['model']['n_speakers']}",
    f"üåç Idioma: INGL√äS (locale='en')",
    f"ü§ñ Monitor LLM: {'‚úÖ Ativo' if LLM_CONFIG['enabled'] else '‚ùå Desabilitado'}",
    f"üíæ Dimens√µes do modelo:",
    f"   - Hidden: {config['model']['text_encoder_hidden_dim']}",
    f"   - Speaker: {config['model']['speaker_embedding_dim']}",
    f"   - Generator: {config['model']['generator_initial_channels']}",
    f"   - Decoder: {config['model']['decoder_hidden_dim']}",
    sep="\n",
)

# Sanity check: warn when the speaker embedding and decoder sizes differ.
if config['model']['speaker_embedding_dim'] != config['model']['decoder_hidden_dim']:
    print("‚ùå AVISO: Dimens√µes incompat√≠veis detectadas!")
    print(f"   Speaker: {config['model']['speaker_embedding_dim']}")
    print(f"   Decoder: {config['model']['decoder_hidden_dim']}")
else:
    print("‚úÖ Dimens√µes compat√≠veis - Sem erro de tensor!")

print(f"\nüöÄ Configura√ß√£o otimizada para {gpu_name} pronta!")

if LLM_CONFIG['enabled']:
    print(
        "\nü§ñ Monitor LLM ativo - benef√≠cios:",
        "   üìä An√°lise inteligente do progresso",
        "   ‚öôÔ∏è Ajustes autom√°ticos de hiperpar√¢metros",
        "   üéØ Detec√ß√£o precoce de problemas",
        "   üìà Relat√≥rios detalhados de treinamento",
        sep="\n",
    )

In [ ]:
# === INICIAR TREINAMENTO OTIMIZADO PARA INGLÊS ===
# Run summary printed before the training process is launched.
print(
    f"üöÄ Iniciando treinamento VITS2 - Modo: {CONFIG_NAME.upper()}",
    f"üìÅ Configura√ß√£o: {config_path}",
    f"üéÆ GPU: {gpu_name} ({gpu_memory} MB)",
    f"üì¶ Batch Size: {BATCH_SIZE}",
    f"‚è±Ô∏è Estimativa: {ESTIMATED_TIME}",
    f"‚ö° Performance esperada: {EXPECTED_PERFORMANCE}",
    f"ü§ñ Monitor LLM: {'‚úÖ Ativo' if LLM_CONFIG['enabled'] else '‚ùå Desabilitado'}",
    sep="\n",
)
print("\n" + "="*60)

# Prefer the English-specific training script; fall back to the generic one.
english_script = "scripts/train_vits2_english.py"
if not os.path.exists(english_script):
    print("‚ö†Ô∏è Script espec√≠fico n√£o encontrado, usando script padr√£o")
    base_cmd = f"python scripts/train_vits2.py --config {config_path}"
else:
    print("‚úÖ Usando script espec√≠fico para ingl√™s")
    base_cmd = f"python {english_script} --config {config_path}"

# The monitor settings are already in the config file, so only the disabled
# case adds a command-line flag.
if not LLM_CONFIG['enabled']:
    print("üîß Monitor LLM desabilitado - usando modo padr√£o")
    cmd = f"{base_cmd} --disable-llm"
else:
    print("ü§ñ Monitor LLM ativo - treinamento inteligente habilitado")
    cmd = base_cmd

# Environment variables set for the training process.
# NOTE(review): confirm that the installed PyTorch build honours these names.
os.environ.update({
    'CUDA_LAUNCH_BLOCKING': '0',
    'TORCH_CUDNN_V8_API_ENABLED': '1',
    'TORCH_ALLOW_TF32_CUBLAS_OVERRIDE': '1',
})

if "A100" in gpu_name:
    print(
        "üöÄ Configura√ß√µes especiais para A100:",
        "   - TF32 habilitado para m√°xima performance",
        "   - CuDNN v8 API ativado",
        "   - Tensor cores ativados automaticamente",
        sep="\n",
    )
    os.environ.update({
        'TORCH_ALLOW_TF32': '1',
        'TORCH_CUDNN_ALLOW_TF32': '1',
    })

# Echo the LLM monitor settings when it is active.
if LLM_CONFIG['enabled']:
    print(
        "\nü§ñ Configura√ß√µes do Monitor LLM:",
        f"   üß† Modelo: {LLM_CONFIG['model']}",
        f"   üìä An√°lise a cada: {LLM_CONFIG['monitor_every_epochs']} √©pocas",
        f"   üîÑ Provider: {LLM_CONFIG['provider']}",
        "   üìà Funcionalidades ativas:",
        "      - Otimiza√ß√£o autom√°tica de learning rate",
        "      - Detec√ß√£o de problemas de converg√™ncia",
        "      - Sugest√µes de ajustes de hiperpar√¢metros",
        "      - Relat√≥rios detalhados de progresso",
        sep="\n",
    )

# Show the exact command that is about to run.
print(f"\nüìù Comando: {cmd}")
print("üèÅ Iniciando treinamento...\n")

# Launch the training process and report on its artefacts. On failure, print a
# short diagnosis and re-raise the original error.
try:
    # run_command streams the output and raises RuntimeError on a non-zero exit.
    run_command(cmd, f"Treinamento VITS2 {CONFIG_NAME.upper()}")

    print("\n" + "="*60)
    print("üéâ TREINAMENTO CONCLU√çDO COM SUCESSO!")

    # List the checkpoints that were produced.
    checkpoint_dir = config["logging"]["checkpoint"]["dirpath"]
    if os.path.exists(checkpoint_dir):
        checkpoints = [f for f in os.listdir(checkpoint_dir) if f.endswith('.ckpt')]
        print(f"üìÅ Checkpoints gerados: {len(checkpoints)}")

        if checkpoints:
            # Newest first, by modification time.
            checkpoint_paths = [os.path.join(checkpoint_dir, f) for f in checkpoints]
            checkpoint_paths.sort(key=os.path.getmtime, reverse=True)

            print("üì¶ Checkpoints dispon√≠veis:")
            for i, ckpt in enumerate(checkpoint_paths[:3]):  # three most recent
                size_mb = os.path.getsize(ckpt) / 1024 / 1024
                print(f"   {i+1}. {os.path.basename(ckpt)} ({size_mb:.1f} MB)")

    # Point to the TensorBoard logs when they exist.
    tensorboard_dir = config["logging"]["tensorboard"]["save_dir"]
    if os.path.exists(tensorboard_dir):
        print(f"üìä Logs TensorBoard: {tensorboard_dir}")

    # Report the LLM monitor analyses when the monitor was active.
    if LLM_CONFIG['enabled']:
        llm_logs_dir = "logs/llm_monitor"
        if os.path.exists(llm_logs_dir):
            llm_files = [f for f in os.listdir(llm_logs_dir) if f.endswith('.json')]
            print(f"ü§ñ An√°lises LLM geradas: {len(llm_files)}")
            if llm_files:
                # Last file name in lexicographic order.
                latest_analysis = sorted(llm_files)[-1]
                print(f"   üìÑ √öltima an√°lise: {latest_analysis}")

    print(f"\n‚úÖ Modelo treinado com sucesso para ingl√™s!")
    print(f"üéØ Configura√ß√£o: {CONFIG_NAME}")
    print(f"üåç Idioma: Ingl√™s")
    print(f"üìà Dataset: 22.910 amostras")
    print(f"ü§ñ Monitor LLM: {'Usado' if LLM_CONFIG['enabled'] else 'N√£o usado'}")

except Exception as e:
    print(f"\n‚ùå ERRO DURANTE TREINAMENTO:")
    print(f"   {str(e)}")
    print(f"\nüîç Diagn√≥stico:")

    # Check that the inputs the training run needs are present.
    required_files = [
        config_path,
        config["data"]["metadata_file"],
        config["data"]["data_dir"]
    ]

    for file_path in required_files:
        if os.path.exists(file_path):
            print(f"   ‚úÖ {file_path}")
        else:
            print(f"   ‚ùå {file_path} - N√ÉO ENCONTRADO")

    # Check the GPU. torch is imported here because none of the cells shown
    # imports it; without this the handler fails with NameError and hides the
    # original training error.
    try:
        import torch
        cuda_ok = torch.cuda.is_available()
    except ImportError:
        cuda_ok = False

    if cuda_ok:
        print(f"   ‚úÖ CUDA dispon√≠vel: {torch.cuda.get_device_name(0)}")
    else:
        print("   ‚ùå CUDA n√£o dispon√≠vel")

    # Check the LLM configuration when the monitor is active.
    if LLM_CONFIG['enabled']:
        if os.environ.get('OPENROUTER_API_KEY'):
            print("   ‚úÖ API Key OpenRouter configurada")
        else:
            print("   ‚ùå API Key OpenRouter n√£o encontrada")

    # Re-raise so the notebook still shows the original failure.
    raise

In [None]:
# === DOWNLOAD DOS RESULTADOS ===
import zipfile
from datetime import datetime
import glob

def _add_matching_files(archive, top_dir, suffix):
    """Add every file under *top_dir* whose name ends with *suffix* to *archive*.

    Entries are stored under their path relative to the current directory.
    Nothing happens when *top_dir* does not exist.
    """
    if not os.path.exists(top_dir):
        return
    for folder, _subdirs, names in os.walk(top_dir):
        for name in names:
            if name.endswith(suffix):
                path = os.path.join(folder, name)
                archive.write(path, os.path.relpath(path, '.'))


# Bundle the training artefacts into a timestamped ZIP archive.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
zip_filename = f"valetts_vits2_english_{CONFIG_NAME.split('_')[-1]}_{timestamp}.zip"

print(f"üì¶ Criando arquivo ZIP: {zip_filename}")

checkpoint_dir = config["logging"]["checkpoint"]["dirpath"]
with zipfile.ZipFile(zip_filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
    # Checkpoints
    _add_matching_files(zipf, checkpoint_dir, '.ckpt')

    # Configuration file used for this run
    zipf.write(config_path, config_path)

    # Generated audio samples
    _add_matching_files(zipf, "samples", '.wav')

    # Main training log
    if os.path.exists("logs/training.log"):
        zipf.write("logs/training.log", "logs/training.log")

print(f"‚úÖ Arquivo criado: {zip_filename}")
print(f"üíæ Tamanho: {os.path.getsize(zip_filename) / 1024 / 1024:.1f} MB")

# Offer the archive for download only when running inside Colab.
if 'google.colab' in sys.modules:
    from google.colab import files
    print("‚¨áÔ∏è Iniciando download...")
    files.download(zip_filename)
    print("‚úÖ Download conclu√≠do!")

print("\nüéØ TREINAMENTO FINALIZADO!")
print(f"üìä Modo: {CONFIG_NAME.upper()}")
print(f"‚è±Ô∏è Status: {'Teste conclu√≠do' if DEBUG_MODE else 'Produ√ß√£o conclu√≠da'}")
print(f"üì¶ Resultados salvos em: {zip_filename}")

# List the checkpoints that are available (last three by file name).
checkpoint_dir = config["logging"]["checkpoint"]["dirpath"]
checkpoints = glob.glob(f"{checkpoint_dir}/*.ckpt")
if not checkpoints:
    print("\n‚ö†Ô∏è Nenhum checkpoint encontrado")
else:
    print(f"\nüìÅ Checkpoints dispon√≠veis ({len(checkpoints)}):")
    for ckpt in sorted(checkpoints)[-3:]:
        size_mb = os.path.getsize(ckpt) / 1024 / 1024
        print(f"   üì¶ {os.path.basename(ckpt)} ({size_mb:.1f} MB)")
