In [None]:
# Install the third-party packages this notebook imports (whisper, pydub, psutil).
# NOTE(review): no versions are pinned here, so a fresh run may resolve different
# releases than the ones this notebook was written against — consider pinning.
!pip install openai-whisper --no-cache-dir
!pip install pydub
!pip install psutil
# NOTE(review): presumably meant to provide the FFmpeg tool used for audio decoding;
# the PyPI package named `ffmpeg` may not ship that binary — TODO confirm.
!pip install ffmpeg

import os
import whisper
import gc
from pydub import AudioSegment
import tempfile
from tqdm.notebook import tqdm
import psutil
import time

Collecting openai-whisper
  Downloading openai-whisper-20230918.tar.gz (794 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/794.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.8/794.3 kB[0m [31m5.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.3/794.3 kB[0m [31m16.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting triton==2.0.0 (from openai-whisper)
  Downloading triton-2.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (63.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.3/63.3 MB[0m [31m265.2 MB/s[0m eta [36m0:00:00[0m
Collecting tiktoken==0.3.3 (from openai-whisper)
  Downloading tiktoken-0.3.3-cp310-cp310-manylinu

In [None]:
def split_audio(file_name, chunk_length_ms=300000):
    """Load an audio file and cut it into consecutive fixed-length chunks.

    Args:
        file_name: Path of the audio file to load. The file is always decoded
            as MP3, regardless of its extension.
        chunk_length_ms: Length of each chunk (default 300000). Must be a
            positive integer.

    Returns:
        A list with the slices of the loaded audio, in order. The final slice
        may be shorter than ``chunk_length_ms``; the list is empty when the
        loaded audio has length zero.

    Raises:
        ValueError: If ``chunk_length_ms`` is zero or negative.
    """
    # Validate before decoding: a negative step would silently produce no
    # chunks at all, and a zero step would only fail (with an unrelated
    # ``range()`` message) after the whole file had already been loaded.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be a positive integer, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(file_name, format="mp3")
    total_length = len(audio)
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, total_length, chunk_length_ms)
    ]

def _transcribe_with_retry(model, audio_path, chunk_num, language,
                           max_attempts=3, initial_delay=2):
    """Transcribe one audio file, retrying with a doubling delay on MemoryError.

    Args:
        model: Object exposing ``transcribe(path, language=...)`` that returns
            a mapping with a ``"text"`` entry.
        audio_path: Path of the audio file to transcribe.
        chunk_num: Index of the chunk, used only in the printed messages.
        language: Language passed through to ``model.transcribe``.
        max_attempts: Maximum number of transcription attempts.
        initial_delay: Seconds to wait before the first retry; doubled after
            each failed attempt.

    Returns:
        The transcribed text, or ``None`` when every attempt failed.
    """
    delay = initial_delay
    for attempt in range(1, max_attempts + 1):
        try:
            return model.transcribe(audio_path, language=language)["text"]
        except MemoryError:
            # NOTE(review): if the model runs on an accelerator, running out of
            # device memory may surface as a different exception type and take
            # the generic branch below instead of being retried — confirm.
            if attempt == max_attempts:
                # No attempts left: do not announce a retry or sleep for nothing.
                print(f"Erro de memória no chunk {chunk_num}.")
                break
            print(f"Erro de memória no chunk {chunk_num}. Tentando novamente em {delay} segundos...")
            time.sleep(delay)
            delay *= 2
        except Exception as e:
            # Any other failure is not retried; the chunk is reported and skipped.
            print(f"Erro no chunk {chunk_num}: {e}")
            break
    return None


def _wait_for_free_memory(min_available_bytes=1024 ** 3, poll_seconds=10):
    """Run a garbage collection, then block until enough system memory is available."""
    gc.collect()
    mem = psutil.virtual_memory()
    while mem.available < min_available_bytes:
        print(f"Memória baixa ({mem.available / (1024.0 ** 3):.2f} GB). Esperando...")
        time.sleep(poll_seconds)
        mem = psutil.virtual_memory()


def split_and_transcribe_audio(model, file_name, output_file, chunk_length_ms=300000,
                               language="Portuguese"):
    """Transcribe an audio file chunk by chunk and write the text to a file.

    The audio is split with ``split_audio``; each chunk is exported to a
    temporary WAV file, transcribed, and its text appended to ``output_file``.
    Chunks that cannot be transcribed are reported on stdout and skipped, so
    the output may be incomplete.

    Args:
        model: Object exposing ``transcribe(path, language=...)`` that returns
            a mapping with a ``"text"`` entry.
        file_name: Path of the audio file to transcribe.
        output_file: Path of the UTF-8 text file to create (overwritten if it
            already exists).
        chunk_length_ms: Chunk length forwarded to ``split_audio``.
        language: Language forwarded to ``model.transcribe``. Defaults to
            ``"Portuguese"``.

    Raises:
        OSError: If the output file or a temporary file cannot be written or
            removed. Transcription errors are not raised.
    """
    chunks = split_audio(file_name, chunk_length_ms)
    with open(output_file, 'w', encoding='utf-8') as f_out:
        for chunk_num, audio_chunk in enumerate(chunks):
            # Only the unique path is needed. Close our handle right away so the
            # file can be re-opened by name for export and for transcription
            # (re-opening a still-open named temporary file is not portable).
            temp_file = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
            temp_file.close()
            try:
                # export() hands back the file object it wrote to; close it so
                # the handle is not left open while the chunk is transcribed.
                audio_chunk.export(temp_file.name, format="wav").close()

                text = _transcribe_with_retry(model, temp_file.name, chunk_num, language)
                if text is None:
                    print(f"Não foi possível transcrever o chunk {chunk_num} após várias tentativas.")
                else:
                    f_out.write(text)
                    # Keep finished chunks on disk even if a later chunk
                    # crashes the process.
                    f_out.flush()

                # Every 20th chunk, reclaim memory and pause while it is scarce.
                if chunk_num % 20 == 0 and chunk_num > 0:
                    _wait_for_free_memory()
            finally:
                os.unlink(temp_file.name)

def main(root_dir="./sample_data/sections", model_name="large"):
    """Transcribe every MP3 found one level below ``root_dir``.

    Each immediate subdirectory of ``root_dir`` is scanned for ``.mp3`` files
    (extension matched case-insensitively). For every match a ``.txt`` file
    with the same base name is written next to it.

    Args:
        root_dir: Directory whose subdirectories hold the audio files. It is
            created when missing.
        model_name: Name passed to ``whisper.load_model``.
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(root_dir, exist_ok=True)

    # Gather the work first so the model is only loaded when there is
    # something to transcribe. Sorting gives a stable processing order.
    pending = []
    for subdir in sorted(os.listdir(root_dir)):
        subdir_path = os.path.join(root_dir, subdir)
        if not os.path.isdir(subdir_path):
            continue
        for file_name in sorted(os.listdir(subdir_path)):
            input_file = os.path.join(subdir_path, file_name)
            if not file_name.lower().endswith(".mp3") or not os.path.isfile(input_file):
                continue
            base_name = os.path.splitext(file_name)[0]
            output_file = os.path.join(subdir_path, f"{base_name}.txt")
            pending.append((input_file, output_file))

    if not pending:
        print(f"Nenhum arquivo .mp3 encontrado em '{root_dir}'.")
        return

    model = whisper.load_model(model_name)
    for input_file, output_file in pending:
        split_and_transcribe_audio(model, input_file, output_file)
        print(f"Transcrição para o arquivo '{input_file}' salva em '{output_file}'.")

if __name__ == "__main__":
    main()

100%|█████████████████████████████████████| 2.87G/2.87G [00:52<00:00, 58.7MiB/s]


Transcrição para o arquivo './sample_data/video-teste.mp3' salva em './sample_data/video-teste.txt'.


In [None]:
import os
import zipfile

def zip_transcription_files(directory, output_zip_filename):
    """Pack every ``.txt`` file found under ``directory`` into a deflated zip.

    The tree is walked recursively; each matching file is stored under its
    path relative to ``directory``. Files with other extensions are ignored.

    Args:
        directory: Root of the tree to search.
        output_zip_filename: Path of the zip archive to create (overwritten
            if it already exists).
    """
    archive = zipfile.ZipFile(output_zip_filename, 'w', zipfile.ZIP_DEFLATED)
    with archive:
        for current_dir, _subdirs, entries in os.walk(directory):
            transcripts = (name for name in entries if name.endswith(".txt"))
            for name in transcripts:
                full_path = os.path.join(current_dir, name)
                # Store paths relative to the root so the archive does not
                # embed the caller's directory layout.
                archive.write(full_path, os.path.relpath(full_path, directory))

def main():
    """Zip the transcription files under ./sample_data/sections and report the archive name."""
    # Directory that holds the transcription files.
    transcripts_dir = "./sample_data/sections"
    # Name of the zip archive to produce.
    output_zip_filename = "transcriptions.zip"

    zip_transcription_files(transcripts_dir, output_zip_filename)
    print(f"Arquivos de transcrição foram compactados em '{output_zip_filename}'.")


if __name__ == "__main__":
    main()