In [1]:
# Clear any existing installations
# !pip uninstall -y openai-whisper whisper

# Install ffmpeg
!apt update && apt install ffmpeg

# Install whisper
!pip install -q openai-whisper

Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:2 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,626 B]
Get:3 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,107 kB]
Get:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ Packages [59.5 kB]
Get:5 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:6 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:7 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:8 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease [18.1 kB]
Get:10 http://security.ubuntu.com/ubuntu jammy-security/universe amd64 Packages [1,164 kB]
Get:11 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Get:12 https://r2u.stat.illinois.edu/ubuntu jammy/main all Packages [8,457 kB]
Get:13 https://ppa.launch

In [2]:
import gc
import os
import subprocess
import time
from datetime import datetime

import torch
import whisper
from google.colab import files

In [3]:
# Print a heading, then run `nvidia-smi` through the notebook shell escape so the
# attached GPU and its current memory usage appear in the cell output.
print("GPU Bilgisi:")  # label is Turkish for "GPU Information:"
!nvidia-smi

GPU Bilgisi:
Mon Nov 11 19:13:01 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                                                       

In [None]:
# Google Drive must already be mounted at /content/drive before this cell runs, e.g.:
# from google.colab import drive
# drive.mount('/content/drive')

def _print_gpu_info():
    """Print the `nvidia-smi` report, or a short notice if the tool is missing."""
    try:
        report = subprocess.run(
            ["nvidia-smi"], capture_output=True, text=True, check=False
        )
    except OSError:
        # Raised (e.g. FileNotFoundError) when no NVIDIA driver tools are installed.
        print("nvidia-smi is not available on this machine.")
        return
    # Captured and re-printed so the report shows up in the notebook cell output.
    print(report.stdout or report.stderr)


def process_audio_from_drive(audio_path, model_size="large", output_folder=None,
                             language="en"):
    """
    Transcribe an audio file with a Whisper model and save the text next to it.

    Args:
        audio_path (str): Full path to the audio file (e.g. on a mounted Drive)
        model_size (str): Whisper model size ("tiny", "base", "small", "medium", "large")
        output_folder (str): Folder for the output .txt file (default: same folder
            as the audio file). Created if it does not exist.
        language (str): Language code passed to Whisper (e.g. "en", "tr").
            Defaults to "en", which matches the previous hard-coded behaviour.

    Returns:
        str: Transcribed text if successful, None otherwise (missing input file
        or any exception, which is printed rather than raised)
    """
    # Bound before the try block so the cleanup in `finally` can always refer to it.
    model = None
    try:
        # Display GPU information for monitoring resources
        print("\nGPU Information:")
        _print_gpu_info()

        # Verify file exists before processing
        if not os.path.exists(audio_path):
            print(f"ERROR: {audio_path} not found!")
            return None

        print(f"\nProcessing {os.path.basename(audio_path)} with {model_size} model...")

        # Load the specified Whisper model
        print(f"\nLoading {model_size} model...")
        model = whisper.load_model(model_size)

        # Track processing time (transcription only; model loading is excluded)
        start_time = time.time()

        # Perform transcription with specified parameters
        print("\nConverting speech to text...")
        result = model.transcribe(
            audio_path,
            language=language,
            # Half precision only helps on a GPU; request it only when CUDA exists.
            fp16=torch.cuda.is_available(),
        )

        # Calculate and display processing duration
        process_time = time.time() - start_time
        minutes = int(process_time // 60)
        seconds = int(process_time % 60)
        print(f"\nProcessing completed in {minutes} minutes {seconds} seconds")

        # Determine output directory
        if output_folder is None:
            output_folder = os.path.dirname(audio_path)
        # An empty string means "current directory" (bare file name); nothing to create.
        if output_folder:
            os.makedirs(output_folder, exist_ok=True)

        # Generate output filename from the input name, model size and duration
        input_filename = os.path.splitext(os.path.basename(audio_path))[0]
        output_filename = f"{input_filename}_{model_size}_{process_time:.0f}s.txt"
        output_path = os.path.join(output_folder, output_filename)

        # Save transcription to file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(result["text"])

        print(f"\nResult saved to Drive: {output_path}")

        return result["text"]

    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None.
        print(f"An error occurred: {str(e)}")
        return None
    finally:
        # Drop the model reference first; cached GPU memory can only be released
        # once nothing refers to the tensors that occupy it.
        model = None
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

# Example usage
if __name__ == "__main__":
    # Specify path to your audio file in Drive
    AUDIO_FILE = "/content/drive/MyDrive/Lectures/Lecture-2.m4a"  # Change this path

    # Optional: Specify output folder (defaults to audio file location if not specified)
    OUTPUT_FOLDER = "/content/drive/MyDrive/Lectures"  # Change this path

    # Number of characters of the transcription to preview in the cell output
    PREVIEW_CHARS = 500

    # Create the output directory only once the input file is known to exist.
    # Creating folders under /content/drive while Drive is not mounted would leave
    # a stray local directory there, which may make a later drive.mount() fail.
    if os.path.exists(AUDIO_FILE):
        os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    print("Starting transcription...")
    transcription = process_audio_from_drive(
        audio_path=AUDIO_FILE,
        model_size="large",
        output_folder=OUTPUT_FOLDER
    )

    # A missing file or a failure has already been reported by the function itself.
    if transcription:
        print("\nTranscription completed successfully!")
        print(f"\nFirst {PREVIEW_CHARS} characters of the text:")
        preview = transcription[:PREVIEW_CHARS]
        # Add an ellipsis only when the text was actually cut off.
        if len(transcription) > PREVIEW_CHARS:
            preview += "..."
        print(preview)

Transkripsiyon başlıyor...

GPU Bilgisi:
Mon Nov 11 19:13:21 2024       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.104.05             Driver Version: 535.104.05   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla T4                       Off | 00000000:00:04.0 Off |                    0 |
| N/A   50C    P8              12W /  70W |      0MiB / 15360MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
                           

100%|██████████████████████████████████████| 2.88G/2.88G [00:30<00:00, 101MiB/s]
  checkpoint = torch.load(fp, map_location=device)



Ses metne dönüştürülüyor...
