<a href="https://colab.research.google.com/github/yshalsager/OpenAI_Whisper_ytdlp/blob/master/OpenAI_Whisper_ar_ytdlp.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# تفريغ الصوتيات باستخدام OpenAI Whisper

هذا الملف مبني على [هذا الملف](https://colab.research.google.com/gist/Kazuki-tam/04e85708e4fd1c4b8af180d317977f4d/whisper-mock-en.ipynb)


## 📖 كيفية الاستخدام
1. شغل خطوة "الإعداد".
2. اختر نمط التشغيل: يوتيوب أو ملف محلي.
  - إذا اخترت local لتفريغ ملف تقوم برفعه بنفسك، شغل خطوة الإعداد أولًا ثم ارفع الملف إلى مجلد `download`.
  - إذا اخترت يوتيوب ضع رابط المقطع أو قائمة التشغيل في `youtube_urls` (يمكن وضع أكثر من رابط مفصولة بمسافات).
  - يمكنك تحديد بداية ونهاية قائمة التشغيل من الخيارين `playlist_start` و`playlist_end`.
3. اختر لغة الملف.
4. اختر النموذج المستخدم في التفريغ، `large` يعطي نتائج أفضل لكن أبطأ.
5. شغل خطوة `Transcription`.

In [None]:
#@title اﻹعداد
# Install packages
# Whisper is installed straight from its GitHub repository (no version pin).
!pip install git+https://github.com/openai/whisper.git
# yt-dlp is imported by the Transcription cell (as `yt_dlp`) to download audio.
!pip install yt-dlp
# Fetch stable_whisper.py from jianfch/stable-ts at a fixed commit and save it
# in the working directory, so the Transcription cell can import
# `modify_model` and `results_to_sentence_srt` from it.
!wget https://raw.githubusercontent.com/jianfch/stable-ts/dcf3c55e6655ddee5694ad442c93d8f49fe476de/stable_whisper.py -O stable_whisper.py

from pathlib import Path

# Create the working folders: `download` holds the media files to transcribe,
# `output` receives the generated .txt/.srt files.
# exist_ok=True makes re-running this cell safe; unlike a separate exists()
# check, it also fails loudly if the name is already taken by a regular file.
download_folder = Path("download")
download_folder.mkdir(exist_ok=True)
output_folder = Path("output")
output_folder.mkdir(exist_ok=True)

In [None]:
#@title Transcription
import whisper
import yt_dlp

import mimetypes
from pathlib import Path

from google.colab import files
from stable_whisper import modify_model, results_to_sentence_srt


# Colab form fields. The `#@param` markers must stay on the assignment lines.
process_type = "youtube"#@param ["youtube", "local"]
youtube_urls = "https://www.youtube.com/watch?v=sqOn0aG7bwQ"#@param {type:"string"}
playlist_start = 1 #@param {type:"integer"}
playlist_end = 9999 #@param {type:"integer"}

# Initialise the MIME type tables used further down to pick out media files.
mimetypes.init()

# yt-dlp options: prefer an m4a stream, convert whatever was downloaded to m4a
# with ffmpeg, restrict playlists to the requested index range, and write the
# files into the download folder.
ydl_opts = {
    'format': 'm4a/bestaudio/best',
    'postprocessors': [{  # Extract audio using ffmpeg
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'm4a',
    }],
    "playliststart": int(playlist_start),
    "playlistend": int(playlist_end),
    "outtmpl": f"{download_folder}/%(playlist_index)04d-%(title)s-%(id)s.%(ext)s",
}

if process_type == "youtube":
    # `youtube_urls` may hold several whitespace-separated links.
    url_list = youtube_urls.split()
    if url_list:
        # YoutubeDL.download() takes a list of URLs, so one session handles
        # all of them instead of building a new downloader per link.
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download(url_list)

# Colab form field: language of the audio, passed as `language=` to
# model.transcribe() in the loop below.
language = "ar"#@param ["en", "ar", "Afrikaans", "Albanian", "Amharic", "Arabic", "Armenian", "Assamese", "Azerbaijani", "Bashkir", "Basque", "Belarusian", "Bengali", "Bosnian", "Breton", "Bulgarian", "Burmese", "Castilian", "Catalan", "Chinese", "Croatian", "Czech", "Danish", "Dutch", "English", "Estonian", "Faroese", "Finnish", "Flemish", "French", "Galician", "Georgian", "German", "Greek", "Gujarati", "Haitian", "Haitian Creole", "Hausa", "Hawaiian", "Hebrew", "Hindi", "Hungarian", "Icelandic", "Indonesian", "Italian", "Japanese", "Javanese", "Kannada", "Kazakh", "Khmer", "Korean", "Lao", "Latin", "Latvian", "Letzeburgesch", "Lingala", "Lithuanian", "Luxembourgish", "Macedonian", "Malagasy", "Malay", "Malayalam", "Maltese", "Maori", "Marathi", "Moldavian", "Moldovan", "Mongolian", "Myanmar", "Nepali", "Norwegian", "Nynorsk", "Occitan", "Panjabi", "Pashto", "Persian", "Polish", "Portuguese", "Punjabi", "Pushto", "Romanian", "Russian", "Sanskrit", "Serbian", "Shona", "Sindhi", "Sinhala", "Sinhalese", "Slovak", "Slovenian", "Somali", "Spanish", "Sundanese", "Swahili", "Swedish", "Tagalog", "Tajik", "Tamil", "Tatar", "Telugu", "Thai", "Tibetan", "Turkish", "Turkmen", "Ukrainian", "Urdu", "Uzbek", "Valencian", "Vietnamese", "Welsh", "Yiddish", "Yoruba", "af", "am", "as", "az", "ba", "be", "bg", "bn", "bo", "br", "bs", "ca", "cs", "cy", "da", "de", "el", "es", "et", "eu", "fa", "fi", "fo", "fr", "gl", "gu", "ha", "haw", "hi", "hr", "ht", "hu", "hy", "id", "is", "it", "iw", "ja", "jw", "ka", "kk", "km", "kn", "ko", "la", "lb", "ln", "lo", "lt", "lv", "mg", "mi", "mk", "ml", "mn", "mr", "ms", "mt", "my", "ne", "nl", "nn", "no", "oc", "pa", "pl", "ps", "pt", "ro", "ru", "sa", "sd", "si", "sk", "sl", "sn", "so", "sq", "sr", "su", "sv", "sw", "ta", "te", "tg", "th", "tk", "tl", "tr", "tt", "uk", "ur", "uz", "vi", "yi", "yo", "zh"]
# Colab form field holding the model name. The same variable is rebound to the
# loaded model object on the next line, so `model` is no longer a string after it.
model = "large"#@param ["large", "medium", "base", "small", "tiny"]
model = whisper.load_model(model)
# jianfch/stable-ts
# `modify_model` is imported from stable_whisper; presumably it patches the
# model in place so its results can be fed to results_to_sentence_srt --
# TODO confirm against the pinned stable_whisper.py.
modify_model(model)

# Transcribe every audio/video file in the download folder, in name order.
# For each one: write a .txt and an .srt into the output folder, trigger a
# browser download of both, then delete the source media file.
for audio_file in sorted(download_folder.iterdir()):
    # Skip directories and other non-regular entries: a folder with a
    # media-like name would otherwise reach transcribe()/unlink() and abort
    # the whole run.
    if not audio_file.is_file():
        continue
    # Keep only files whose guessed MIME type is audio/* or video/*.
    mime = mimetypes.guess_type(audio_file)[0]
    if mime is None or mime.split('/')[0] not in ('audio', 'video'):
        continue

    print(f"Transcription of {audio_file} will start!")
    text_file = output_folder / f"{audio_file.stem}.txt"
    subtitle_file = output_folder / f"{audio_file.stem}.srt"

    result = model.transcribe(str(audio_file), language=language)

    # Save TXT: one stripped segment per line.
    with open(text_file, "w", encoding="utf-8") as txt:
        txt.writelines(
            segment['text'].strip() + '\n' for segment in result["segments"]
        )
    files.download(str(text_file))

    # Save SRT using the stable_whisper helper.
    results_to_sentence_srt(result, str(subtitle_file))
    files.download(str(subtitle_file))

    # Delete the processed media file from the download folder.
    audio_file.unlink()
    print("Done!")