# 🎥 YouTube'dan Hugging Face'e Toplu Veri Seti Yükleme

Bu notebook, YouTube videolarını toplu olarak indirip işleyerek Hugging Face'e yükler.

📝 Kullanım:
1. "🎬 Video İşleme" hücresindeki `youtube_urls` listesine YouTube URL'lerini ekleyin
2. Runtime > Run all seçeneğini seçin
3. İstendiğinde Hugging Face token'ınızı girin
4. İşlemin tamamlanmasını bekleyin

In [1]:
# @title 🔧 Kurulum ve Bağımlılıklar

!pip install yt-dlp pydub webvtt-py datasets transformers librosa huggingface_hub python-dotenv

import os
import shutil
import subprocess
import sys
import time
import urllib.request
from getpass import getpass
from typing import List

# Create the working folders; exist_ok makes re-running this cell harmless.
for output_subdir in ("audio", "json", "spectrogram"):
    os.makedirs(f"/content/output/{output_subdir}", exist_ok=True)

# Download the helper scripts into the current working directory.
# urlretrieve overwrites an existing copy (a plain `wget` re-run would save
# "name.py.1" and leave the stale "name.py" in use) and raises on HTTP errors
# instead of continuing silently.
# NOTE(review): the scripts are fetched from the moving `main` branch and then
# executed; consider pinning a commit hash so a run is reproducible.
SCRIPT_BASE_URL = "https://raw.githubusercontent.com/zinderud/sayha/main"
for script_name in (
    "youtube_splitter_tr.py",
    "processed_dataset.py",
    "upload_to_huggingface.py",
):
    urllib.request.urlretrieve(f"{SCRIPT_BASE_URL}/{script_name}", script_name)

Collecting yt-dlp
  Downloading yt_dlp-2025.3.27-py3-none-any.whl.metadata (172 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m172.1/172.1 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting webvtt-py
  Downloading webvtt_py-0.5.1-py3-none-any.whl.metadata (3.4 kB)
Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2024.12.0,>=2023.1.0 (from fsspec[http]<=2024.12.0,>=2023.1.0-

In [3]:
# @title 🛠 İşlev Tanımlamaları

# Temporary folders that are emptied after every video, successful or not.
_TEMP_OUTPUT_DIRS = (
    "/content/output/audio",
    "/content/output/json",
    "/content/output/spectrogram",
)


def _run_pipeline_script(script_name: str, *script_args: str) -> None:
    """Run one helper script with the kernel's interpreter and echo its output.

    Args:
        script_name: Path of the script, relative to the current directory.
        *script_args: Command-line arguments passed to the script verbatim.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero status.
        OSError: If the interpreter cannot be started.
    """
    # An argument list (no shell) keeps characters such as "&" in a URL from
    # being interpreted; "-u" makes the child flush its output line by line.
    command = [sys.executable, "-u", script_name, *script_args]
    with subprocess.Popen(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        encoding="utf-8",
        errors="replace",
    ) as process:
        for line in process.stdout:
            print(line, end="")
    # Leaving the `with` block waits for the child, so returncode is set here.
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, command)


def _clear_temp_outputs() -> None:
    """Best-effort removal of everything inside the temporary output folders."""
    for folder in _TEMP_OUTPUT_DIRS:
        if not os.path.isdir(folder):
            continue
        # Snapshot the entries first so nothing is deleted while scanning.
        with os.scandir(folder) as scanner:
            entries = list(scanner)
        for entry in entries:
            try:
                if entry.is_dir(follow_symlinks=False):
                    shutil.rmtree(entry.path)
                else:
                    os.remove(entry.path)
            except OSError as cleanup_error:
                # Cleanup must never hide the outcome of the video itself.
                print(f"⚠️ Temizlenemedi: {entry.path} ({cleanup_error})")


def process_youtube_video(youtube_url: str) -> bool:
    """Run the split -> process -> upload pipeline for a single video.

    The three helper scripts are executed in order; the first one receives the
    URL as its only argument. A script that exits with a non-zero status stops
    the pipeline and marks the video as failed (the previous `!python` shell
    lines never raised, so failures were reported as successes).

    Args:
        youtube_url: URL of the video to process.

    Returns:
        True if all three scripts exited successfully, False otherwise.
    """
    try:
        # Download and split the YouTube video
        _run_pipeline_script("youtube_splitter_tr.py", youtube_url)

        # Build the dataset
        _run_pipeline_script("processed_dataset.py")

        # Upload to Hugging Face
        _run_pipeline_script("upload_to_huggingface.py")

        print(f"✅ {youtube_url} başarıyla işlendi!")
        return True

    except Exception as e:
        print(f"❌ Hata oluştu ({youtube_url}): {e}")
        return False
    finally:
        # Remove temporary files
        _clear_temp_outputs()

def process_video_list(youtube_urls: List[str], wait_seconds: int = 60):
    """Process every URL in order and print a summary at the end.

    Each URL is handed to `process_youtube_video`; its boolean result decides
    whether the video counts as successful or failed.

    Args:
        youtube_urls: URLs to process. An empty list is allowed and yields a
            summary with zero videos and a 0.0% success rate.
        wait_seconds: Pause between two consecutive videos, in seconds.
            Defaults to 60; a value of 0 or less disables the pause.

    Returns:
        None. Progress and the summary are printed.
    """
    total_videos = len(youtube_urls)
    successful_videos = 0
    failed_videos = 0

    print(f"\nToplam {total_videos} video işlenecek...")
    print("-" * 50)

    for index, url in enumerate(youtube_urls, 1):
        print(f"\n📽 Video {index}/{total_videos} işleniyor...")
        print(f"🔗 URL: {url}")

        if process_youtube_video(url):
            successful_videos += 1
        else:
            failed_videos += 1

        # Pause between videos, but not after the last one
        if index < total_videos and wait_seconds > 0:
            print(f"\n⏳ Bir sonraki video için {wait_seconds} saniye bekleniyor...")
            time.sleep(wait_seconds)

    # Guard the division: an empty list must not raise ZeroDivisionError.
    success_rate = (successful_videos / total_videos) * 100 if total_videos else 0.0

    print("\n" + "=" * 50)
    print("İşlem Özeti:")
    print(f"✅ Başarılı: {successful_videos}")
    print(f"❌ Başarısız: {failed_videos}")
    print(f"📊 Başarı Oranı: {success_rate:.1f}%")
    print("=" * 50)

In [2]:
# @title 🔑 Hugging Face Token'ı Ayarla

# Read the token without echoing it; strip whitespace that often comes along
# when the value is pasted.
token = getpass('Hugging Face Token: ').strip()
if not token:
    # Fail here rather than once per video when a later step needs the token.
    raise ValueError("Hugging Face token boş olamaz; lütfen bu hücreyi yeniden çalıştırın.")
# Exported through the environment so that child processes inherit it.
os.environ['HUGGINGFACE_TOKEN'] = token

Hugging Face Token: ··········


In [None]:
# @title 🎬 Video İşleme

# Add the video URLs to process here, one string per entry.
# The list is passed to process_video_list, which handles the entries in order.
youtube_urls = [
"https://www.youtube.com/watch?v=Wz2yWif0BEY",
"https://www.youtube.com/watch?v=Ieor8SGc1-M",
"https://www.youtube.com/watch?v=ldJVIpQcqNI",
"https://www.youtube.com/watch?v=0DMIZlyOc4E",
"https://www.youtube.com/watch?v=t005uAu1azw",
"https://www.youtube.com/watch?v=4-IzrVPVFvs",
"https://www.youtube.com/watch?v=a4eUH6FHuQY",
"https://www.youtube.com/watch?v=KD6rQZuUZsQ",
"https://www.youtube.com/watch?v=vbsxGZDthW0",
"https://www.youtube.com/watch?v=SZEWQDI4jU8",
"https://www.youtube.com/watch?v=ufReh3LGHtM",
"https://www.youtube.com/watch?v=1WllTP6Z4Kg",
"https://www.youtube.com/watch?v=wuXnllwK_U0",
"https://www.youtube.com/watch?v=poaLRUOKPow",
"https://www.youtube.com/watch?v=3O5gS6NRHss",
"https://www.youtube.com/watch?v=FCOnZTez6n4",
"https://www.youtube.com/watch?v=QG_pbHPYAkg",
"https://www.youtube.com/watch?v=V2dMMwzVOxE",
"https://www.youtube.com/watch?v=QJIK6KFOZl4",
"https://www.youtube.com/watch?v=KTIKtDqH9sw",
"https://www.youtube.com/watch?v=qlRDk1rcYa0",
"https://www.youtube.com/watch?v=wDwmBPB4RH4",
"https://www.youtube.com/watch?v=V-4iJY8vN6g",
"https://www.youtube.com/watch?v=soYwCzEBJT8",
"https://www.youtube.com/watch?v=GsAcf_lv8wI",
"https://www.youtube.com/watch?v=T36_0XtBGmM",
"https://www.youtube.com/watch?v=LoFjYaEMSRw",
"https://www.youtube.com/watch?v=dWolJBOc82Q",
"https://www.youtube.com/watch?v=Q4ep0_4X4To",
"https://www.youtube.com/watch?v=Rw3ZhtvEigg",
"https://www.youtube.com/watch?v=qDZ4sbEJgB8",
"https://www.youtube.com/watch?v=USbrWXFHpB4",
"https://www.youtube.com/watch?v=isUdaqpw6fI",
"https://www.youtube.com/watch?v=OyGXe6BNbdM",
"https://www.youtube.com/watch?v=6WUhbidnwCo",
"https://www.youtube.com/watch?v=n2egR0uOj30",
"https://www.youtube.com/watch?v=Rc5DHPOxy1I",
"https://www.youtube.com/watch?v=OLvyXjtnTGA",
"https://www.youtube.com/watch?v=aVDE5SFcaJY",
"https://www.youtube.com/watch?v=hE-NArFIfI4",
"https://www.youtube.com/watch?v=Z36c3fDqvJY",
"https://www.youtube.com/watch?v=mwtjZQeymV0",
"https://www.youtube.com/watch?v=ONwI0hg2Xyc",
"https://www.youtube.com/watch?v=Dq8d-XP4y4w",
"https://www.youtube.com/watch?v=8t5bDsBZkKE",
"https://www.youtube.com/watch?v=o08ynmlr4yM",
"https://www.youtube.com/watch?v=E1giwLZEC4I",
"https://www.youtube.com/watch?v=gXzFFu2bWnc",
"https://www.youtube.com/watch?v=cuz76eC1lOI",
"https://www.youtube.com/watch?v=zzAZWLI_dHI",
"https://www.youtube.com/watch?v=SRuj-4UWnc0",
"https://www.youtube.com/watch?v=Mipyvpu-o6g",
"https://www.youtube.com/watch?v=2-nE5-in8vA",
"https://www.youtube.com/watch?v=09NUt7nqFnI",
"https://www.youtube.com/watch?v=HYyPD94BOds",
"https://www.youtube.com/watch?v=Ir3Odoaky58",
"https://www.youtube.com/watch?v=aJYrfFLzdjM",
"https://www.youtube.com/watch?v=O6GD6jGqCJg",
"https://www.youtube.com/watch?v=xpZjMhBq17w",
"https://www.youtube.com/watch?v=pnMIjcY_FT0",
"https://www.youtube.com/watch?v=qkY0p3arp84",
"https://www.youtube.com/watch?v=SMdTNSts-d0",
"https://www.youtube.com/watch?v=rt7GdvtV9U8",
"https://www.youtube.com/watch?v=ZS38I3L6O_k",
"https://www.youtube.com/watch?v=Ukfuyy8l-GE",
"https://www.youtube.com/watch?v=IjsdURbAMOM",
"https://www.youtube.com/watch?v=ceCjXq-ZoRA",
"https://www.youtube.com/watch?v=bHbj2tlJMl4",
"https://www.youtube.com/watch?v=nOEK5A967dk",
"https://www.youtube.com/watch?v=4JzfoARxC74",
"https://www.youtube.com/watch?v=TNjMSoQ5rk4",
"https://www.youtube.com/watch?v=THuewrRsuRg",
"https://www.youtube.com/watch?v=mjbm5I6pmBE",
"https://www.youtube.com/watch?v=jmG_0Ur0IDk",
"https://www.youtube.com/watch?v=XWwLKxsQXFw",
"https://www.youtube.com/watch?v=BQFxGP-yxqA",
"https://www.youtube.com/watch?v=xt3ZOk6GKwM",
]

# Start the batch run; an empty list only produces a reminder to add URLs.
if not youtube_urls:
    print("❌ En az bir YouTube URL'si eklemelisiniz!")
else:
    process_video_list(youtube_urls)

[1;30;43mGörüntülenen çıkış son 5000 satıra kısaltıldı.[0m
Silindi: output/spectrogram/0000880_091_Ama_bunu_bitkisel_sıvı_yağlar,_rafine_yağlardan_bir.mp3_spectrogram.png
Silindi: output/spectrogram/0000677_309_Türk_kahvesinin_kalpte_bir_yan_etkisi_var_mıdır.mp3_spectrogram.png
Silindi: output/spectrogram/0000817_306_ondan_sonra_yediğine,_içtiğine_dikkat_etmek___.mp3_spectrogram.png
Silindi: output/spectrogram/0000075_806_2_gün,_3_gün_öncesine_kadar_genellikle_bir_şikayet_yok_.mp3_spectrogram.png
Silindi: output/spectrogram/0000596_844_40_yıldır_.mp3_spectrogram.png
Silindi: output/spectrogram/0000932_642_Yani_sigara_isteyerek_bırakmak_lazım_.mp3_spectrogram.png
Silindi: output/spectrogram/0000145_054_Kardiyolog_tanı_koyar,_tedavi_yapar_.mp3_spectrogram.png
Silindi: output/spectrogram/0000843_758_Hızlı_yemek,_eşittir_kilo,_eşittir_diyabet_demektir_.mp3_spectrogram.png
Silindi: output/spectrogram/0000816_147_Keçi_sütü_olabilir_.mp3_spectrogram.png
Silindi: output/spectrogram/0000384_0