# PROSES PENGAMBILAN VIDEO

Dalam proses pengambilan data, kami menggunakan YouTube API dengan memasukkan beberapa keyword yang relevan dengan topik analisis. Setelah itu, sistem menampilkan sejumlah video yang sesuai dengan keyword tersebut. Dari hasil tersebut, kami tidak mengambil semua video, melainkan hanya memilih beberapa video secara selektif. Pemilihan ini didasarkan pada kriteria tertentu, seperti jumlah like, view, isi komentar yang bervariasi, dan relevansi komentar terhadap isu yang sedang diteliti. Tujuan dari penyaringan ini adalah untuk memperoleh data komentar yang lebih kaya secara konteks dan sesuai dengan fokus analisis yang telah ditetapkan.

In [1]:
!pip install google-api-python-client
!pip install unidecode



## TAHAP 1

In [2]:
from googleapiclient.discovery import build
import pandas as pd

# === CONFIGURATION ===
import os

# NOTE(review): an API key is committed in this notebook. Treat it as leaked:
# rotate it and provide the replacement through the YOUTUBE_API_KEY
# environment variable. The literal is kept only as a fallback so that
# existing runs keep working unchanged.
api_key = os.environ.get('YOUTUBE_API_KEY') or 'AIzaSyDG0E9CNV_5yK1E_FUFdcAuBh0t6JMV6jI'
# Search phrases; one search request is issued per phrase.
keywords = ['kelemahan dan kelebihan kendaraan listrik', 'Kendaraan listrik ramah lingkungan', 'subsidi pemerintah untuk kendaraan listrik: layak atau tidak?']
max_results = 7  # at most 7 results requested per keyword

# Shared API client used by every helper function below.
youtube = build('youtube', 'v3', developerKey=api_key)

# Module-level accumulators filled while the videos are processed.
video_info_list = []
top_comments = []
replies = []

# === FUNGSI ===
def search_videos_by_keyword(keywords, max_results=5):
    """Search YouTube once per keyword and return the unique video IDs found.

    Args:
        keywords: Iterable of search phrases.
        max_results: Value passed as ``maxResults`` to every search request.

    Returns:
        A set with the video IDs collected over all keywords.
    """
    collected = []
    for phrase in keywords:
        request = youtube.search().list(
            q=phrase,
            part='id',
            type='video',
            maxResults=max_results,
        )
        result = request.execute()

        found = []
        for entry in result.get('items', []):
            # Only entries whose id carries a 'videoId' are kept.
            if 'videoId' in entry.get('id', {}):
                found.append(entry['id']['videoId'])

        collected.extend(found)
        print(f"Ditemukan {len(found)} video untuk keyword '{phrase}'")

    # Duplicates across keywords collapse here.
    return set(collected)


def get_video_info(video_id):
    """Look up a single video and return its metadata as a flat dict.

    Args:
        video_id: ID passed as the ``id`` filter of the videos request.

    Returns:
        A dict with the keys ``video_id``, ``title``, ``description``,
        ``uploader``, ``upload_date``, ``view_count``, ``like_count`` and
        ``comment_count`` (a value is None when the API response omits the
        field), or an empty dict when the response contains no items.
    """
    items = youtube.videos().list(
        part='snippet,statistics',
        id=video_id,
    ).execute()['items']
    if not items:
        return {}

    first = items[0]
    snippet = first['snippet']
    stats = first['statistics']

    # (output key, API field) pairs, in the column order of the result.
    snippet_fields = (
        ('title', 'title'),
        ('description', 'description'),
        ('uploader', 'channelTitle'),
        ('upload_date', 'publishedAt'),
    )
    stat_fields = (
        ('view_count', 'viewCount'),
        ('like_count', 'likeCount'),
        ('comment_count', 'commentCount'),
    )

    info = {'video_id': video_id}
    info.update((out_key, snippet.get(api_key_name)) for out_key, api_key_name in snippet_fields)
    info.update((out_key, stats.get(api_key_name)) for out_key, api_key_name in stat_fields)
    return info

def get_all_replies(parent_id):
    """Collect every reply posted under one top-level comment.

    Pages through the comments endpoint until the response carries no
    ``nextPageToken``.

    Args:
        parent_id: ID of the comment whose replies are requested.

    Returns:
        A list of dicts with the keys ``reply_id``, ``parent_id``,
        ``author``, ``text``, ``likes``, ``published`` and ``updated``.
    """
    collected = []
    page_token = None
    has_more = True
    while has_more:
        page = youtube.comments().list(
            part="snippet",
            parentId=parent_id,
            maxResults=100,
            pageToken=page_token,
            textFormat="plainText",
        ).execute()

        for reply in page.get("items", []):
            body = reply['snippet']
            record = {
                'reply_id': reply['id'],
                'parent_id': parent_id,
                'author': body.get('authorDisplayName'),
                'text': body.get('textOriginal'),
                'likes': body.get('likeCount'),
                'published': body.get('publishedAt'),
                'updated': body.get('updatedAt'),
            }
            collected.append(record)

        # A missing or empty token means the last page has been read.
        page_token = page.get("nextPageToken")
        has_more = bool(page_token)
    return collected

def get_all_comments(video_id, cek_replies = True):
    """Fetch all top-level comments of one video into the module-level lists.

    One record per top-level comment is appended to ``top_comments``. When
    ``cek_replies`` is true, the replies returned by ``get_all_replies`` for
    that comment are added to ``replies``. The function returns nothing.

    Args:
        video_id: ID of the video whose comment threads are requested.
        cek_replies: Whether replies are fetched for every top-level comment.
    """
    page_token = None
    has_more = True
    while has_more:
        page = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            maxResults=100,
            pageToken=page_token,
            textFormat='plainText',
        ).execute()

        for thread in page.get("items", []):
            try:
                top_level = thread['snippet']['topLevelComment']
                body = top_level['snippet']
                top_id = top_level['id']

                top_comments.append({
                    'comment_id': top_id,
                    'video_id': video_id,
                    'author': body.get('authorDisplayName'),
                    'text': body.get('textOriginal'),
                    'likes': body.get('likeCount'),
                    'published': body.get('publishedAt'),
                    'updated': body.get('updatedAt'),
                })
                if not cek_replies:
                    continue
                replies.extend(get_all_replies(top_id))
            except KeyError:
                # A thread (or one of its replies) missing an expected key is
                # skipped; processing moves on to the next thread.
                continue

        page_token = page.get("nextPageToken")
        has_more = bool(page_token)

# === MAIN PROCESS ===
# Search for candidate videos, then collect metadata and top-level comments
# for each of them into the module-level lists.
video_ids = search_videos_by_keyword(keywords, max_results=max_results)
print(f"Ditemukan {len(video_ids)} video untuk keyword '{keywords}'")
print(video_ids)
for vid in video_ids:
    try:
        print(f"Mengambil data video: {vid}")
        info = get_video_info(vid)
        if info:
            video_info_list.append(info)
            get_all_comments(vid, cek_replies=False) # use cek_replies=True to also fetch the replies to each comment
    except Exception as e:
        # Any failure for one video is reported and the loop continues with the next one.
        print(f"Error saat memproses video {vid}: {e}")

# === SAVE TO EXCEL ===
df_video = pd.DataFrame(video_info_list)
df_top = pd.DataFrame(top_comments)
df_replies = pd.DataFrame(replies)

# One workbook with one sheet per DataFrame.
output_file = "Data Awal kendaraan listrik.xlsx"
with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
    df_video.to_excel(writer, sheet_name='VideoInfo', index=False)
    df_top.to_excel(writer, sheet_name='TopComments', index=False)
    # df_replies.to_excel(writer, sheet_name='Replies', index=False) # enable to also save the comment replies

print(f"✅ Data disimpan ke file Excel: {output_file}")


Ditemukan 7 video untuk keyword 'kelemahan dan kelebihan kendaraan listrik'
Ditemukan 7 video untuk keyword 'Kendaraan listrik ramah lingkungan'
Ditemukan 7 video untuk keyword 'subsidi pemerintah untuk kendaraan listrik: layak atau tidak?'
Ditemukan 21 video untuk keyword '['kelemahan dan kelebihan kendaraan listrik', 'Kendaraan listrik ramah lingkungan', 'subsidi pemerintah untuk kendaraan listrik: layak atau tidak?']'
{'R55pRFBDyAI', 'nTDLDK6UlzI', '8FVOLVfGgZ0', '3NcRyVgFlnU', '3vd5WAFW9i8', '6MmqTocDXPg', 'Obajud-tLwg', 'sVwttNtOgv0', 'l2MwqYjmVSY', 'vqCLNSNDu6c', 'xcDZaMW8Se8', 'tw7c7Iv2DSo', '50KZ16zj5LE', 'kv_HRNPpq2c', 'E2QS4p3zFvQ', '44iENKIM14k', 'MbhVp8btNgE', 'v6tgbL_Acs8', 'Xe2Dp3WODMk', '6RCfwZqqQeY', 'noltRzcuXSI'}
Mengambil data video: R55pRFBDyAI
Mengambil data video: nTDLDK6UlzI
Mengambil data video: 8FVOLVfGgZ0
Mengambil data video: 3NcRyVgFlnU
Mengambil data video: 3vd5WAFW9i8
Mengambil data video: 6MmqTocDXPg
Mengambil data video: Obajud-tLwg
Mengambil data video:

In [3]:
import pandas as pd

# Read the Excel file (output_file is the workbook path set where the data was saved)
file_path = output_file

# Load each sheet into its own DataFrame
df_video = pd.read_excel(file_path, sheet_name='VideoInfo')
df_top = pd.read_excel(file_path, sheet_name='TopComments')
# df_replies = pd.read_excel(file_path, sheet_name='Replies') # enable if the comment replies were saved earlier

## TAHAP 2



In [4]:
import pandas as pd
from googleapiclient.discovery import build
!pip install openpyxl



# Initialise the YouTube API client.
# NOTE(review): an API key is committed in this notebook. Treat it as leaked:
# rotate it and provide the replacement through the YOUTUBE_API_KEY
# environment variable. The literal is kept only as a fallback so that
# existing runs keep working unchanged.
import os

youtube = build('youtube', 'v3', developerKey=os.environ.get('YOUTUBE_API_KEY') or 'AIzaSyDG0E9CNV_5yK1E_FUFdcAuBh0t6JMV6jI')

def get_video_info(video_ids):
    """Fetch snippet and statistics metadata for a collection of videos.

    The IDs are sent in batches so the function also works for long ID
    lists, and no request is made at all when ``video_ids`` is empty.

    Args:
        video_ids: Iterable of video ID strings.

    Returns:
        A list with one dict per video returned by the API, with the keys
        ``video_id``, ``title``, ``description``, ``channel``,
        ``published_at``, ``views``, ``likes``, ``comments`` and
        ``channel_id``. The three counters fall back to 0 when the API
        omits them.
    """
    ids = list(video_ids)
    results = []

    # NOTE(review): 50 IDs per request is the commonly cited limit for
    # videos.list - confirm against the current API reference.
    batch_size = 50
    for start in range(0, len(ids), batch_size):
        # maxResults is intentionally not sent: the API reference states it
        # is not supported together with the id filter.
        response = youtube.videos().list(
            part='snippet,statistics',
            id=','.join(ids[start:start + batch_size]),
        ).execute()

        for item in response.get('items', []):
            # Tolerate items that lack one of the requested parts.
            snippet = item.get('snippet', {})
            stats = item.get('statistics', {})
            results.append({
                'video_id': item['id'],
                'title': snippet.get('title'),
                'description': snippet.get('description'),
                'channel': snippet.get('channelTitle'),
                'published_at': snippet.get('publishedAt'),
                'views': stats.get('viewCount', 0),
                'likes': stats.get('likeCount', 0),
                'comments': stats.get('commentCount', 0),
                'channel_id': snippet.get('channelId'),
            })
    return results

def get_all_comments(video_ids, include_replies=True):
    """Collect the comments (and optionally the replies) of several videos.

    Any exception raised while a video is being processed is printed and the
    next video is tried; records gathered before the failure are kept.

    Args:
        video_ids: Iterable of video IDs.
        include_replies: When true, replies are fetched through
            ``get_replies`` for every thread whose ``totalReplyCount`` is
            above zero.

    Returns:
        A single list holding all top-level comment records followed by all
        reply records.
    """
    top_level_rows = []
    reply_rows = []

    for video_id in video_ids:
        try:
            page_token = None
            has_more = True
            while has_more:
                page = youtube.commentThreads().list(
                    part='snippet',
                    videoId=video_id,
                    maxResults=100,
                    pageToken=page_token,
                    textFormat='plainText',
                ).execute()

                for thread in page['items']:
                    thread_info = thread['snippet']
                    body = thread_info['topLevelComment']['snippet']
                    published = body.get('publishedAt')
                    raw_text = body.get('textOriginal')
                    cleaned = preprocess_text(raw_text)

                    top_level_rows.append({
                        'comment_id': thread['id'],
                        'video_id': video_id,
                        'author': body.get('authorDisplayName'),
                        'likes': body.get('likeCount', 0),
                        'published': published,
                        # Falls back to the publication time when no update time is present.
                        'updated': body.get('updatedAt', published),
                        'text': raw_text,
                        'teks_new': cleaned,
                        'teks_new_lower': cleaned.lower(),
                    })

                    if include_replies and thread_info['totalReplyCount'] > 0:
                        reply_rows.extend(get_replies(thread['id'], video_id))

                page_token = page.get('nextPageToken')
                has_more = bool(page_token)

        except Exception as e:
            print(f"Error processing video {video_id}: {str(e)}")
            continue

    return top_level_rows + reply_rows

def get_replies(parent_id, video_id):
    """Fetch the replies to one comment, page by page.

    If any exception occurs, it is printed and paging stops; the replies
    gathered so far are still returned.

    Args:
        parent_id: ID of the comment whose replies are requested.
        video_id: ID stored in the ``video_id`` field of every record.

    Returns:
        A list of reply records with the same keys as the top-level comment
        records built by ``get_all_comments``.
    """
    rows = []
    page_token = None

    while True:
        try:
            page = youtube.comments().list(
                part='snippet',
                parentId=parent_id,
                maxResults=100,
                pageToken=page_token,
                textFormat='plainText',
            ).execute()

            for entry in page['items']:
                body = entry['snippet']
                published = body.get('publishedAt')
                raw_text = body.get('textOriginal')
                cleaned = preprocess_text(raw_text)
                rows.append({
                    'comment_id': entry['id'],
                    'video_id': video_id,
                    'author': body.get('authorDisplayName'),
                    'likes': body.get('likeCount', 0),
                    'published': published,
                    # Falls back to the publication time when no update time is present.
                    'updated': body.get('updatedAt', published),
                    'text': raw_text,
                    'teks_new': cleaned,
                    'teks_new_lower': cleaned.lower(),
                })

            page_token = page.get('nextPageToken')
        except Exception as e:
            print(f"Error getting replies for {parent_id}: {str(e)}")
            break

        if not page_token:
            break

    return rows

def preprocess_text(text):
    """Return ``text`` without leading/trailing whitespace; None becomes ''."""
    return '' if text is None else text.strip()

# IDs of the videos to process
video_ids = ['pxmq9yyOjo0','UYXcAnb1mV8', '0fzwRDVdkC4', 'zOqALVotnRs', 'kv_HRNPpq2c', '8FVOLVfGgZ0', '50KZ16zj5LE']

# Output workbooks. The completion message below is built from these names so
# that it always reports the files that were actually written.
videos_file = 'Data_youtube_videos.xlsx'
comments_file = 'Data_youtube_comments.xlsx'

# Fetch the data
print("Mengambil data video dan komentar...")
video_data = get_video_info(video_ids)
all_comments = get_all_comments(video_ids)

# Save to XLSX
print("Menyimpan ke file Excel (.xlsx)...")
pd.DataFrame(video_data).to_excel(videos_file, index=False)
pd.DataFrame(all_comments).to_excel(comments_file, index=False)

print("Selesai! File disimpan sebagai:")
print(f"- {videos_file} (info video)")
print(f"- {comments_file} (komentar dan balasan lengkap)")

Mengambil data video dan komentar...
Menyimpan ke file Excel (.xlsx)...
Selesai! File disimpan sebagai:
- youtube_videos.xlsx (info video)
- youtube_comments.xlsx (komentar dan balasan lengkap)
