# 載入所需套件

In [1]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
from concurrent import futures
from tqdm import tqdm

# 定義爬取個別YouTube發燒影片頻道訂閱數函數

In [2]:
def channel_crawler(link_list, timeout=10):
    """Fetch one channel page and return its displayed subscriber count.

    Parameters
    ----------
    link_list : sequence
        Two-element pair ``[channel_url, index]``. The index is returned
        unchanged so the caller can match the result to its row.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 10).

    Returns
    -------
    tuple
        ``(index, subscription)``. ``subscription`` is the text of the
        subscriber-count element, or ``np.nan`` when the page has no such
        element or the request itself fails.
    """
    link, i = link_list[0], link_list[1]
    try:
        # Without a timeout a single stalled connection would block a
        # worker thread indefinitely.
        r = requests.get(link, timeout=timeout)
    except requests.RequestException:
        # One unreachable channel should not abort the whole crawl; report
        # it as "no subscriber count", the same value used for a page that
        # does not show one.
        return i, np.nan

    s = BeautifulSoup(r.text, 'html.parser')
    # Some channel pages do not expose a subscriber count at all.
    counts = s.select('span.yt-subscription-button-subscriber-count-branded-horizontal')
    subscription = counts[0].text if counts else np.nan

    return i, subscription

# 定義爬取YouTube發燒影片資訊函數

In [3]:
def youtube_trending_crawler(url='https://www.youtube.com/feed/trending', max_workers=8, timeout=10):
    """Scrape the YouTube trending page into a DataFrame, one row per video.

    Parameters
    ----------
    url : str, optional
        Page to scrape. Defaults to the trending feed.
    max_workers : int, optional
        Number of threads used to fetch the channel pages (default 8).
    timeout : float, optional
        Seconds to wait for the trending-page request (default 10).

    Returns
    -------
    pandas.DataFrame
        Columns: rank, video title, view count, upload time, video link,
        description, channel name, subscriber count and channel link
        (the column labels themselves are in Chinese). Description and
        subscriber count are ``np.nan`` when the page does not provide them.

    Raises
    ------
    requests.HTTPError
        If the trending page responds with an error status.
    IndexError
        If a listing lacks the expected two links or two metadata entries.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on an error page instead of silently returning no rows.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    videos, vlinks, channels, clinks = [], [], [], []
    days, times, descriptions = [], [], []
    for e in soup.select('div.yt-lockup-content'):
        # Query each item once and reuse the results for every column.
        anchors = e.select('a')
        stats = e.select('li')
        video_a, channel_a = anchors[0], anchors[1]

        videos.append(video_a.text)
        vlinks.append('https://www.youtube.com{}'.format(video_a['href']))
        channels.append(channel_a.text)
        clinks.append('https://www.youtube.com{}'.format(channel_a['href']))
        days.append(stats[0].text)
        # Drops the first five and the last character, then the thousands
        # separators. NOTE(review): presumably this strips a fixed label
        # around the number in the zh-TW page text; confirm for other locales.
        times.append(int(stats[1].text[5:-1].replace(',', '')))

        # A trending video may have no description.
        description = e.select('div.yt-lockup-description')
        descriptions.append(description[0].text if description else np.nan)

    ranks = list(range(1, len(videos) + 1))
    # Placeholder values; every slot is overwritten from the crawl results.
    subscriptions = [np.nan] * len(clinks)

    # Pair every channel link with its row index for channel_crawler.
    link_list = [[link, i] for i, link in enumerate(clinks)]

    # Visit each channel page concurrently (thread-level parallel tasks).
    with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(channel_crawler, link_list), total=len(link_list)))

    for i, subscription in results:
        subscriptions[i] = subscription

    df = pd.DataFrame({
        '排名': ranks,
        '影片': videos,
        '觀看次數': times,
        '時間': days,
        '影片連結': vlinks,
        '描述': descriptions,
        '頻道': channels,
        '訂閱數': subscriptions,
        '頻道連結': clinks
    })

    return df

# 抓取YouTube發燒影片資訊

In [4]:
# Run the trending crawl and keep the resulting DataFrame in `df`.
df=youtube_trending_crawler()
# Last expression of the cell: display the first rows of the result.
df.head()

100%|██████████████████████████████████████████████████████████████████████████████████| 81/81 [00:48<00:00,  1.67it/s]


Unnamed: 0,排名,影片,觀看次數,時間,影片連結,描述,頻道,訂閱數,頻道連結
0,1,"他1人傳染10多人武漢出現""超級傳播者"" 口罩禁出口范范臉書罵""泯滅人性"" 遭網友譙翻後道歉...",299941,1 天前,https://www.youtube.com/watch?v=W2jxntejoh8,➲ iNEWS 最正新聞直播 https://ppt.cc/fjzJax➲ 鄭知道了新頻道 ...,三立iNEWS,59.7萬,https://www.youtube.com/user/setmoney159
1,2,#最新消息 ＮＢＡ傳奇球星黑曼巴Kobe Bryant搭乘直升機前往籃球學校發生意外 享年...,466962,2 天前,https://www.youtube.com/watch?v=pb_OPfXCiFA,,從台灣看見世界的故事,29.1萬,https://www.youtube.com/channel/UCaHvWPMrGgcd4...
2,3,"摔爛酷炫公仔-1:1鋼鐵人半身雕像,酷炫氣得跳腳,好笑嗎?│WACKYBOYS│反骨│酷炫發...",558977,1 天前,https://www.youtube.com/watch?v=k3LM9jFgo7g,話劇社生氣來了!!另一支影片有公仔修復喔看起來培根唯一一款Tshirt⭐我被開除了預購起來►...,WACKYBOYS 反骨男孩,137萬,https://www.youtube.com/user/KEVIN0204660
3,4,“冠状病毒”是什么？武汉新型肺炎病毒是如何使人生病的？,1132255,1 天前,https://www.youtube.com/watch?v=E46_veB0DPU,【加入会员链接】https://www.youtube.com/channel/UCSs4A...,李永乐老师,95.6萬,https://www.youtube.com/channel/UCSs4A6HYKmHA2...
4,5,BTS: Black Swan,2935997,8 小時前,https://www.youtube.com/watch?v=wSNd02kVv8o,Late Late Show music guest BTS lights up Stage...,The Late Late Show with James Corden,2200萬,https://www.youtube.com/user/TheLateLateShow
