In [1]:
import os
from googleapiclient.discovery import build
import pandas as pd
from pytube import YouTube

In [2]:
api_service_name = "youtube"
api_version = "v3"
# The API key is a credential and must not live in the notebook: read it from
# the YOUTUBE_API_KEY environment variable instead. Any key that was ever
# committed in this cell should be treated as leaked and revoked.
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY")
if not DEVELOPER_KEY:
    raise RuntimeError("Set the YOUTUBE_API_KEY environment variable to a YouTube Data API key")
youtube = build(api_service_name, api_version, developerKey = DEVELOPER_KEY)

In [3]:
def get_channel_stats(youtube, channel_ids):
    """
    Get channel statistics: title, subscriber count, view count, video count, upload playlist
    Params:
    
    youtube: the build object from googleapiclient.discovery
    channel_ids: list of channel IDs
    
    Returns:
    Dataframe with one row per channel returned by the API and the columns
    channelName, subscribers, views, totalVideos, playlistId. A statistic that
    is absent from the response is stored as None. The frame is empty (but
    still has those columns) when no channel is returned.
    
    """
    columns = ['channelName', 'subscribers', 'views', 'totalVideos', 'playlistId']
    channel_ids = list(channel_ids)
    # Send the IDs in batches; 50 is presumably the per-request ceiling of
    # channels().list -- TODO confirm against the API reference. Batching is
    # harmless either way and keeps each request small.
    batch_size = 50
    all_data = []

    for start in range(0, len(channel_ids), batch_size):
        batch = channel_ids[start:start + batch_size]
        request = youtube.channels().list(
                    part='snippet,contentDetails,statistics',
                    id=','.join(batch))
        response = request.execute()

        # 'items' can be missing from the response (e.g. no channel matched),
        # so fall back to an empty list instead of raising KeyError.
        for item in response.get('items', []):
            # Individual counters may be absent (e.g. a hidden subscriber
            # count), hence .get() rather than direct indexing.
            statistics = item.get('statistics', {})
            data = dict(channelName = item['snippet']['title'],
                        subscribers = statistics.get('subscriberCount'),
                        views = statistics.get('viewCount'),
                        totalVideos = statistics.get('videoCount'),
                        playlistId = item['contentDetails']['relatedPlaylists']['uploads'])
            all_data.append(data)

    return pd.DataFrame(all_data, columns=columns)

In [4]:
def get_video_ids(youtube, playlist_id):
    """
    Collect the video IDs of every item in a playlist, following pagination
    Params:
    
    youtube: the build object from googleapiclient.discovery
    playlist_id: ID of the playlist to walk
    
    Returns:
    List of video IDs, in the order the API returned them across all pages
    
    """
    collected = []
    page_token = None
    first_page = True

    # Keep requesting pages until a response carries no 'nextPageToken'.
    while first_page or page_token is not None:
        params = dict(part='contentDetails',
                      playlistId = playlist_id,
                      maxResults = 50)
        # Only follow-up requests carry a page token.
        if not first_page:
            params['pageToken'] = page_token

        page = youtube.playlistItems().list(**params).execute()
        collected.extend(entry['contentDetails']['videoId'] for entry in page['items'])

        page_token = page.get('nextPageToken')
        first_page = False

    return collected

In [5]:
def Download(channel, video_id):
    """
    Download a video into ./downloads using pytube
    Params:
    
    channel: channel name, placed in the `ab_channel` query parameter of the watch URL
    video_id: ID of the video to download
    
    Returns:
    None. Progress, success and failure are reported with print; errors raised
    while resolving or downloading the stream are caught and reported, so a
    single failing video does not abort a calling loop.
    
    """
    # Query parameters must be separated by '&'.
    link = "https://www.youtube.com/watch?v={}&ab_channel={}".format(video_id, channel)
    try:
        print("Starting download")
        # Resolving the stream is inside the try as well, so a failure there is
        # reported the same way as a failed download.
        stream = YouTube(link).streams.get_highest_resolution()
        stream.download('./downloads')
    except Exception as error:
        print("An error has occurred: {}".format(error))
    else:
        # Only claim success when nothing went wrong.
        print("Download is completed successfully")

In [6]:
# Channel label -> channel ID. The labels are for human readers only; the
# lists derived below keep the insertion order of these mappings.
NEWS_CHANNELS = {
    "Aaj tak": 'UCt4t-jeY85JegMlZ-E5UWtA',
    "India Today": 'UCYPvAwZP8pZhSMW8qs7cVCw',
    "Wion": 'UC_gUM8rL-Lrg6O3adPW9K1g',
    "ABP news": 'UCRWFSbif-RFENbBrSiez1DA',
    "Tv9 Kannada": 'UC8dnBi4WUErqYQHZ4PfsLTg',
}
news_channel_ids = list(NEWS_CHANNELS.values())

MUSIC_CHANNELS = {
    "Ed Sheeran": 'UC0C-w0YjGpqDXGB8IHb662A',
    "Taylor Swift": 'UCqECaJ8Gagnn7YCbPEzWH6g',
    "Coldplay": 'UCDPM_n1atn2ijUwHd0NNRQw',
}
music_channel_ids = list(MUSIC_CHANNELS.values())

In [7]:
# Fetch one row of statistics per music channel. The resulting frame carries
# each channel's uploads playlist ID in its `playlistId` column.
channel_data = get_channel_stats(youtube, music_channel_ids)
channel_data

Unnamed: 0,channelName,subscribers,views,totalVideos,playlistId
0,Coldplay,25600000,17760431438,401,UUDPM_n1atn2ijUwHd0NNRQw
1,Ed Sheeran,53700000,30847079439,445,UU0C-w0YjGpqDXGB8IHb662A
2,Taylor Swift,54400000,31372088899,220,UUqECaJ8Gagnn7YCbPEzWH6g


In [10]:
# Download every video of every channel listed in `channel_data`.
# Iterate over the rows of the frame itself: it holds channel names and
# uploads playlist IDs but no channel IDs, so it cannot be filtered by the
# entries of `music_channel_ids`.
# NOTE: this issues one download per video and can take a very long time.
for channel, playlist_id in zip(channel_data['channelName'], channel_data['playlistId']):
    video_ids = get_video_ids(youtube, playlist_id)
    for video_id in video_ids:
        Download(channel, video_id)

NameError: name 'channel_data' is not defined

In [24]:
# Download a single video as a smoke test.
# NOTE(review): the trailing "&ab_" is not part of the video ID. It was
# appended so that the URL assembled by Download reads
# "...&ab_channel=EdSheeran". TODO: pass the bare ID once Download inserts the
# separator itself.
channel = "EdSheeran"
videoid = "JGwWNGJdvx8&ab_"
Download(channel, videoid)

Download is completed successfully


In [8]:
# Collect the IDs of all videos in the playlist below.
# Despite its singular name, `video_id` holds the whole list returned by
# get_video_ids; a later cell wraps it in a Series and writes it to CSV.
mankibaat_playlist_id = "PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7"
video_id = get_video_ids(youtube, mankibaat_playlist_id)

['MEeBdca7Do0',
 'SAyhnHl_5TU',
 'fq6fAaS6A4c',
 '2C9Hadeoi3E',
 'G52NO-QE6Fk',
 'ksd5jUpGWv4',
 'kovi9FDfUzs',
 'm3qHlzv4ock',
 '7YKwz82KEmI',
 'mZ0H1gyQQyc',
 'QxLum4JMQWQ',
 'ox8IkNhEZ4g',
 'SNlZV4eg7Bs',
 '2BUhYFdEj98',
 'dhdbhiPoxnc',
 'JjxjNyCiErk',
 '_l9qt7zhciI',
 'rAyreycNR_k',
 'l4ZhHhtBmVg',
 'K29X-cPZXh8',
 'iDQN9qL8OD4',
 'RTgrcsms9vA',
 'xNR2Rk6ps24',
 'lgBrbkmYixw',
 'gaw8u_QHJEI',
 '3ZcOo_ThdTw',
 'TC2OrrPYMsw',
 'Rp_FY6y4cmw',
 'Bw3PxD-Z1MY',
 'zt_STVCmDrQ',
 'UWULJ0eWBl4',
 'tmqm_qES_4Y',
 'gWLTdEKzcI0',
 'ROgvnXLjPsI',
 'Pi_FVeGZF-o',
 'OPVtKXoQaEo',
 'XojZYteYWEo',
 'XojZYteYWEo',
 'ZQbNLgOOVXw',
 'ez3Z6zRj1Ts',
 'tvSHcLdlfMU',
 'sib1AnU1az8',
 'WaVZtxEMmeo',
 'yn-gmDrzaD0',
 '82pExD6tHLg',
 'TbmW4KxHqlc',
 'Salz2CWPx_I',
 'BwoPImUk3Kg',
 'nFYg2wljnyM',
 'KUEL_-YcZ9k',
 'MfaM8jtLEkU',
 't3lhdsxok74',
 '0oqKtwXQZ44',
 'DaMUTP609rI',
 'G8eS3jgbzeU',
 '8iRreea6Qh4',
 '24OKbzNoLgw',
 'Qziq5m70z9Y',
 'HZnpRGjyOZ0',
 'xwi5lUuXOOc',
 'mGeWjur7R6o',
 'ECGOVODYy6E',
 'xyl-zw

In [20]:
# Persist the playlist's video IDs. The Series is named so that the CSV column
# header is `video_ids`, which is the column the reload cell and the
# link-building loop look up.
vid = pd.Series(video_id, name='video_ids')
vid.to_csv('mankibaat_video_ids.csv', index=False)

In [15]:
# Preview the watch URL for one video of the playlist.
# c = channel name, v = video ID, p = playlist ID; `c` and `p` are reused by
# other cells, so their names are kept.
c = "SansadTV"
v = "MEeBdca7Do0"
p = "PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7"
watch_url_template = "https://www.youtube.com/watch?v={}&list={}&ab_channel={}"
link = watch_url_template.format(v, p, c)
print(link)

https://www.youtube.com/watch?v=MEeBdca7Do0&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV


In [29]:
# Reload the saved video IDs from CSV; the following cell reads them from the
# `video_ids` column of this frame.
vi = pd.read_csv("mankibaat_video_ids.csv")
vi

Unnamed: 0,video_ids
0,MEeBdca7Do0
1,SAyhnHl_5TU
2,fq6fAaS6A4c
3,2C9Hadeoi3E
4,G52NO-QE6Fk
...,...
87,qYojh752U2w
88,m25RYModjvo
89,nDXc-kJd9yc
90,0Gal-rIZwDs


In [31]:
# Print the watch URL of every stored video ID, reusing the playlist ID `p`
# and channel name `c` defined in an earlier cell.
for i, stored_id in vi["video_ids"].items():
    link = "https://www.youtube.com/watch?v={}&list={}&ab_channel={}".format(stored_id, p, c)
    print(link)

https://www.youtube.com/watch?v=MEeBdca7Do0&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=SAyhnHl_5TU&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=fq6fAaS6A4c&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=2C9Hadeoi3E&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=G52NO-QE6Fk&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=ksd5jUpGWv4&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=kovi9FDfUzs&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=m3qHlzv4ock&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=7YKwz82KEmI&list=PLVOgwA_DiGzotAXVMDT50J2fp9w8ASRC7&ab_channel=SansadTV
https://www.youtube.com/watch?v=mZ0H1gyQQyc&list=PLVOgwA_DiGzotA