In [None]:
"""This library queries the YouTube API for the list of videos on any given channel and returns the video ids, details,
and stats as a pandas dataframe that can be exported to a .csv"""

In [1]:
import requests
import json
import pandas as pd

with open(key_file_loc) as f: keys = dict(x.rstrip().split(None,1) for x in f)
    
key = keys['ytapi']

channel_id = "UC3XTzVzaHQEd30rQbuvCtTQ"

In [2]:
url = "https://www.googleapis.com/youtube/v3/channels?part=contentDetails&id={}&key={}".format(channel_id,key)

In [3]:
response = requests.get(url)

In [4]:
file_ = json.loads(response.text)

In [5]:
playlist = file_['items'][0]['contentDetails']['relatedPlaylists']['uploads']

In [6]:
if playlist:
    next_page_token = None
    ids = []
    run = True
    while run:
        query = "https://www.googleapis.com/youtube/v3/playlistItems?part=contentDetails&maxResults=50&playlistId={}&key={}".format(playlist, key)
        
        if next_page_token:
            query += "&pageToken={}".format(next_page_token)
        
        response = json.loads((requests.get(query)).text)
        
        for item in response['items']:
            ids.append(item['contentDetails']['videoId'])
            
        try: 
            next_page_token = response['nextPageToken']
        except:
            run = False
        
        

In [7]:
df = pd.DataFrame()

for id_ in ids:

    query = "https://www.googleapis.com/youtube/v3/videos?part=statistics,snippet&id={}&key={}".format(id_,key)
    response = json.loads((requests.get(query).text))
    
    dict_ = {
        'channelId' : response['items'][0]['snippet']['channelId'],
        'channelTitle' : response['items'][0]['snippet']['channelTitle'],
        'description' : response['items'][0]['snippet']['description'],
        'title' : response['items'][0]['snippet']['title'],
        'publishedAt' : response['items'][0]['snippet']['publishedAt'],
        'commentCount' : response['items'][0]['statistics']['commentCount'],
        'dislikeCount' : response['items'][0]['statistics']['dislikeCount'],
        'likeCount' : response['items'][0]['statistics']['likeCount'],
        'viewCount' : response['items'][0]['statistics']['viewCount']       
        
    }
    
    df = df.append(dict_, ignore_index=True)

    

    

In [8]:
comment_frame = pd.DataFrame()
ids = [ids[0]]
for id_ in ids:
    
    next_page_token = None
    run = True
    comments = []
    
    while run:
        query = "https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&maxResults=100&videoId={}&key={}".format(id_,key)
        if next_page_token:
            query += "&pageToken={}".format(next_page_token)
        
        response = json.loads((requests.get(query)).text)
        
        for item in response['items']:
            comments.append(item)
            
        try: 
            next_page_token = response['next_page_token']
        except:
            run = False
    
    for item in comments:
        dict_ = {
            'id' : item['id'],
            'authorChannelUrl' : item['snippet']['topLevelComment']['snippet']['authorChannelUrl'],
            'authorDisplayName' : item['snippet']['topLevelComment']['snippet']['authorDisplayName'],
            'likeCount' : item['snippet']['topLevelComment']['snippet']['likeCount'],
            'publishedAt' : item['snippet']['topLevelComment']['snippet']['publishedAt'],
            'textDisplay' : item['snippet']['topLevelComment']['snippet']['textDisplay'],
            'videoId' : item['snippet']['topLevelComment']['snippet']['videoId'],
            'viewerRating' : item['snippet']['topLevelComment']['snippet']['viewerRating'],
            'totalReplyCount' : item['snippet']['totalReplyCount'],
            'isReply' : 0,
            'isReplyTo' : ""
        }
        comment_frame = comment_frame.append(dict_, ignore_index=True)
        
        if dict_['totalReplyCount'] > 0:
            replies = []
            next_page_token = None
            run = True
        
            while run:
                query = "https://www.googleapis.com/youtube/v3/comments?part=snippet&textFormat=plainText&maxResults=100&parentId={}&key={}".format(dict_['id'],key)
                
                if next_page_token:
                    query += "&pageToken={}".format(next_page_token)
                
                response = json.loads((requests.get(query)).text)
                
                for item in response['items']:
                    replies.append(item)
                
                try: 
                    next_page_token = response['nextPageToken']
                except:
                    run = False
            
            for item in replies:
                
                rdict_ = {
                    'id' : item['id'],
                    'authorChannelUrl' : item['snippet']['authorChannelUrl'],
                    'authorDisplayName' : item['snippet']['authorDisplayName'],
                    'likeCount' : item['snippet']['likeCount'],
                    'publishedAt' : item['snippet']['publishedAt'],
                    'textDisplay' : item['snippet']['textDisplay'],
                    'videoId' : dict_['videoId'],
                    'viewerRating' : item['snippet']['viewerRating'],
                    'totalReplyCount' : 0,
                    'isReply' : 1,
                    'isReplyTo' : dict_['authorDisplayName']
                }
                
                comment_frame = comment_frame.append(rdict_, ignore_index = True)
                
            
    

In [74]:
comment_frame

Unnamed: 0,authorChannelUrl,authorDisplayName,id,isReply,isReplyTo,likeCount,publishedAt,textDisplay,totalReplyCount,videoId,viewerRating
0,http://www.youtube.com/channel/UCeIvXrlFTomz__...,MisstressMourtisha,Ugwci7DhNzKQrvcz4cJ4AaABAg,0.0,,0.0,2018-03-09T06:51:15.000Z,this is so biased its stupid.,0,LEcbagW4O-s,none
1,http://www.youtube.com/channel/UCKJL7teoSMFQrw...,StudyingForLife,UgxJTMwxS-5Hu_fTXN94AaABAg,0.0,,0.0,2018-03-09T06:30:00.000Z,I love you my british Milhouse.,0,LEcbagW4O-s,none
2,http://www.youtube.com/channel/UCws6FICb7u5g_5...,fakshen1973,UgxK5mcaOvZpXty-8pN4AaABAg,0.0,,0.0,2018-03-09T06:22:39.000Z,Dear wingnuts... the reason why we put LIMITS ...,0,LEcbagW4O-s,none
3,http://www.youtube.com/channel/UCAHvjcuCbCJuir...,AceFailure,Ugx7QzcMdia0IXly8gd4AaABAg,0.0,,0.0,2018-03-09T06:15:03.000Z,"Serial killers write better letters, lets be h...",0,LEcbagW4O-s,none
4,http://www.youtube.com/channel/UC1IJrYve0ukJ2z...,Av arm Online,Ugz6pB0BJtIQei6oXiF4AaABAg,0.0,,0.0,2018-03-09T06:11:34.000Z,ape karoxa govazd anes qez kberem ereq bajanort,0,LEcbagW4O-s,none
5,http://www.youtube.com/channel/UCOOkk9nSGCNpat...,This is not My real Name,Ugyuul7QY2ZuK3e3JEV4AaABAg,0.0,,0.0,2018-03-09T06:07:00.000Z,If you eat enough beets it looks like you&#39;...,0,LEcbagW4O-s,none
6,http://www.youtube.com/channel/UCuxfhcpHPymhsz...,Angela Stovall,UgxN3Chccu8-lftFoX54AaABAg,0.0,,0.0,2018-03-09T06:04:29.000Z,John Oliver: I will keep in mind that you are ...,0,LEcbagW4O-s,none
7,http://www.youtube.com/channel/UCsZkyZOJwEU6RI...,Miguel Ortega,Ugw4n4j-GmA0Prg6vo14AaABAg,0.0,,1.0,2018-03-09T05:31:33.000Z,and we NRA members will still win ;),0,LEcbagW4O-s,none
8,http://www.youtube.com/channel/UCIdqB2jJPfmau5...,M Soda,UgysM4LHTNSldFnKtBZ4AaABAg,0.0,,0.0,2018-03-09T05:29:57.000Z,So the NRA thinks pot is what’s really bad but...,0,LEcbagW4O-s,none
9,http://www.youtube.com/channel/UCsqk7v8uOrVxZl...,breeze aleigh,UgwvIA-U5lE7UnEpiDV4AaABAg,0.0,,0.0,2018-03-09T05:10:12.000Z,fuck idiot.,0,LEcbagW4O-s,none
