# Get all comments through API

**Note:** the API returns at most 100 comment threads per request, but each response includes a `nextPageToken` while more pages remain. Pass that token as `pageToken` in the next call and repeat until a response no longer contains `nextPageToken`; at that point every comment has been retrieved.

In [104]:
# -*- coding: utf-8 -*-

# Sample Python code for youtube.commentThreads.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python

import os
import time
import googleapiclient.discovery

def get_comments(vdid, api_key=None, pause_seconds=2):
    """Fetch every top-level comment thread of one YouTube video.

    Calls ``commentThreads.list`` repeatedly (up to 100 threads per request)
    and follows ``nextPageToken`` until a response no longer carries one.
    Progress is printed page by page.

    Args:
        vdid: YouTube video ID (the ``v=`` value of a watch URL).
        api_key: YouTube Data API key. When omitted, the key is read from
            the ``YOUTUBE_API_KEY`` environment variable so that it never
            has to be written into the notebook.
        pause_seconds: Seconds to wait between two page requests.

    Returns:
        list: the raw ``items`` entries of every response page, in the
        order the API returned them.

    Raises:
        RuntimeError: if no key is passed and ``YOUTUBE_API_KEY`` is unset.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"

    developer_key = api_key or os.environ.get("YOUTUBE_API_KEY")
    if not developer_key:
        raise RuntimeError(
            "No YouTube API key: pass api_key=... or set the "
            "YOUTUBE_API_KEY environment variable."
        )

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=developer_key)

    comments = []
    page_token = None  # no token yet: the first request asks for page one
    page = 1
    print("=================")
    print("vdid: "+vdid)
    while True:
        print("page "+str(page)+"...")
        params = {
            "part": "snippet",
            "videoId": vdid,
            "maxResults": 100,
            "textFormat": "plainText",
        }
        # Only send pageToken once the API has actually handed one out.
        if page_token:
            params["pageToken"] = page_token
        response = youtube.commentThreads().list(**params).execute()

        comments.extend(response.get('items', []))
        print("result: "+str(response["pageInfo"]["totalResults"]))

        page_token = response.get('nextPageToken')
        if not page_token:
            break
        # Throttle consecutive requests.
        if pause_seconds:
            time.sleep(pause_seconds)
        page += 1
    print("*total:", len(comments))
    print("=================")

    return comments


def extract_video_id(link):
    """Return the YouTube video ID contained in ``link``.

    Handles ``watch?v=<id>`` URLs (with or without further query
    parameters) and ``youtu.be/<id>`` short links. Anything else falls back
    to stripping the standard watch-URL prefix, so a bare video ID is
    returned unchanged.
    """
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link)
    host = (parsed.hostname or '').lower()

    # Short links keep the ID in the path instead of the query string.
    if host in ('youtu.be', 'www.youtu.be'):
        short_id = parsed.path.strip('/')
        if short_id:
            return short_id

    v_values = parse_qs(parsed.query).get('v')
    if v_values:
        return v_values[0]

    return link.replace('https://www.youtube.com/watch?v=', '')


if __name__ == "__main__":

    links = [
        'https://www.youtube.com/watch?v=V9K9gONlh5g',
        'https://www.youtube.com/watch?v=6D74v9ABSIg',
        'https://www.youtube.com/watch?v=VzXKijeWN4U',
        'https://www.youtube.com/watch?v=dFPepr6T7y0',
        'https://www.youtube.com/watch?v=ozm7lCEK328',
    ]

    vdids = [extract_video_id(link) for link in links]
    print(vdids)

    # One list of raw comment threads per video, in the same order as `links`.
    comments = [get_comments(vdid) for vdid in vdids]

['V9K9gONlh5g', '6D74v9ABSIg', 'VzXKijeWN4U', 'dFPepr6T7y0', 'ozm7lCEK328']
vdid: V9K9gONlh5g
page 1...
result: 100
page 2...
result: 100
page 3...
result: 35
*total: 235
vdid: 6D74v9ABSIg
page 1...
result: 100
page 2...
result: 100
page 3...
result: 100
page 4...
result: 100
page 5...
result: 100
page 6...
result: 100
page 7...
result: 100
page 8...
result: 100
page 9...
result: 100
page 10...
result: 100
page 11...
result: 100
page 12...
result: 100
page 13...
result: 100
page 14...
result: 100
page 15...
result: 100
page 16...
result: 100
page 17...
result: 81
*total: 1681
vdid: VzXKijeWN4U
page 1...
result: 100
page 2...
result: 100
page 3...
result: 100
page 4...
result: 100
page 5...
result: 100
page 6...
result: 100
page 7...
result: 100
page 8...
result: 100
page 9...
result: 100
page 10...
result: 100
page 11...
result: 100
page 12...
result: 1
*total: 1101
vdid: dFPepr6T7y0
page 1...
result: 100
page 2...
result: 100
page 3...
result: 100
page 4...
result: 50
*total: 350
vdid

## Save relevant data only

In [184]:
def flatten_comment_thread(thread):
    """Reduce one raw comment-thread item to the flat fields kept for analysis.

    Args:
        thread: one entry of the ``items`` list collected from the
            commentThreads responses.

    Returns:
        dict: video id, comment id, both text variants, author name and
        channel id, like count and the two timestamps of the thread's
        top-level comment.
    """
    top_level = thread['snippet']['topLevelComment']
    details = top_level['snippet']
    return {
        'id_video': thread['snippet']['videoId'],
        'id_komentar': top_level['id'],
        'text_display': details['textDisplay'],
        'text_original': details['textOriginal'],
        'author_name': details['authorDisplayName'],
        # Guarded lookup: a comment without an authorChannelId entry yields
        # None here instead of aborting the whole run with a KeyError.
        'id_author': details.get('authorChannelId', {}).get('value'),
        'like_count': details['likeCount'],
        'published_at': details['publishedAt'],
        'updated_at': details['updatedAt'],
    }


# `comments` holds one list of threads per video; flatten both levels into
# a single list of records.
comments_data = [
    flatten_comment_thread(thread)
    for video_threads in comments
    for thread in video_threads
]

In [185]:
# Sanity check: number of flattened comment records across all videos.
len(comments_data)

8233

In [186]:
# Inspect the last flattened record to verify the field layout.
comments_data[-1]

{'id_video': 'ozm7lCEK328',
 'id_komentar': 'UgxVd5v2EVCPo2YeQap4AaABAg',
 'text_display': 'Firts\nLike',
 'text_original': 'Firts\nLike',
 'author_name': 'Nob Gaming',
 'id_author': 'UC9fVKERTcbRJS7AMoPmqQhg',
 'like_count': 1,
 'published_at': '2020-05-26T10:58:39Z',
 'updated_at': '2020-05-26T10:59:00Z'}

In [1]:
# Persist the flattened records as the Python repr of the list (not JSON);
# to load it again, parse with ast.literal_eval rather than eval.
os.makedirs('raw', exist_ok=True)  # open() fails if the output folder is missing
with open('raw/comments_full_flat.txt', 'w', encoding='utf-8') as f:
    f.write(str(comments_data))

In [169]:
import pandas as pd

In [189]:
# Export the flattened records to CSV (one row per record, no index column).
comments_frame = pd.DataFrame(comments_data)
comments_frame.to_csv('raw/comments_all.csv', index=False)

In [196]:
# Show the untruncated display text of the record with index label 2.
pd.DataFrame(comments_data).loc[2, 'text_display']

'Pro-kontra penerapan new normal dalam video debat berikut:\n1. https://youtu.be/A2W0iQyML0A\n2. https://youtu.be/QrxM4ilJ84U\n3. https://youtu.be/-BKB2lls30I\nMohon bantuan like-nya, terima kasih🙏'

In [195]:
# Display the flattened records as a DataFrame.
pd.DataFrame(comments_data)

Unnamed: 0,id_video,id_komentar,text_display,text_original,author_name,id_author,like_count,published_at,updated_at
0,V9K9gONlh5g,UgyltwOekKcaG3BCxQV4AaABAg,Gw kesini gara gara tugas,Gw kesini gara gara tugas,ilham abdilah,UCq0F46qULka1JcUyIn_sHaA,0,2020-07-23T04:41:04Z,2020-07-23T04:41:04Z
1,V9K9gONlh5g,UgyOIH9MFqE6OVhWsFd4AaABAg,"Faktanya, new normal hanya membuka gerbang unt...","Faktanya, new normal hanya membuka gerbang unt...",iiqrhaa bukhan,UCQY7gvJ0ZyLMpWiQGbIMdmQ,0,2020-07-22T02:01:52Z,2020-07-22T02:01:52Z
2,V9K9gONlh5g,UgwVmU6UcsNZfuUKs6p4AaABAg,Pro-kontra penerapan new normal dalam video de...,Pro-kontra penerapan new normal dalam video de...,Octa Nadia,UCN7qsYgI5g-pq4jvf-ghjiQ,1,2020-06-26T09:35:58Z,2020-06-26T09:35:58Z
3,V9K9gONlh5g,Ugxxt8GJ_mWvOH6mTJF4AaABAg,Yang kuat akan bertahan yang lemah akan Meninggal,Yang kuat akan bertahan yang lemah akan Meninggal,Rama WK,UCyvhHciGAeZ3NgPXQgyLvrQ,0,2020-06-19T06:19:53Z,2020-06-19T06:19:53Z
4,V9K9gONlh5g,UgydjjDPsoKy5AuFiOB4AaABAg,"Bukannya meremehkan corona, tapi tak perlu pho...","Bukannya meremehkan corona, tapi tak perlu pho...",Zainul Zain,UCJ_u7_aWC2A0kEPFevS-cZw,0,2020-06-17T16:06:18Z,2020-06-17T16:51:07Z
...,...,...,...,...,...,...,...,...,...
8228,ozm7lCEK328,UgxqFAfoyg4cYFvRJth4AaABAg,Pak polisi Ama pak TNI nanti jgn galak galak\n...,Pak polisi Ama pak TNI nanti jgn galak galak\n...,NEGARA MUSLIM,UCfniUgpp6NWm8GKLp5zBFQg,1,2020-05-26T11:00:32Z,2020-05-26T11:00:32Z
8229,ozm7lCEK328,UgzPJJZQMKts7XXF-VV4AaABAg,Semangat pak Jokowi\nSehat selalu beserta raky...,Semangat pak Jokowi\nSehat selalu beserta raky...,NEGARA MUSLIM,UCfniUgpp6NWm8GKLp5zBFQg,5,2020-05-26T10:59:41Z,2020-05-26T10:59:41Z
8230,ozm7lCEK328,UgzTfNkCYCfYR6bAX5h4AaABAg,First,First,Patrick Star,UCG37s2Puwj86zWyhpuIBMWQ,1,2020-05-26T10:59:17Z,2020-05-26T10:59:17Z
8231,ozm7lCEK328,UgxE3-X0zh2iDHMWrj54AaABAg,New normal new world nb order??,New normal new world nb order??,NEGARA MUSLIM,UCfniUgpp6NWm8GKLp5zBFQg,1,2020-05-26T10:58:56Z,2020-05-26T10:58:56Z
