In [1]:
import googleapiclient.discovery
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from youtube_transcript_api import YouTubeTranscriptApi

import urllib.parse as p
import re
import os
import pickle
import pandas


SCOPES = ["https://www.googleapis.com/auth/youtube.force-ssl"]

In [2]:
def youtube_authenticate():
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"
    api_service_name = "youtube"
    api_version = "v3"
    client_secrets_file = "credentials.json"
    creds = None
    # the file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first time
    if os.path.exists("token.pickle"):
        with open("token.pickle", "rb") as token:
            creds = pickle.load(token)
    # if there are no (valid) credentials availablle, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(client_secrets_file, SCOPES)
            creds = flow.run_local_server(port=0)
        # save the credentials for the next run
        with open("token.pickle", "wb") as token:
            pickle.dump(creds, token)

    return build(api_service_name, api_version, credentials=creds)

# authenticate to YouTube API
youtube = youtube_authenticate()

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=1047575720685-ltlgd9ut6c2vt88envr8nbbblqrugpgr.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A53971%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fyoutube.force-ssl&state=E9HGLNlPYXld3oLpa91Rl4AIZq9s0f&access_type=offline


In [3]:
def get_video_id_by_url(url):
    """
    Return the Video ID from the video `url`
    """
    # split URL parts
    parsed_url = p.urlparse(url)
    # get the video ID by parsing the query of the URL
    video_id = p.parse_qs(parsed_url.query).get("v")
    if video_id:
        return video_id[0]
    else:
        raise Exception(f"Wasn't able to parse video URL: {url}")

In [4]:
example_user = "JayzTwoCents"
example_id = 'UCkWQ0gDrqOCarmUKmppD7GQ'

In [5]:
my_chosen = 'MandaloreGaming'

In [6]:
chosen_video = 'https://www.youtube.com/watch?v=YirkpurLHkU'

In [7]:
vid_id = get_video_id_by_url(chosen_video)

In [8]:
vid_id

'YirkpurLHkU'

In [9]:
#Example 1a
file1a = 'example1aresults.csv'
request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        forUsername=example_user
    )
response = request.execute()
channel_id = response['items'][0]['id']
#print(response)
#Note the api does not display total number of likes and uploads per channel
#Response output is difficult to work with. I select outputs I like from the response and put them into a dataframe

#results_dataframe= pandas.json_normalize(response['items'])[['snippet.title','snippet.description','snippet.country','statistics.viewCount','statistics.subscriberCount','statistics.videoCount']]

#print("Done, file",file1a,'is ready.')
#results_dataframe.to_csv(file1a, index=False)


In [10]:
#Example 1b
file1b = 'example1bresults.csv'
request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=channel_id
    )
response = request.execute()

#print(response)
#Note the api does not display total number of likes and uploads per channel
#Response output is difficult to work with. I select outputs I like from the response and put them into a dataframe

results_dataframe= pandas.json_normalize(response['items'])[['snippet.title','snippet.description','statistics.viewCount','statistics.subscriberCount','statistics.videoCount']]

print("Done, file",file1b,'is ready.')
results_dataframe.to_csv(file1b, index=False)


Done, file example1bresults.csv is ready.


In [11]:
#Example 2
file2 = 'example2results.csv'
appended_data_list = []
counter = 10
while True:
    counter = counter - 1
    if counter == 0:
        break
    #userinput = input("Enter Youtube Channel ID: ")
    userinput = channel_id
    
    # your code

    request = youtube.channels().list(
        part="snippet,contentDetails,statistics",
        id=userinput
    )
    response = request.execute()

    # print(response)
    # Note the api does not display total number of likes and uploads per channel
    # Response output is difficult to work with. I select outputs I like from the response and put them into a dataframe

    results_dataframe = pandas.json_normalize(response['items'])[
        ['snippet.title', 'snippet.description', 'statistics.viewCount',
         'statistics.subscriberCount', 'statistics.videoCount']]

    appended_data_list.append(results_dataframe)

    #cont = input("Another one? yes/no > ")
    cont = 'no'


    while cont.lower() not in ("yes", "no"):
        cont = input("Another one? yes/no > ")

    if cont == "no":
        print("Done, file",file2,'is ready.')
        appended_data_dataframe = pandas.concat(appended_data_list)
        appended_data_dataframe.to_csv(file2, index=False)
        break

Done, file example2results.csv is ready.


In [12]:
#Example 3
file3 = 'example3results.csv'
request = youtube.videos().list(
    part="snippet,contentDetails,statistics",
    id=vid_id
)
response = request.execute()

#print(response)

results_dataframe= pandas.json_normalize(response['items'])[['snippet.channelTitle','snippet.title','statistics.viewCount','statistics.likeCount','statistics.commentCount', 'statistics.favoriteCount']]

print("Done, file",file3,'is ready.')
results_dataframe.to_csv(file3, index=False)


Done, file example3results.csv is ready.


In [13]:
#Example 4
file4 = 'example4results.csv'
request = youtube.videos().list(
    part="snippet,contentDetails,statistics",
    id="bISWIk5pUH0,tWbjy_sCbUA,p79H_XOwpZo,CmNMfRpjknA"
)

response = request.execute()

#print(response)

results_dataframe= pandas.json_normalize(response['items'])[['snippet.channelTitle','snippet.title','statistics.viewCount','statistics.likeCount','statistics.commentCount', 'statistics.favoriteCount']]

#The code below renames the column names in the dataframe

results_dataframe.rename(columns= {'snippet.channelTitle': 'Channel_Title','snippet.title': 'Video_Title', 'statistics.viewCount': 'View_Count', 'statistics.likeCount':'Like_Count','statistics.commentCount':'Comment_Count','statistics.favoriteCount':'Favorite_Count'}, inplace=True)

print("Done, file",file4,'is ready.')
results_dataframe.to_csv(file4, index=False)


Done, file example4results.csv is ready.


In [14]:
my_key = 'AIzaSyBzkWwOGepaadbrVIE-aotGVkGMymi774c'

In [15]:
#example 5
file5 = 'example5results.csv'

def main():
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    DEVELOPER_KEY = my_key

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey = DEVELOPER_KEY)

    request = youtube.commentThreads().list(
        part="snippet,replies",
        maxResults=100,
        videoId=vid_id
    )
    response = request.execute()

    #print(response)

    # I am creating an empty dictionary here for the results I like to save to a CSV file
    output = {"CommentID": [], "UserID": [], "Comment": [], "Number_of_Likes": [], "Number_of_Replies": [],
              "Updated_At": []}

    results_out = pandas.DataFrame(columns=['CommentID,UserID,Comment,Number_of_Likes,Number_of_Replies,Updated_At'])
    for item in response['items']:
        # I am accessing different parts of the JSON response file that I consider important
        comment_id = item['snippet']['topLevelComment']['id']
        userID = item["snippet"]["topLevelComment"]["snippet"]["authorDisplayName"]
        comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
        like_count = item['snippet']['topLevelComment']['snippet']['likeCount']
        reply_count = item['snippet']['totalReplyCount']
        updated_at = item["snippet"]["topLevelComment"]["snippet"]["updatedAt"]

        # I am appending the results for each comment
        output['CommentID'].append(comment_id)
        output['UserID'].append(userID)
        output['Comment'].append(comment)
        output['Number_of_Likes'].append(like_count)
        output['Number_of_Replies'].append(reply_count)
        output['Updated_At'].append(updated_at)

        results_in = pandas.DataFrame.from_dict(output, orient='index').T
        results_out = pandas.concat([results_out,results_in]).drop_duplicates()
    print("Done, file",file5,'is ready.')
    results_out.to_csv(file5, index=False)


if __name__ == "__main__":
    main()


Done, file example5results.csv is ready.


In [16]:
#example 6
file6 = 'example6results.csv'

# assigning srt variable with the list
# of dictonaries obtained by the get_transcript() function
caption_list = YouTubeTranscriptApi.get_transcript(vid_id)
#print(caption_list)

caption_dataframe = pandas.DataFrame(caption_list)

print("Done, file",file6,'is ready.')
caption_dataframe.to_csv(file6, index=False)

Done, file example6results.csv is ready.


In [17]:
#example 7
file7 = file6
#I am creating a dictionary here titled inputdata
inputdata={}

inputdata = pandas.read_csv(file7).to_dict()

# I created a new dictionary here for the text column in my csv file

caption_text_dictionary = inputdata.get('text')

# I am converting the caption dictionary to a list so I can analyze the data

caption_text_list =  list(caption_text_dictionary.values())

#convert list to string
caption_text_instring = ''
for eachletter in  caption_text_list:
    caption_text_instring += eachletter

print(caption_text_instring)

[Mechanicus OST - Children of the Omnissiah]Welcome back to Warhammer. There are no Space Marines this time around.Instead, this game has you playing as the Adeptus Mechanicus. And you're on a field trip / home invasion to rob the Necrons.That may have sounded like gibberish...The Mechanicus have been in some games. Usually with the Imperial Guard; but they're not the Imperial Guard-Okay, picture Inspector Gadget, but he's a monk, and he fixes your car.Okay, I'll get into the lore...I'm going to assume you know about the Imperium of Man and all that. The Mechanicus works with the Guard, but they're not part of the Guard.Like, picture Amazon; Imagine it has a giant government contract.Not only do they make and sell knick-knacks and random stuff,they also make cars, tanks, guns, ships, and everything else you need to wage a war and also just run the economy in general.That's them, and there is no competition.They make most every gun and vehicle in the Imperial military, so their organiza