In [1]:
import os
import google.oauth2.credentials
import pickle
from googleapiclient.discovery import build
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
import pandas as pd

In [2]:
# Placeholder for the OAuth credentials object; stays None until a cached
# token is loaded or a new authorization flow produces one.
credentials = None

In [3]:
# Reuse credentials cached by an earlier run when the token file is present.
# NOTE: pickle.load can execute arbitrary code, so only keep a token file
# that this notebook wrote itself.
token_path = 'yt_token.pickle'
if os.path.exists(token_path):
    with open(token_path, 'rb') as token_file:
        credentials = pickle.load(token_file)

In [4]:
# Make sure `credentials` is usable before building the API client.
# - Expired credentials that carry a refresh token are refreshed silently.
# - Otherwise the browser-based consent flow is run on a local port.
if not credentials or not credentials.valid:
    if credentials and credentials.expired and credentials.refresh_token:
        credentials.refresh(Request())
    else:
        flow = InstalledAppFlow.from_client_secrets_file(
            'client_secret.json',
            scopes=[
                'https://www.googleapis.com/auth/youtube.readonly'
            ]
        )

        flow.run_local_server(port=8080, prompt='consent')
        credentials = flow.credentials

    # Write the cache after either path: persisting refreshed credentials as
    # well means the next run can load a still-valid token instead of
    # refreshing again.
    with open('yt_token.pickle', 'wb') as t:
        pickle.dump(credentials, t)

In [5]:
# Client for the YouTube Data API v3, authorized with the credentials above.
yt_service = build(
    'youtube',
    'v3',
    credentials=credentials,
)

In [6]:
# Request the content details of the playlists owned by the authorized account.
playlists_request = yt_service.playlists().list(part='contentDetails', mine=True)
playlists_data = playlists_request.execute()

# ID for the desired playlist
spotify_playlist = 'PL527aseUaLIaXhc_s2WM8QgMQfKyO8L5v'

In [7]:
def get_video_ids(data):
    '''
    Collect the video IDs found in one response page.

    data: mapping with an 'items' list whose entries each hold
          ['contentDetails']['videoId'].

    Returns the IDs joined with commas, followed by one trailing comma so
    that results from successive pages can be concatenated directly.
    An empty 'items' list yields ','.
    '''
    page_ids = []
    for entry in data['items']:
        page_ids.append(entry['contentDetails']['videoId'])
    return ','.join(page_ids) + ','

In [8]:
# Fetch the first page of the playlist. maxResults=50 asks for the largest
# page the API allows, which keeps the number of requests low.
playlist_items_request = yt_service.playlistItems().list(
    part='contentDetails',
    playlistId=spotify_playlist,
    maxResults=50
)

items_data = playlist_items_request.execute()
num_items = items_data['pageInfo']['totalResults']
video_ids = get_video_ids(items_data)

# Keep following nextPageToken until the response no longer carries one.
# Deriving the page count from totalResults instead would read a missing
# 'nextPageToken' key whenever the total is an exact multiple of the page size.
while 'nextPageToken' in items_data:
    playlist_items_request = yt_service.playlistItems().list(
        part='contentDetails',
        playlistId=spotify_playlist,
        maxResults=50,
        pageToken=items_data['nextPageToken']
    )

    items_data = playlist_items_request.execute()
    video_ids += get_video_ids(items_data)


    

In [9]:
# video_ids is a comma-separated string that ends with a comma; split it back
# into individual IDs and drop the empty pieces.
video_id_list = [video_id for video_id in video_ids.split(',') if video_id]

# NOTE(review): videos.list is understood to accept at most 50 IDs per call;
# confirm against the API reference. Requests are batched accordingly.
VIDEO_ID_BATCH_SIZE = 50

# Merged result: only the 'items' list is kept, since that is the only part
# of the response the following cells read.
video_info = {'items': []}
for start in range(0, len(video_id_list), VIDEO_ID_BATCH_SIZE):
    title_request = yt_service.videos().list(
        part='snippet',
        id=','.join(video_id_list[start:start + VIDEO_ID_BATCH_SIZE])
    )
    video_info['items'].extend(title_request.execute().get('items', []))

In [10]:
# One channel ID per video, in the same order as video_info['items']
# (a channel appears once for every video it uploaded).
channel_ids = [video['snippet']['channelId'] for video in video_info['items']]

# Look up the snippet of each referenced channel.
channel_request = yt_service.channels().list(part='snippet', id=channel_ids)
channel_info = channel_request.execute()

In [11]:
# Titles and IDs of the channels in the response, in matching order.
unique_channel_names = [channel['snippet']['title'] for channel in channel_info['items']]
temp_channel_ids = [channel['id'] for channel in channel_info['items']]

# Map each channel ID to its title, then expand back to one name per video
# by looking up every entry of channel_ids.
id_to_name = {
    channel_id: channel_name
    for channel_id, channel_name in zip(temp_channel_ids, unique_channel_names)
}
full_channel_names = list(map(id_to_name.__getitem__, channel_ids))

In [12]:
# Close the API client; the remaining cells only work with data already fetched.
yt_service.close()

In [13]:
# Video titles, in the same order as video_info['items'].
titles = [video['snippet']['title'] for video in video_info['items']]

In [14]:
# One row per video: its title plus the ID and name of the uploading channel.
df = pd.DataFrame(
    {
        'video_title': titles,
        'channel_id': channel_ids,
        'channel_name': full_channel_names
    }
)

# to_csv does not create missing parent directories, so make sure the
# output folder exists before writing.
os.makedirs('data', exist_ok=True)
df.to_csv('data/yt_titles.csv', index=False)