In [1]:
import googleapiclient.discovery
from googleapiclient.errors import HttpError
from utils.util import str_clear, check_path
import json
import os

# Read the API key from the environment so the secret is never stored in the
# notebook itself; the placeholder is kept as a fallback so the cell still
# behaves as before when YOUTUBE_API_KEY is not set.
my_api_key = os.environ.get('YOUTUBE_API_KEY', 'YOUR KEY')
api_service_name = 'youtube'
api_version = 'v3'

# API client shared by the cells below.
youtube = googleapiclient.discovery.build(
    api_service_name, api_version, developerKey=my_api_key)

In [12]:
def collect_video_data(word, year, client=None):

    """
    This function searches YouTube for the most viewed videos that match a
    query and were published within one calendar year

    :param word: words used to search for videos on Youtube
    :param year: year to restrict the search to (e.g. '2019'; an int also works)
    :param client: optional API client exposing ``search().list(...).execute()``;
        when omitted, the module-level ``youtube`` client is used
    :return: Returns the raw search response (at most 50 videos)
    """

    api = youtube if client is None else client

    results = api.search().list(
        part='snippet',
        q=word,
        type='video',
        publishedAfter=f'{year}-01-01T00:00:00Z',
        # End the window on the last second of the year so that videos
        # published during December 31st are not left out.
        publishedBefore=f'{year}-12-31T23:59:59Z',
        order='viewCount',
        maxResults=50
    ).execute()

    # Report the real number of hits: a search may return fewer than 50.
    print(f"{len(results.get('items', []))} videos returned")

    return results


def format_information(videos, year, name):

    """
    This function formats the information of the collected videos and writes them to a JSON file

    The file is written to ``<cwd>/database/videos/<year>/<name>.json``.

    :param videos: Youtube video data (search response with an 'items' list)
    :param year: year of the videos, used as the name of the output sub-directory
    :param name: name of the JSON file (without extension)
    :return: None; the data is written to disk
    """

    videos_data = []

    for video in videos['items']:

        data = video['snippet']

        videos_data.append({
            'id': str_clear(video['id']['videoId']),
            'channel': str_clear(data['channelTitle']),
            'date': str_clear(data['publishedAt']),
            'title': str_clear(data['title']),
            'description': str_clear(data['description'])
        })

    # Build the output directory once and reuse it for both the directory
    # check and the file itself, so the two can never point at different places.
    # str() lets callers pass the year as an int as well as a string.
    path = os.path.join(os.getcwd(), 'database', 'videos', str(year))

    # NOTE(review): check_path presumably creates the directory when it is
    # missing - confirm against utils.util.
    check_path(path)

    with open(os.path.join(path, f'{name}.json'), 'w', encoding="utf8") as file:
        json.dump(videos_data, file, ensure_ascii=False)

    print('Information was stored in json file')

In [13]:
words = ['anime amv', 'cs go fragmovie']

years = ['2019', '2020']

# Set once the API rejects a request with 403 so that BOTH loops stop;
# a plain `break` would only leave the inner loop and the next year would
# keep sending requests against the exhausted quota.
quota_exceeded = False

for year in years:
    for word in words:

        try:

            videos = collect_video_data(word, year)
            format_information(videos, year, word)

        except HttpError as error:

            # Only the 403 case is handled here; any other API failure is
            # re-raised instead of being silently ignored.
            if error.resp.status != 403:
                raise

            print("Your quota has been exceeded")
            quota_exceeded = True
            break

    if quota_exceeded:
        break

50 videos returned
Information was stored in json file
50 videos returned
Information was stored in json file
50 videos returned
Information was stored in json file
50 videos returned
Information was stored in json file
