In [64]:
import requests
import json
from datetime import datetime, timedelta
import time
import os

In [68]:
def load_config():
    """Parse ``config.json`` from the current working directory.

    Returns:
        The decoded JSON content of the file (whatever top-level value the
        file holds; this notebook reads an ``'api_key'`` entry from it).
    """
    with open('config.json', 'r') as handle:
        return json.load(handle)

# Gather best-seller lists by year

In [59]:
def fetch_best_sellers_monthly(api_key, year, delay_seconds=12, timeout=30):
    """Fetch the NYT best-seller "full overview" once per month of ``year``.

    Twelve requests are issued, one for the first day of each month, with
    ``published_date`` set to ``YYYY-MM-01``.

    Args:
        api_key: API key, sent as the ``api-key`` query parameter.
        year: Calendar year to fetch (int).
        delay_seconds: Pause after every request attempt, in seconds. The
            default of 12 is the delay this notebook has always used to stay
            within the API rate limit.
        timeout: Seconds to wait for each HTTP response before the request
            is abandoned (without it a stalled connection blocks forever).

    Returns:
        dict mapping each ``'YYYY-MM-01'`` string to the decoded JSON body of
        its response. Months that failed (non-200 status or an exception) are
        reported on stdout and omitted from the dict.
    """
    base_url = "https://api.nytimes.com/svc/books/v3/lists/full-overview.json"
    results = {}

    for month in range(1, 13):  # For each month in the year
        # Build the date from the loop index so that a failure in one month
        # can never stall the calendar: previously the date was advanced
        # inside the try block, so an exception caused the same month to be
        # retried and December to be silently dropped.
        date = datetime(year, month, 1).strftime('%Y-%m-%d')

        try:
            params = {
                'api-key': api_key,
                'published_date': date
            }
            response = requests.get(base_url, params=params, timeout=timeout)
            if response.status_code == 200:
                results[date] = response.json()
                print(f"Data fetched for {date}")
            else:
                print(f"Failed to fetch data for {date}: {response.status_code}")
        except Exception as e:
            # Deliberately best-effort: report the failure and carry on with
            # the remaining months instead of aborting the whole year.
            print(f"Error processing date {date}: {str(e)}")

        # Pause after every attempt (successful or not, including the last
        # one) so that back-to-back calls for consecutive years also respect
        # the rate limit.
        print(f"Waiting {delay_seconds} seconds before next API call...")
        time.sleep(delay_seconds)

    return results

In [65]:
def save_data(year, data, output_dir='../data/raw_data'):
    """Write ``data`` as indented JSON to ``<output_dir>/<year>.json``.

    Args:
        year: Used as the file stem (``{year}.json``) and in the log message.
        data: Any JSON-serialisable object.
        output_dir: Directory to write into; created (with parents) if it
            does not exist. Defaults to the notebook's original location,
            ``'../data/raw_data'`` relative to the working directory.

    Raises:
        OSError: If the directory cannot be created or the file not written.
        TypeError: If ``data`` is not JSON-serialisable.
    """
    # Ensure the directory exists
    os.makedirs(output_dir, exist_ok=True)

    # File path where the JSON will be saved
    file_path = f'{output_dir}/{year}.json'

    # Writing JSON data to a file
    with open(file_path, 'w', encoding='utf-8') as json_file:
        json.dump(data, json_file, indent=4)

    # Report success only once the file has been flushed and closed.
    print(f"Data for {year} successfully saved to {file_path}")

In [71]:
# Load settings from config.json (opened relative to the working directory)
# and pull out the key that is sent as 'api-key' on every API request.
# Keeping the key in a file keeps it out of the notebook itself.
config = load_config()
api_key = config['api_key']

In [66]:

# Download and save one JSON file per year, 2019 through 2023 inclusive.
# Every year costs twelve rate-limited API calls, so this cell is slow.
for year in (2019, 2020, 2021, 2022, 2023):
    print(f"Fetching data for {year}")
    data = fetch_best_sellers_monthly(api_key, year)
    save_data(year, data)

Fetching data for 2019
Data fetched for 2019-01-01
Waiting 12 seconds before next API call...
Data fetched for 2019-02-01
Waiting 12 seconds before next API call...
Data fetched for 2019-03-01
Waiting 12 seconds before next API call...
Data fetched for 2019-04-01
Waiting 12 seconds before next API call...
Data fetched for 2019-05-01
Waiting 12 seconds before next API call...
Data fetched for 2019-06-01
Waiting 12 seconds before next API call...
Data fetched for 2019-07-01
Waiting 12 seconds before next API call...
Data fetched for 2019-08-01
Waiting 12 seconds before next API call...
Data fetched for 2019-09-01
Waiting 12 seconds before next API call...
Data fetched for 2019-10-01
Waiting 12 seconds before next API call...
Data fetched for 2019-11-01
Waiting 12 seconds before next API call...
Data fetched for 2019-12-01
Waiting 12 seconds before next API call...
Data for 2019 successfully saved to ../data/raw_data/2019.json
Fetching data for 2020
Data fetched for 2020-01-01
Waiting 12 

# Extract book information