In [None]:
from csv import reader, writer
from json import dump
from os import chdir, makedirs
from os.path import exists
from time import sleep, time

from IPython.display import clear_output
from requests import get

In [None]:
# Defines output directory (relative to the notebook's working directory)
output_dir = "../../data/store_info/"

# Makes output directory, including any missing parent directories.
# exist_ok=True makes this safe to re-run and avoids the race between a
# separate existence check and the creation call.
makedirs(output_dir, exist_ok=True)

In [None]:
# Reads app ids into list, taking the first column of each CSV row.
# csv.reader yields an empty list for blank lines (e.g. a trailing newline
# at the end of the file), so empty rows are skipped instead of raising
# IndexError on row[0].
with open("../../data/appids/appids.csv", "r", newline="") as f:
    appids = [row[0] for row in reader(f) if row]

In [None]:
# Loop prep: (re)initialises the state shared with the scraping loop

# App ids of games with broken store pages, collected during the loop
failed_requests = list()

# Counters: minutes elapsed so far, and store pages seen so far
min_elapsed, store_pages_seen = 0, 0

# Reference timestamp from which elapsed time is measured
timer = time()

In [None]:
# Iterates through app ids, retrieving store info for each.
#
# For every app id the Steam appdetails endpoint is queried and the "data"
# part of the answer is written to "<output_dir><appid>.json". App ids whose
# response has no usable "data" entry are recorded in failed_requests and no
# file is written for them. The loop stops at the first unexpected HTTP
# status; in that case the error message is left on screen.

# Seconds to wait for a response before the request is abandoned; without a
# timeout a stalled connection would hang the notebook indefinitely.
REQUEST_TIMEOUT = 30
# Seconds to wait before retrying after a 502 response
RETRY_DELAY = 5
# Minimum seconds between consecutive requests, to avoid making Steam mad
REQUEST_INTERVAL = 2
# Minutes between progress reports
PROGRESS_INTERVAL = 5

for appid in appids:
    # Formats url template with current app id
    url = f"https://store.steampowered.com/api/appdetails?appids={appid}&json=1"

    # Requests data
    response = get(url, timeout=REQUEST_TIMEOUT)
    status = response.status_code

    # Retries request for as long as there's a 502 error
    while status == 502:
        print(f"502 Error! Retrying in {RETRY_DELAY} seconds...")
        sleep(RETRY_DELAY)
        response = get(url, timeout=REQUEST_TIMEOUT)
        status = response.status_code

    # Timestamp of the most recent response (taken after any retries) so the
    # throttle below is measured from the request that was actually last.
    request_time = time()

    # Terminates loop if unexpected http error occurs
    if status != 200:
        print(f"Error! HTTP response code {status} for appid {appid}")
        print(f"Stopped after {store_pages_seen} store pages.")
        break

    # Skips games with broken store pages, saving ids. The payload is
    # extracted BEFORE the output file is opened so that a broken page does
    # not leave an empty .json file behind.
    try:
        store_info = response.json()[appid]["data"]
    except (KeyError, TypeError, ValueError):
        # KeyError: app id or "data" missing; TypeError: body was not a JSON
        # object; ValueError: body was not valid JSON.
        failed_requests.append([appid])
    else:
        # File-system errors are deliberately not swallowed here: they are
        # not a property of the store page and would affect every later write.
        with open(output_dir + appid + ".json", "w") as f:
            dump(store_info, f)

    # Augments counter
    store_pages_seen += 1

    # Prints progress to console every PROGRESS_INTERVAL minutes.
    # min_elapsed holds the minute count at the last report, so both sides of
    # the comparison are in minutes.
    elapsed_min = int((time() - timer) // 60)
    if elapsed_min - min_elapsed >= PROGRESS_INTERVAL:
        min_elapsed = elapsed_min
        clear_output()
        if min_elapsed < 60:
            print(f"{store_pages_seen} store pages scraped in {min_elapsed} minutes")
        else:
            print(
                f"{store_pages_seen} store pages scraped in {min_elapsed//60} hours and {min_elapsed % 60} minutes"
            )

    # Waits out the remainder of the request interval without spinning the CPU.
    remaining = REQUEST_INTERVAL - (time() - request_time)
    if remaining > 0:
        sleep(remaining)
else:
    # Runs only when every app id was processed (no break above), so the
    # success message can never overwrite an HTTP error message.
    # Elapsed time is recomputed here rather than reusing the value from the
    # last progress report, which may be several minutes old.
    min_elapsed = int((time() - timer) // 60)

    clear_output()

    print(
        f"Store information successfully scraped for {store_pages_seen-len(failed_requests)} games in {min_elapsed//60} hours and {min_elapsed % 60} minutes!"
    )

In [None]:
# Reports failed requests, if there were any, and saves their app ids to a
# CSV file inside the output directory
if failed_requests:
    with open(output_dir + "failed_requests.csv", "w", newline="") as csv_file:
        writer(csv_file).writerows(failed_requests)

    print(
        f"Retrieval failed for {len(failed_requests)} games.\nApp ids written to '{output_dir}failed_requests.csv'."
    )