## Get info of each game
Here are some examples of responded API:
- https://api.rawg.io/api/games/rimworld
- https://api.rawg.io/api/games/grand-theft-auto-v
- https://rawg.io/games/grand-theft-auto-v

In [1]:
import csv
import requests
import json
from pprint import pprint
from time import time
import concurrent.futures
import functools
import os

with open("../secret.json", "r") as f:
    API_KEY = json.load(f)["API_KEY"]

### Load CSV file which has game's id and its name

In [2]:
csv_data = []
with open("../data/game_id.csv", "r") as f:
    csv_data = list(csv.reader(f))

# Preview
print(*csv_data[:10], sep="\n")

['3498', 'grand-theft-auto-v']
['3328', 'the-witcher-3-wild-hunt']
['4200', 'portal-2']
['5286', 'tomb-raider']
['5679', 'the-elder-scrolls-v-skyrim']
['4291', 'counter-strike-global-offensive']
['12020', 'left-4-dead-2']
['13536', 'portal']
['4062', 'bioshock-infinite']
['802', 'borderlands-2']


## Multithreading

### This function is responsible for requesting each game and save as a JSON file in `/data/game_info/`

In [3]:
def worker(start_index, games_per_worker, urls, downloaded_files, headers):
    for url in urls[start_index : start_index + games_per_worker]:
        if url.rsplit("/")[-1] in downloaded_files: continue
        try:
            # Request API
            json_data = json.loads(requests.get(url, headers=headers).text)

            # Only include wanted keys
            D = {k:v for k,v in json_data.items() if k in include}

            # Clean up dictionary
            D["platforms"] = []
            for platform in json_data["platforms"]:
                D["platforms"].append(platform["platform"]["name"])

            for key in ("developers", "genres", "publishers"):
                D[key] = []
                for data in json_data[key]:
                    D[key].append(data["name"])

            if json_data["esrb_rating"]:
                D["esrb_rating"] = json_data["esrb_rating"]["name"]
            
            # Save as JSON file
            name = D["id"]
            with open(f"../data/game_info/{name}.json","w", encoding="utf-8") as f:
                json.dump(D, f)
        except:
            print(f"Failed", end=" ")

In [4]:
# Create folder if not existed
if not os.path.exists('../data/game_info/'):
    os.makedirs('../data/game_info/')

#### Threading Preparation

In [5]:
headers = { 'User-Agent': 'App Name: Education purpose',}
params = {"key": API_KEY}
include = {"id",
           "slug",
           "name",
           "metacritic",
           "released",
           "tba",
           "updated",
           "website",
           "rating",
           "rating_top",
           "added_by_status",
           "playtime",
           "achievements_count",
           "ratings_count",
           "suggestions_count",
           "game_series_count",
           "reviews_count",
           "platforms",
           "developers",
           "genres",
           "publishers",
           "esrb_rating",
           }

# Set up number of workers
max_workers = 64
start_game_index = 0
end_game_index = len(csv_data)
number_of_games = end_game_index - start_game_index
games_per_worker = int(number_of_games/max_workers) + 1 
start_index = range(start_game_index, end_game_index, games_per_worker)

# Skip downloaded files
downloaded_files = {file.split(".",1)[0] for file in os.listdir("../data/game_info/")}
game_ids = [game_id for game_id, _ in csv_data if game_id not in downloaded_files]

# Make urls
base_url = r"https://api.rawg.io/api/games/{i}?key={key}"
urls = [base_url.format(i=game_id, key=API_KEY) for game_id in game_ids]

print(len(urls))
print(*urls[:10], sep="\n")

447797
https://api.rawg.io/api/games/25133?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378683?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378476?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378455?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378452?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378443?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378427?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378395?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378390?key=f92efb7027874ab899c1ff83335a6a97
https://api.rawg.io/api/games/378316?key=f92efb7027874ab899c1ff83335a6a97


In [None]:
# Time
t0 = time()
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
    temp = functools.partial(worker,
                             games_per_worker=games_per_worker,
                             urls=urls,
                             downloaded_files=downloaded_files,
                             headers=headers,
                            )
    executor.map(temp, start_index)
    
# Time
print(f"Time taken: {time()-t0}")

Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed FailedFailed FailedFailed Failed   Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed Failed FailedFailed Failed  Failed Failed Failed Failed FailedFailed  Failed FailedFailed Failed Failed Failed  FailedFailed FailedFailed   Failed Failed FailedFailedFailed   FailedFailedFailed   Failed Failed Failed Failed Failed Failed Failed Failed