In [2]:
import pandas as pd
import requests
import sqlite3
import time
import numpy as np

class StopExecution(Exception):
    def _render_traceback_(self):
        pass

### ID's and Names

**Scrapes the unique ids and names of all Steam video games from Steam's API** 

The API URL is 'https://api.steampowered.com/IStoreService/GetAppList/v1/', where the ids and names are stored in the nested dictionary hierarchy response['response']['apps']. Only a maximum of 50,000 results can be retrieved per response. The API has a parameter called 'last_appid' which will resume data retrieval from the last recorded game (app) id; I replaced appid with game_id or id for a better naming convention. 

In [117]:
idname_df = pd.DataFrame()
last_appid = 0

while True:

    params = {
        'key': '1674C7309B00CA08D73A8CC100CA24C7', 
        'max_results': '50000', # maximum of 50,000 retrievals per response
        'last_appid': last_appid} # will resume data retrieval from this id
    gameListUrl = 'https://api.steampowered.com/IStoreService/GetAppList/v1/'
    
    response = requests.get(gameListUrl, params=params)
    response = response.json()

    if len(response['response']) == 0:
        break
    
    temp_df = pd.DataFrame(response['response']['apps']) # location of ids and names
    idname_df = pd.concat([idname_df, temp_df], ignore_index=True) # adding data to already existing data frame
    last_appid = idname_df['appid'].max()

# Renaming appid to id
idname_df.rename(columns={'appid': 'id'}, inplace=True)
# Dropping unnecessary columns
idname_df.drop(columns=['last_modified', 'price_change_number'], inplace=True)
# Adding columns for future data insertion
idname_df[['release_date', 'price']] = np.nan
idname_df.head()

Unnamed: 0_level_0,name,release_date,price
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
10,Counter-Strike,,
20,Team Fortress Classic,,
30,Day of Defeat,,
40,Deathmatch Classic,,
50,Half-Life: Opposing Force,,


**Converts the 'idname_df' data frame into a table called 'game' in a SQL file called 'steam_db.sqlite' for permanent storage**

In [129]:
conn = sqlite3.connect('steam_db.sqlite')
cur = conn.cursor()

with conn:
    try:
        idname_df.to_sql(name='game', con=conn, index=False)
    except:
        print("Table 'game' already exists")

        steam_db_ids = set([id[0] for id in cur.execute('SELECT id FROM game').fetchall()])
        idname_df_ids = set(list(idname_df['id']))

        new_game_ids = idname_df_ids - steam_db_ids
        print(len(new_game_ids))
        
        idname_df.loc[idname_df['id'] == list(new_game_ids)]
        


        


Table 'game' already exists
975


ValueError: ('Lengths must match to compare', (89188,), (975,))

In [124]:
raise StopExecution

source_conn = sqlite3.connect(r"C:\Users\xuqc0\Documents\XUQC01\WORK\Projects\Steam_Games-Predicting_Success\steam_db.sqlite")
destination_conn = sqlite3.connect(r"C:\Users\xuqc0\Documents\XUQC01\WORK\Projects\Predicting_the_Success_of_Steam_Games\steam_db.sqlite")

with source_conn, destination_conn:
    source_conn_cur = source_conn.cursor()
    destination_conn_cur = destination_conn.cursor()

    source_conn_cur.execute("SELECT * FROM game")
    rows = source_conn_cur.fetchall()
    for row in rows:
        id = row[0]
        name = row[1]
        date = row[4]
        price = row[5]

        data = (id, name, date, price)
        destination_conn_cur.execute("INSERT INTO game VALUES (?, ?, ?, ?)", data)
    
    destination_conn.commit()


In [116]:
conn = sqlite3.connect('steam_db.sqlite')
cur = conn.cursor()

cur.execute('CREATE TABLE game (id, name, release_date, price)')

conn.commit()
conn.close()