# **"More Like This" Reccomendation System for Steam Games**
*ITCS 5154, Evan Youssef*

## **Installations**

In [None]:
# pip install requests

Collecting requests
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting charset-normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl.metadata (36 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.3.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Downloading certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
Using cached requests-2.32.3-py3-none-any.whl (64 kB)
Downloading certifi-2025.1.31-py3-none-any.whl (166 kB)
Using cached charset_normalizer-3.4.1-cp313-cp313-win_amd64.whl (102 kB)
Using cached idna-3.10-py3-none-any.whl (70 kB)
Using cached urllib3-2.3.0-py3-none-any.whl (128 kB)
Installing collected packages: urllib3, idna, charset-normalizer, certifi, requests
Successfully installed certifi-2025.1.31 charset-normalizer-3.4.1 idna-3.10 requests-2.32


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


## **Data Collection**
*Based on ["Scraping Information of All Games From Steam With Python"](https://medium.com/codex/scraping-information-of-all-games-from-steam-with-python-6e44eb01a299) by mmmmmm4*

### **Collecting App IDs**

In [2]:
import requests

def get_app_ids():
    req = requests.get('http://api.steampowered.com/ISteamApps/GetAppList/v0002/?format=json')

    if (req.status_code != 200): # if request fails
        print('Request failed')
        return
    
    data = req.json()

    apps_data = data['applist']['apps']
    app_ids = []

    for app in apps_data:
        app_id = app['appid']
        name = app['name']

        if not name:
            continue

        app_ids.append(app_id)
    
    return app_ids

print('App IDs collected:', len(get_app_ids()))

App IDs collected: 239945


### **Collecting App Details**

In [9]:
from collections import deque
import time

def get_app_details(app_ids):
    remaining_apps = deque(set(app_ids))
    all_app_data = []

    while len(remaining_apps) > 0:
        app_id = remaining_apps.popleft()

        req = requests.get(f'https://steamspy.com/api.php?request=appdetails&appid={app_id}')

        all_app_data.append(req.json())

        if len(remaining_apps) % 500 == 0:
            with open('app_details.txt', 'w', encoding='utf-8') as file:
                file.write(str(all_app_data))
                print(f"Written to file | Remaining: {len(remaining_apps)}")
        
    with open('app_details.txt', 'w', encoding='utf-8') as file:
        file.write(str(all_app_data))
        print(f"Written to file | Remaining: {len(remaining_apps)}")

        '''
        if req.status_code == 200: # if request succeeds
            app_details = req.json()
            app_details = app_details[str(app_id)]
            if app_details['success'] == True and app_details['data']['type']:
                all_app_data.append(app_details)
        
        elif req.status_code == 429: # if request timed out
            print(f'{app_id} Request timed out...sleeping 10 seconds')
            remaining_apps.appendleft(app_id)
            time.sleep(10)
            continue

        elif req.status_code == 403: # if request forbidden
            print(f'{app_id} Request forbidden...sleeping for 5 minutes')
            remaining_apps.appendleft(app_id)
            time.sleep(60*5)
            continue
        
        else: # if request error
            print('Error:', req.status_code)
            print(f'App ID:', {app_id})
            app_errors.append(app_id)
            continue
        '''

get_app_details(get_app_ids())

Written to file | Remaining: 166000
Written to file | Remaining: 165500
Written to file | Remaining: 165000
Written to file | Remaining: 164500
Written to file | Remaining: 164000
Written to file | Remaining: 163500
Written to file | Remaining: 163000


ReadTimeout: HTTPSConnectionPool(host='steamspy.com', port=443): Read timed out. (read timeout=None)

## **Data Cleaning**

In [14]:
import ast
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer

app_details = []
with open("app_details.txt", "r", encoding="utf8") as inFile:
    app_details = ast.literal_eval(inFile.read())

# All app details data
df = pd.DataFrame(app_details)
df = pd.concat([df[['appid', 'name', 'developer', 'publisher', 'genre', 'tags']]], axis=1)

empty_values = [None, '', {}, []]

rows_to_discard = []

for i, row in df.iterrows():
    for detail in row:
        if detail in empty_values:
            rows_to_discard.append(int(i))
            break

df = df.dropna()

df_clean = df.drop(index=rows_to_discard)
df_clean.head()

Unnamed: 0,appid,name,developer,publisher,genre,tags
0,2621440,Infinity Castle Dungeon,Morty's Develop,Morty's Develop,"Action, Adventure, Casual, Indie","{'Action Roguelike': 167, 'Exploration': 161, ..."
7,10,Counter-Strike,Valve,Valve,Action,"{'Action': 5498, 'FPS': 4923, 'Multiplayer': 3..."
11,20,Team Fortress Classic,Valve,Valve,Action,"{'Action': 767, 'FPS': 333, 'Multiplayer': 280..."
13,2097180,OxU,Quiet River,Quiet River,"Casual, Indie","{'Puzzle': 174, 'Relaxing': 164, 'Casual': 161..."
14,2621470,Endless Desert TD,Paw Strike Games,Paw Strike Games,"Indie, Strategy","{'Tower Defense': 78, 'Rogue-like': 71, 'Strat..."


In [15]:
# Extract and encode tags
mlb = MultiLabelBinarizer()
tags_encoded = mlb.fit_transform(df['tags'])
df_tags = pd.DataFrame(tags_encoded, columns=mlb.classes_)

# extract features
df_features = pd.concat([df[['appid']], df_tags], axis=1)

In [26]:
from sklearn.neighbors import NearestNeighbors

knn = NearestNeighbors(n_neighbors=6, metric='euclidean')
knn.fit(df_features.drop('appid', axis=1))

def recommend_similar_games(input_appid):
    input_features = df_features[df_features['appid'] == input_appid].drop('appid', axis=1)

    if input_features.empty:
        print(f"No features found for AppID {input_appid}.")
        return []

    distances, indices = knn.kneighbors(input_features)

    recommended_game_ids = df_features.iloc[indices[0]]['appid'].values
    return recommended_game_ids

recommended_games = recommend_similar_games(10)
print(f"Recommended games for appid 1: {recommended_games}")

Recommended games for appid 1: [  10   80  240 2350  730  100]
