In [None]:
%pip install opendatasets

In [None]:
%pip install pandas

In [None]:
%pip install missingno

In [1]:
import opendatasets as od 
import pandas as pd
import numpy as np
import os

if not os.path.exists('steam-games-dataset'):
    od.download( 
        "https://www.kaggle.com/datasets/fronkongames/steam-games-dataset/data") 
    
if not os.path.exists('steam-video-games'):
    od.download( 
        "https://www.kaggle.com/datasets/tamber/steam-video-games/data") 
    
if not os.path.exists('popularity-of-games-on-steam'):
    od.download( 
        "https://www.kaggle.com/datasets/michau96/popularity-of-games-on-steam") 

In [2]:
import pandas as pd

In [3]:
game_popularity = pd.read_csv('popularity-of-games-on-steam/SteamCharts.csv', encoding='ISO-8859-1')
steam_games = pd.read_csv('steam-games-dataset/games.csv', encoding='ISO-8859-1')
user_data = pd.read_csv('steam-video-games/steam-200k.csv', encoding='ISO-8859-1', usecols=[0, 1, 2, 3], names=['UserID', 'Game', 'Behavior', 'Quantity'])
# Note: Behvior is either purchase or play. When behavior is purchase, quantity is always 1. When behavior is play, quantity is the number of hours

Brainstorm:
Step 1: If the user does not have any previous games played from the model above....print NONE (retrive a list of users with no         games for presentation)
Step 2: Create a function that grabs the following [Game ID, Game Title, Total Hours Played, Release Date, Total Postive Reviews/Total Reviews] and Displays the Top 10 Recommendations based on positve_reviews, engagement, and release_date

    Step 2a: Call "steam_games" attributes [AppId, Name, Release Date, Postive, Negative, Average Playtime Forever]
             Aggregate "steam_games" to present total hours played and total postive reviews for each game (name: game_stats)
    Step 2b: Verify that the data is good by extracting the file as a .csv and manually check all datapoints
    Step 2c: Fix any errors (list errors below as needed)
             [] Fixed through .drop() of outliers of no reviews, no engagement
             NOTE: some games have weird characters, but the characters are of different languages (Korean, Russian, etc.)
             NOTE: did not drop 'hours_played' as it is believed to be inaccurate but still useful to an extent.
    Step 2d: Set the dataset to be in descending order of most positively rated, most total ratings, most recent release date,                most hours played.
    
Step 3: Recommend the game through an automatic link that takes the user directly to Steam to view the game.

In [4]:
# Step 2a-2c

# Grab the desired attributes from desired dataset
game_stats = pd.read_csv('steam-games-dataset/games.csv', encoding='ISO-8859-1', usecols = [0, 1, 2, 22, 23, 28])

# Create a new aggregated column 'total_reviews'
game_stats['total_review'] = (game_stats['Positive'] + game_stats['Negative'])

# Compute the postive rating percentage for each game (Total Positive/(Total Positive + Total Negative) * 100), formatted and replace 'NaN' with 0
game_stats['pos_rating_percentage'] = ((game_stats['Positive'] / game_stats['total_review']) * 100).round(2).fillna(0)

# Rename columns for ease
game_stats.rename(columns={'AppID': 'game_id','Name':'game_name','Average playtime forever':'hours_played', 'Release date': "release_date"}, inplace=True)

### Count of possible recommendations before outlier removal: 85,097
game_stats = game_stats.drop(game_stats[game_stats['Positive'] == 0].index)

### Count of possible recommendations after outlier removal: 61,789
print(len(game_stats))
game_stats.head(3)

61789


Unnamed: 0,game_id,game_name,release_date,Positive,Negative,hours_played,total_review,pos_rating_percentage
0,20200,Galactic Bowling,"Oct 21, 2008",6,11,0,17,35.29
1,655370,Train Bandit,"Oct 12, 2017",53,5,0,58,91.38
3,1355720,Henosisâ¢,"Jul 23, 2020",3,0,0,3,100.0


In [5]:
# Step 2d

game_stats = game_stats.sort_values(by=['pos_rating_percentage', 'total_review', 'release_date','hours_played'], ascending=[False, False, False, False])
game_stats.head(10)
# when comparing this dataset 'pos_rating_percentage', 
# it has an average error margin of 5% at worst so you 
# are getting the most postively reviewed games/popular games.

# EXPLAINATION:
# upon further investigation, games that may be recognizable
# but not on our top 10 is due to their pos_rating_percentage.
# meaning, the more engagement = more critiques = more damage to the rating
# and less likely to be on our top 10.

# flip 'pos_rating_percentage' and 'total_review' to display the explaination above.

Unnamed: 0,game_id,game_name,release_date,Positive,Negative,hours_played,total_review,pos_rating_percentage
23483,790060,The Void Rains Upon Her Heart,"Feb 14, 2018",451,0,0,451,100.0
61530,992740,ç¥é¢¨ Inorikaze,"Feb 13, 2019",318,0,0,318,100.0
44121,1043240,Touhou Kishinjou ~ Double Dealing Character.,"Jun 18, 2019",247,0,43,247,100.0
83475,1293170,Bzzzt,"Nov 13, 2023",238,0,0,238,100.0
67275,2206340,Aokana - Four Rhythms Across the Blue - EXTRA2,"Dec 16, 2022",222,0,0,222,100.0
18561,1176050,ç§å°æè¡ ~ Secret Sealing Travel,"Oct 17, 2020",215,0,0,215,100.0
42009,1684410,Seal World,"Apr 1, 2022",204,0,0,204,100.0
30943,1290220,Elasto Mania Remastered,"May 25, 2020",188,0,0,188,100.0
9472,1249480,Ex-Zodiac,"Jul 21, 2022",167,0,42,167,100.0
73262,2055910,Poosh XL,"May 8, 2023",148,0,0,148,100.0


In [6]:
# If you want to print the dataset
game_stats.to_csv('game_stats.csv')

In [7]:
# Step 3

# These are the values to print the links to Steam
game_stats2 = game_stats[['game_id', 'game_name']]
recommendations = game_stats2.head(10)
x = recommendations.to_numpy(dtype=str)

print("Your top 10 recommendations for model 1: \n")

for i in range(len(x)):
    for j in range(len(x[0])):
        if j == 0:
            appid = x[i][j]
        elif j == 1:
            gamename = x[i][j]
    print(gamename + ':\n\t https://store.steampowered.com/app/' + appid)

Your top 10 recommendations for model 1: 

The Void Rains Upon Her Heart:
	 https://store.steampowered.com/app/790060
ç¥é¢¨ Inorikaze:
	 https://store.steampowered.com/app/992740
Touhou Kishinjou ~ Double Dealing Character.:
	 https://store.steampowered.com/app/1043240
Bzzzt:
	 https://store.steampowered.com/app/1293170
Aokana - Four Rhythms Across the Blue - EXTRA2:
	 https://store.steampowered.com/app/2206340
ç§å°æè¡ ~ Secret Sealing Travel:
	 https://store.steampowered.com/app/1176050
Seal World:
	 https://store.steampowered.com/app/1684410
Elasto Mania Remastered:
	 https://store.steampowered.com/app/1290220
Ex-Zodiac:
	 https://store.steampowered.com/app/1249480
Poosh XL:
	 https://store.steampowered.com/app/2055910
