In [12]:
# import Required Libraries

import pandas as pd
import numpy as np
import dask.dataframe as dd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

In [13]:
# import data
games_df = pd.read_csv("../datasets/merged_steam_games_.csv")
games_df.head(5)

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,...,RPG,Indie,Software Training,Simulation,Game Development,Massively Multiplayer,Early Access,Nudity,Strategy,Violent
0,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
1,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
2,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
3,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
4,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
user_item_matrix = games_df.pivot_table(index='title', columns='user_id', aggfunc=lambda x: 1, fill_value=0)
user_item_matrix.columns = user_item_matrix.columns.get_level_values(1)
user_item_matrix.fillna(0, inplace=True)
user_item_matrix.head(5)

user_id,491,1183,2710,2765,3214,4446,15089,15619,16448,18794,...,13755432,13758387,13761904,13763873,13777901,13778085,13778487,13778506,13778726,13781520
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7 Days to Die,1,0,0,1,0,0,1,0,1,0,...,0,0,0,0,0,1,0,0,0,0
ARK: Survival Evolved,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
American Truck Simulator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Among Us,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,1,0
Arma 3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [15]:
# Perform SVD on the user-item matrix
svd = TruncatedSVD()  # You can choose the number of components
svd_matrix = svd.fit_transform(user_item_matrix)

# Compute cosine similarity on the transformed matrix
svd_based_collabor = cosine_similarity(svd_matrix)
svd_based_collabor = pd.DataFrame(data=svd_based_collabor, index=user_item_matrix.index, columns=user_item_matrix.index)

In [16]:
def get_user_liked_games(user_id):

    if user_id not in user_item_matrix.columns:
        return None  # Return None if user_id is not found

    # Get games liked by the user
    liked_games_indices = np.where(user_item_matrix[user_id] == 1)[0]
    liked_games = user_item_matrix.index[liked_games_indices]

    return liked_games

In [17]:
def get_similar_game(game_name):
    if game_name not in user_item_matrix.index:
        return None, None
    else:
        sim_games = svd_based_collabor.sort_values(by=game_name, ascending=False).index
        sim_score = svd_based_collabor.sort_values(by=game_name, ascending=False).loc[:, game_name].tolist()
        return sim_games, sim_score

In [18]:
def get_similar_games_for_user(user_id):
    liked_games = get_user_liked_games(user_id)

    if liked_games is None or len(liked_games) == 0:
        return "No games liked by the user."

    similar_games_list = []
    for game in liked_games:
        similar_games, _ = get_similar_game(game)
        similar_games_list.extend(similar_games)

    # Removing duplicates and keeping the counts of each game
    similar_games_counts = pd.Series(similar_games_list).value_counts()

    # Selecting top 10 similar games
    top_similar_games = similar_games_counts.head(10)

    return top_similar_games.index.tolist()

In [19]:
def check_user_like_specific_game(user_id, game_name):
    if user_id not in user_item_matrix.columns:
        return f"User ID '{user_id}' not found in the dataset columns."

    if game_name not in user_item_matrix.index:
        return f"Game title '{game_name}' not found in the dataset index."

    # Get games similar to the provided game
    similar_games, _ = get_similar_game(game_name)

    # Get games liked by the user
    user_liked_games = get_user_liked_games(user_id)

    if user_liked_games is None or len(user_liked_games) == 0:
        return "No games liked by the user."

    # Check if the provided game is among the games liked by the user
    if game_name in user_liked_games:
        return "User will definitely like this game."
    else:
        # Check if the provided game is among the top similar games for the user
        if game_name in similar_games:
            return "Yes, user might like this game based on their preferences."
        else:
            return "No, it's less likely that the user will like this game based on their preferences."

In [20]:
def get_top_similar_games_for_user(user_id):
    liked_games = get_user_liked_games(user_id)

    if liked_games is None or len(liked_games) == 0:
        return "No games liked by the user."

    similar_games_list = []
    for game in liked_games:
        similar_games, _ = get_similar_game(game)
        similar_games_list.extend(similar_games)

    # Removing duplicates and keeping the counts of each game
    similar_games_counts = pd.Series(similar_games_list).value_counts()

    # Selecting top 10 similar games
    top_similar_games = similar_games_counts.head(10)

    return top_similar_games.index.tolist()


In [21]:
top_similar_games = get_top_similar_games_for_user(7606333)
top_similar_games

['Counter-Strike: Global Offensive',
 'Factorio',
 'Hunt: Showdown',
 'Borderlands 2',
 'DOOM',
 'Risk of Rain 2',
 'The Binding of Isaac: Rebirth',
 'Life is Strange - Episode 1',
 'Project Zomboid',
 'Warframe']

In [22]:
check_user_like_specific_game(7606333, "Dying Light")

'Yes, user might like this game based on their preferences.'

In [23]:
games, score = get_similar_game("Garry's Mod")
for x,y in zip(games[:10], score[:10]):
    print("{} with similarity of {}".format(x, y))

Garry's Mod with similarity of 1.0
Counter-Strike: Global Offensive with similarity of 0.9999553895567478
World of Tanks Blitz with similarity of 0.99898138056906
BeamNG.drive with similarity of 0.9984089261089114
Among Us with similarity of 0.9971708536650874
Counter-Strike: Source with similarity of 0.9959094478974789
Unturned with similarity of 0.9955021418000567
PAYDAY 2 with similarity of 0.9951784746523186
Rust with similarity of 0.9942513641098985
War Thunder with similarity of 0.9884692797726358
