In [37]:
# import Required Libraries

import pandas as pd
import numpy as np
import dask.dataframe as dd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

In [38]:
# import data
games_df = pd.read_csv("../datasets/merged_steam_games_.csv")
games_df.head(5)

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,...,RPG,Indie,Software Training,Simulation,Game Development,Massively Multiplayer,Early Access,Nudity,Strategy,Violent
0,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
1,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
2,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
3,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
4,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0


In [39]:
user_item_matrix = games_df.pivot_table(index='title', columns='user_id', aggfunc=lambda x: 1, fill_value=0)
user_item_matrix.columns = user_item_matrix.columns.get_level_values(1)
user_item_matrix.fillna(0, inplace=True)
user_item_matrix.head(5)

user_id,491,1183,2710,2765,3214,4446,15089,15619,16448,18794,...,13755432,13758387,13761904,13763873,13777901,13778085,13778487,13778506,13778726,13781520
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7 Days to Die,1,0,0,1,0,0,1,0,1,0,...,0,0,0,0,0,1,0,0,0,0
ARK: Survival Evolved,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
American Truck Simulator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Among Us,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,1,0
Arma 3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [40]:
# Perform SVD on the user-item matrix
svd = TruncatedSVD()  # You can choose the number of components
svd_matrix = svd.fit_transform(user_item_matrix)

# Compute cosine similarity on the transformed matrix
svd_based_collabor = cosine_similarity(svd_matrix)
svd_based_collabor = pd.DataFrame(data=svd_based_collabor, index=user_item_matrix.index, columns=user_item_matrix.index)

In [41]:
def get_user_liked_games(user_id):
    user_id = str(user_id)  # Convert user_id to string

    if user_id not in user_item_matrix.columns:
        return None  # Return None if user_id is not found

    # Get games liked by the user
    liked_games_indices = np.where(user_item_matrix[user_id] == 1)[0]
    liked_games = user_item_matrix.index[liked_games_indices]

    return liked_games

In [42]:
def get_top_similar_games(game_name):
    if game_name not in user_item_matrix.index:
        return None  # Return None if game_name is not found

    game_index = user_item_matrix.index.get_loc(game_name)
    sim_scores = svd_based_collabor[game_index]

    # Get indices of top similar games (excluding the game itself)
    similar_game_indices = np.argsort(sim_scores)[::-1][1:11]
    similar_games = user_item_matrix.index[similar_game_indices]
    scores = sim_scores[similar_game_indices]

    return similar_games, scores

In [43]:
def get_similar_games_for_user(user_id):
    liked_games = get_user_liked_games(user_id)

    if liked_games is None or len(liked_games) == 0:
        return "No games liked by the user."

    similar_games_list = []
    for game in liked_games:
        similar_games, _ = get_top_similar_games(game)
        similar_games_list.extend(similar_games)

    # Removing duplicates and keeping the counts of each game
    similar_games_counts = pd.Series(similar_games_list).value_counts()

    # Selecting top 10 similar games
    top_similar_games = similar_games_counts.head(10)

    return top_similar_games.index.tolist()

In [44]:
# def check_user_like_svd(user_id, game_title):

#     if user_id not in user_item_matrix.columns:
#         return f"User ID '{user_id}' not found in the dataset columns."

#     if game_title not in user_item_matrix.index:
#         return f"Game title '{game_title}' not found in the dataset index."

#     liked_games, _ = get_similar_game_svd(game_title)
#     user_liked_games, _ = get_similar_game_svd(user_id)

#     if liked_games is None:  
#         return "No similar games found for the given title."

#     if user_liked_games is None:  
#         return "No games liked by the user."

#     top_similar_liked_games = set(user_liked_games[:10])

#     if game_title in top_similar_liked_games:
#         return "Yes"
#     else:
#         return "No"

In [45]:
print(user_item_matrix.index)
print(user_item_matrix.columns)


Index(['7 Days to Die', 'ARK: Survival Evolved', 'American Truck Simulator',
       'Among Us', 'Arma 3', 'Assassin's Creed® Odyssey', 'BeamNG.drive',
       'Bloons TD 6', 'Borderlands 2', 'Brawlhalla',
       'Call of Duty®: Black Ops III', 'Cities: Skylines',
       'Counter-Strike: Global Offensive', 'Counter-Strike: Source',
       'DARK SOULS™ III', 'DOOM', 'Darkest Dungeon®', 'DayZ',
       'Dead by Daylight', 'Deep Rock Galactic',
       'Divinity: Original Sin 2 - Definitive Edition',
       'Don't Starve Together', 'Dota 2', 'Dying Light',
       'Euro Truck Simulator 2', 'Factorio', 'Fallout 4', 'Fallout: New Vegas',
       'Far Cry® 5', 'Garry's Mod',
       'Grand Theft Auto IV: The Complete Edition', 'Grand Theft Auto V',
       'Half-Life 2', 'Hearts of Iron IV', 'Hollow Knight', 'Human: Fall Flat',
       'Hunt: Showdown', 'Left 4 Dead 2', 'Life is Strange - Episode 1',
       'Monster Hunter: World', 'Mount & Blade: Warband', 'No Man's Sky',
       'PAYDAY 2', 'PUBG: B

In [52]:
def check_user_like_specific_game(user_id, game_name):

    if user_id not in user_item_matrix.columns:
        return f"User ID '{user_id}' not found in the dataset columns."

    if game_name not in user_item_matrix.index:
        return f"Game title '{game_name}' not found in the dataset index."

    # Get games similar to the provided game
    similar_games, _ = get_top_similar_games(game_name)

    # Get games liked by the user
    user_liked_games = get_user_liked_games(user_id)

    if user_liked_games is None or len(user_liked_games) == 0:
        return "No games liked by the user."

    # Check if the provided game is among the top similar games for the user
    if game_name in similar_games:
        return "Yes, user might like this game based on their preferences."
    else:
        return "No, it's less likely that the user will like this game based on their preferences."


In [53]:
# Use the function for checking user likeness with SVD
like_prediction = check_user_like_specific_game(13755432, "Garry's Mod")
print(like_prediction)

KeyError: 29