In [10]:
# import Required Libraries

import pandas as pd
import numpy as np
import dask.dataframe as dd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [11]:
# import data
games_df = pd.read_csv("../datasets/merged_steam_games_.csv")
games_df.head(5)

Unnamed: 0,app_id,title,date_release,win,mac,linux,rating,positive_ratio,user_reviews,price_final,...,RPG,Indie,Software Training,Simulation,Game Development,Massively Multiplayer,Early Access,Nudity,Strategy,Violent
0,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
1,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
2,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
3,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0
4,552520,Far Cry® 5,2018-03-26,1,0,0,7,80,129943,60.0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Create user-item matrix
# 'user_id' 열을 카테고리로 변환
# games_df['user_id'] = games_df['user_id'].astype('category')

# # 'user_id' 열의 카테고리를 미리 알려진 상태로 변환
# games_df['user_id'] = games_df['user_id'].cat.as_known()

user_item_matrix = games_df.pivot_table(index='title', columns='user_id', aggfunc=lambda x: 1, fill_value=0)
user_item_matrix.columns = user_item_matrix.columns.get_level_values(1)
user_item_matrix.head(5)

user_id,491,1183,2710,2765,3214,4446,15089,15619,16448,18794,...,13755432,13758387,13761904,13763873,13777901,13778085,13778487,13778506,13778726,13781520
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7 Days to Die,1,0,0,1,0,0,1,0,1,0,...,0,0,0,0,0,1,0,0,0,0
ARK: Survival Evolved,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
American Truck Simulator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Among Us,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,1,0
Arma 3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [13]:
user_item_matrix.fillna(0, inplace=True)
user_item_matrix.head(5)

user_id,491,1183,2710,2765,3214,4446,15089,15619,16448,18794,...,13755432,13758387,13761904,13763873,13777901,13778085,13778487,13778506,13778726,13781520
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7 Days to Die,1,0,0,1,0,0,1,0,1,0,...,0,0,0,0,0,1,0,0,0,0
ARK: Survival Evolved,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
American Truck Simulator,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
Among Us,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,1,0,1,0
Arma 3,0,0,0,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,0,0,0


In [14]:
item_based_collabor = cosine_similarity(user_item_matrix)
item_based_collabor

array([[1.        , 0.1905194 , 0.06551799, ..., 0.12740813, 0.04062967,
        0.10782565],
       [0.1905194 , 1.        , 0.06910737, ..., 0.22471863, 0.05237901,
        0.11982567],
       [0.06551799, 0.06910737, 1.        , ..., 0.04570707, 0.02807173,
        0.1371451 ],
       ...,
       [0.12740813, 0.22471863, 0.04570707, ..., 1.        , 0.08083383,
        0.08775901],
       [0.04062967, 0.05237901, 0.02807173, ..., 0.08083383, 1.        ,
        0.04042395],
       [0.10782565, 0.11982567, 0.1371451 , ..., 0.08775901, 0.04042395,
        1.        ]])

In [15]:
item_based_collabor = pd.DataFrame(data = item_based_collabor, index = user_item_matrix.index, columns = user_item_matrix.index)
item_based_collabor

title,7 Days to Die,ARK: Survival Evolved,American Truck Simulator,Among Us,Arma 3,Assassin's Creed® Odyssey,BeamNG.drive,Bloons TD 6,Borderlands 2,Brawlhalla,...,Tom Clancy's Rainbow Six® Siege,Tomb Raider,Undertale,Unturned,VRChat,Wallpaper Engine,War Thunder,Warframe,World of Tanks Blitz,theHunter: Call of the Wild™
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
7 Days to Die,1.000000,0.190519,0.065518,0.139131,0.124775,0.062060,0.089470,0.147443,0.176382,0.081718,...,0.131340,0.083394,0.047258,0.165100,0.108871,0.156664,0.087289,0.127408,0.040630,0.107826
ARK: Survival Evolved,0.190519,1.000000,0.069107,0.218325,0.141011,0.102866,0.087458,0.223679,0.170746,0.182857,...,0.147194,0.107510,0.054832,0.176130,0.139598,0.228198,0.103345,0.224719,0.052379,0.119826
American Truck Simulator,0.065518,0.069107,1.000000,0.106371,0.123435,0.087140,0.203281,0.110720,0.080646,0.038111,...,0.092808,0.079225,0.019591,0.070991,0.060673,0.119927,0.073663,0.045707,0.028072,0.137145
Among Us,0.139131,0.218325,0.106371,1.000000,0.171064,0.116791,0.172075,0.360016,0.190587,0.231071,...,0.273653,0.108941,0.151917,0.232116,0.201769,0.325003,0.138072,0.163360,0.064498,0.132753
Arma 3,0.124775,0.141011,0.123435,0.171064,1.000000,0.065195,0.163004,0.114678,0.098002,0.068547,...,0.138451,0.057151,0.031091,0.155996,0.123657,0.183886,0.142748,0.127345,0.057916,0.123337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Wallpaper Engine,0.156664,0.228198,0.119927,0.325003,0.183886,0.135190,0.156341,0.312744,0.199265,0.187321,...,0.252432,0.137774,0.103449,0.216540,0.215075,1.000000,0.114337,0.224658,0.061530,0.135122
War Thunder,0.087289,0.103345,0.073663,0.138072,0.142748,0.039093,0.140369,0.089467,0.075254,0.082448,...,0.128239,0.066346,0.036914,0.178350,0.096272,0.114337,1.000000,0.116957,0.217452,0.102089
Warframe,0.127408,0.224719,0.045707,0.163360,0.127345,0.076813,0.060639,0.197100,0.207264,0.182903,...,0.166709,0.096783,0.045668,0.209662,0.166392,0.224658,0.116957,1.000000,0.080834,0.087759
World of Tanks Blitz,0.040630,0.052379,0.028072,0.064498,0.057916,0.023588,0.059467,0.035334,0.055885,0.101100,...,0.056274,0.081883,0.022273,0.115891,0.053248,0.061530,0.217452,0.080834,1.000000,0.040424


In [16]:
def get_similar_game(game_name):
    if game_name not in user_item_matrix.index:
        return None, None
    else:
        sim_games = item_based_collabor.sort_values(by=game_name, ascending=False).index[:10]
        sim_score = item_based_collabor.sort_values(by=game_name, ascending=False).loc[:, game_name].tolist()[:10]
        return sim_games, sim_score

In [18]:
# games, score = get_similar_game("World of Warships")
# for x,y in zip(games[:10], score[:10]):
#     print("{} with similarity of {}".format(x, y))

In [19]:
def check_user_like(user_id, game_title):
    if user_id not in user_item_matrix.columns:
        return "User ID not found in the dataset."

    if game_title not in user_item_matrix.index:
        return "Game title not found in the dataset."

    liked_games, _ = get_similar_game(game_title)
    user_liked_games, _ = get_similar_game(user_id)

    if liked_games is None:  # Check if no similar games found for the given title
        return "No similar games found for the given title."

    if user_liked_games is None:  # Check if no games liked by the user
        return "No games liked by the user."

    top_similar_liked_games = set(user_liked_games[:10])

    if game_title in top_similar_liked_games:
        return "Yes"
    else:
        return "No"




In [22]:
check_user_like(2710, "7 Days to Die")

KeyError: 0