# Readme
* Recommendation Engine : Contents Based Filtering
* Used data for compute cosine similarity : steam store games - steam.csv
* Used data for recommendation : steam user games
* rating = (((positive_rating - negative_rating)/2(positive_rating + negative_rating)) + 0.5) * 10

### recommend_game(df, appid, top=10)
* df is the data about contents of games
* The top 10 games are recommended in order of highest rating
* Result is the dataframe of games

### recommend_game_nameLIst(df, appid, top=10)
* df is the data about contents of games
* The top 10 games are recommended in order of highest rating
* Result is the list of the name of games

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

import sklearn
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
warnings.filterwarnings(action='ignore')

In [2]:
df = pd.read_csv('steam.csv')

In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27075 entries, 0 to 27074
Data columns (total 18 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   appid             27075 non-null  int64  
 1   name              27075 non-null  object 
 2   release_date      27075 non-null  object 
 3   english           27075 non-null  int64  
 4   developer         27075 non-null  object 
 5   publisher         27075 non-null  object 
 6   platforms         27075 non-null  object 
 7   required_age      27075 non-null  int64  
 8   categories        27075 non-null  object 
 9   genres            27075 non-null  object 
 10  steamspy_tags     27075 non-null  object 
 11  achievements      27075 non-null  int64  
 12  positive_ratings  27075 non-null  int64  
 13  negative_ratings  27075 non-null  int64  
 14  average_playtime  27075 non-null  int64  
 15  median_playtime   27075 non-null  int64  
 16  owners            27075 non-null  object

In [4]:
df.head()

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99


In [5]:
df['rating'] = ((df['positive_ratings'] - df['negative_ratings'])/(2 * (df['positive_ratings'] + df['negative_ratings'])) + 0.5) * 10.0
df

Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,rating
0,10,Counter-Strike,2000-11-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,124534,3339,17612,317,10000000-20000000,7.19,9.738882
1,20,Team Fortress Classic,1999-04-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,3318,633,277,62,5000000-10000000,3.99,8.397874
2,30,Day of Defeat,2003-05-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer,0,3416,398,187,34,5000000-10000000,3.99,8.956476
3,40,Deathmatch Classic,2001-06-01,1,Valve,Valve,windows;mac;linux,0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer,0,1273,267,258,184,5000000-10000000,3.99,8.266234
4,50,Half-Life: Opposing Force,1999-11-01,1,Gearbox Software,Valve,windows;mac;linux,0,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi,0,5250,288,624,415,5000000-10000000,3.99,9.479957
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27070,1065230,Room of Pandora,2019-04-24,1,SHEN JIAWEI,SHEN JIAWEI,windows,0,Single-player;Steam Achievements,Adventure;Casual;Indie,Adventure;Indie;Casual,7,3,0,0,0,0-20000,2.09,10.000000
27071,1065570,Cyber Gun,2019-04-23,1,Semyon Maximov,BekkerDev Studio,windows,0,Single-player,Action;Adventure;Indie,Action;Indie;Adventure,0,8,1,0,0,0-20000,1.69,8.888889
27072,1065650,Super Star Blast,2019-04-24,1,EntwicklerX,EntwicklerX,windows,0,Single-player;Multi-player;Co-op;Shared/Split ...,Action;Casual;Indie,Action;Indie;Casual,24,0,1,0,0,0-20000,3.99,0.000000
27073,1066700,New Yankee 7: Deer Hunters,2019-04-17,1,Yustas Game Studio,Alawar Entertainment,windows;mac,0,Single-player;Steam Cloud,Adventure;Casual;Indie,Indie;Casual;Adventure,0,2,0,0,0,0-20000,5.19,10.000000


In [6]:
contents = df[['categories', 'genres', 'steamspy_tags']]
contents

Unnamed: 0,categories,genres,steamspy_tags
0,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer
1,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer
2,Multi-player;Valve Anti-Cheat enabled,Action,FPS;World War II;Multiplayer
3,Multi-player;Online Multi-Player;Local Multi-P...,Action,Action;FPS;Multiplayer
4,Single-player;Multi-player;Valve Anti-Cheat en...,Action,FPS;Action;Sci-fi
...,...,...,...
27070,Single-player;Steam Achievements,Adventure;Casual;Indie,Adventure;Indie;Casual
27071,Single-player,Action;Adventure;Indie,Action;Indie;Adventure
27072,Single-player;Multi-player;Co-op;Shared/Split ...,Action;Casual;Indie,Action;Indie;Casual
27073,Single-player;Steam Cloud,Adventure;Casual;Indie,Indie;Casual;Adventure


In [7]:
cols = contents.columns.to_list()
cols

['categories', 'genres', 'steamspy_tags']

In [8]:
game_data = pd.DataFrame(columns=['name', 'contents'])
game_data

Unnamed: 0,name,contents


In [9]:
for i in range(df.shape[0]):
    data = df.loc[i]
    name = data['name']
    contents = ''
    merged = []
    for col in cols:
        origin = data[col]
        parsed = origin.split(';')
        if (col == 'categories') and (len(parsed) > 5):
            parsed = parsed[:5]
        for k in range(len(parsed)):
            parse = parsed[k]
            parse = parse.replace(' ', '_')
            parsed[k] = parse
        merged = merged + parsed
        
    set_merged = set(merged)
    merged = list(set_merged)
    for j in range(len(merged)):
        if j == 0:
            contents = contents + merged[j]
        else :
            contents = contents + ' ' + merged[j]
    game_data.loc[i, 'name'] = name
    game_data.loc[i, 'contents'] = contents
    if i % 10000 == 0:
        print(i, 'finished')

0 finished
10000 finished
20000 finished


In [10]:
game_data

Unnamed: 0,name,contents
0,Counter-Strike,Multiplayer Local_Multi-Player Valve_Anti-Chea...
1,Team Fortress Classic,Multiplayer Local_Multi-Player Valve_Anti-Chea...
2,Day of Defeat,World_War_II Multiplayer Valve_Anti-Cheat_enab...
3,Deathmatch Classic,Multiplayer Local_Multi-Player Valve_Anti-Chea...
4,Half-Life: Opposing Force,Sci-fi Single-player Valve_Anti-Cheat_enabled ...
...,...,...
27070,Room of Pandora,Adventure Indie Single-player Casual Steam_Ach...
27071,Cyber Gun,Adventure Indie Single-player Action
27072,Super Star Blast,Single-player Indie Casual Action Steam_Achiev...
27073,New Yankee 7: Deer Hunters,Steam_Cloud Single-player Indie Adventure Casual


In [11]:
cnt_vec = CountVectorizer()

vec = cnt_vec.fit_transform(game_data['contents'])

sim = cosine_similarity(vec, vec).argsort()[:, ::-1]
print(sim)

[[    0     1     3 ... 26302 10158 13504]
 [    0     1     3 ... 26302 10158 13504]
 [    2    15    13 ... 19211  5875 19446]
 ...
 [ 6380 27072  5676 ...  7992  9977  1263]
 [27074 26795 27044 ... 19388 23891 25031]
 [27074 26795 27044 ... 19388 23891 25031]]


In [25]:
def recommend_game(df, appid, top=10):
    # get the data of game that user played
    target_game_idx = df[df['appid'] == appid].index.values
        
    # get the similarity based on the cosine similarity
    sim_idx = sim[target_game_idx, :top].reshape(-1)
    
    # remove index of itself
    sim_idx = sim_idx[sim_idx != target_game_idx]

    # make the name list of top 10 recommended games
    result = df.iloc[sim_idx].sort_values('rating', 
                                               ascending=False)[:10]    
    
    return result

In [26]:
# print a result of Codename CURE (appid = 355180)
print('User Played "Codename CURE"')
print('Recommended games are...')
recommend_game(df, 355180)

User Played "Codename CURE"
Recommended games are...


Unnamed: 0,appid,name,release_date,english,developer,publisher,platforms,required_age,categories,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,rating
28,1250,Killing Floor,2009-05-14,1,Tripwire Interactive,Tripwire Interactive,windows;mac;linux,0,Single-player;Multi-player;Co-op;Cross-Platfor...,Action,FPS;Zombies;Co-op,285,53710,2649,1328,306,2000000-5000000,14.99,9.529977
3391,306020,Bloons TD 5,2014-11-19,1,Ninja Kiwi,Ninja Kiwi,windows;mac,0,Single-player;Co-op;Online Co-op;Cross-Platfor...,Action;Strategy,Tower Defense;Strategy;Co-op,125,3808,347,1127,1127,200000-500000,6.99,9.164862
16093,678800,SAS: Zombie Assault 4,2017-10-16,1,Ninja Kiwi,Ninja Kiwi,windows;mac,0,Single-player;Online Multi-Player;Online Co-op...,Action;Free to Play;RPG,Free to Play;Multiplayer;Zombies,59,2086,501,40,41,500000-1000000,0.0,8.063394
1727,236390,War Thunder,2013-08-15,1,Gaijin Entertainment,Gaijin Entertainment,windows;mac;linux,0,Single-player;MMO;Co-op;Cross-Platform Multipl...,Action;Free to Play;Massively Multiplayer;Simu...,Free to Play;World War II;Multiplayer,46,83852,22166,4200,297,10000000-20000000,0.0,7.909223
24027,915580,Dungeon Hunter Champions,2018-12-19,1,Gameloft,Gameloft,windows,0,Single-player;Online Multi-Player;Online Co-op...,Action;Free to Play;RPG,Free to Play;RPG;Action,106,124,118,375,375,50000-100000,0.0,5.123967
17204,709870,West of Red,2018-03-07,1,Racing Bros,ANPA.US,windows;mac;linux,0,Single-player;Co-op;Online Co-op;Cross-Platfor...,Violent;Gore;Action;Indie;Simulation,Action;Indie;Gore,5,22,31,18,18,0-20000,3.99,4.150943
17718,724430,Knights Hunt,2018-03-20,1,Racing Bros,ANPA.US,windows;mac;linux,0,Single-player;Co-op;Online Co-op;Cross-Platfor...,Violent;Gore;Action;Indie;Simulation,Action;Indie;Gore,2001,24,37,3,3,20000-50000,3.99,3.934426
24173,921590,DISSIDIA FINAL FANTASY NT Free Edition,2019-03-12,1,"Square Enix;KOEI TECMO GAMES CO., LTD.",Square Enix,windows,0,Multi-player;Online Multi-Player;Co-op;Online ...,Action;Free to Play,Free to Play;Action;Multiplayer,56,620,1005,285,289,200000-500000,0.0,3.815385
17067,705710,Sense of The Devil,2017-09-22,1,Racing Bros,ANPA.US,windows;mac;linux,0,Single-player;Co-op;Online Co-op;Cross-Platfor...,Violent;Gore;Action;Indie,Action;Gore;Indie,1011,29,59,0,0,20000-50000,0.79,3.295455


In [12]:
def recommend_game_nameLIst(df, appid, top=10):
    # get the data of game that user played
    target_game_idx = df[df['appid'] == appid].index.values
        
    # get the similarity based on the cosine similarity
    sim_idx = sim[target_game_idx, :top].reshape(-1)
    
    # remove index of itself
    sim_idx = sim_idx[sim_idx != target_game_idx]

    # make the name list of top 10 recommended games
    recommended = df.iloc[sim_idx].sort_values('rating', 
                                               ascending=False)[:10]    
    result = recommended['name'].to_list()
    
    return result

In [24]:
# print a result of Codename CURE (appid = 355180)
print('User Played "Codename CURE"')
print('Recommended games are...')
recommend_game_nameLIst(df, 355180)

User Played "Codename CURE"
Recommended games are...


['Killing Floor',
 'Bloons TD 5',
 'SAS: Zombie Assault 4',
 'War Thunder',
 'Dungeon Hunter Champions',
 'West of Red',
 'Knights Hunt',
 'DISSIDIA FINAL FANTASY NT Free Edition',
 'Sense of The Devil']

In [14]:
origin_data = pd.read_csv('data_join.csv')

In [15]:
origin_data.shape

(92883, 23)

In [16]:
recommendList = []
for i in range(origin_data.shape[0]):
    gameName = recommend_game_nameLIst(df, origin_data.loc[i, 'appid'])
    recommendList.append(';'.join(gameName))
    if i % 10000 == 0:
        print(i, ': finished')

0 : finished
10000 : finished
20000 : finished
30000 : finished
40000 : finished
50000 : finished
60000 : finished
70000 : finished
80000 : finished
90000 : finished


In [17]:
recommendResult = pd.Series(data=recommendList)

In [18]:
origin_data['recommended'] = recommendResult

In [19]:
origin_data.head()

Unnamed: 0.1,Unnamed: 0,user_id,name,purchase,1.0,0,appid,release_date,english,developer,...,genres,steamspy_tags,achievements,positive_ratings,negative_ratings,average_playtime,median_playtime,owners,price,recommended
0,1394,151603712.0,Fallout 4,purchase,1.0,0.0,377160.0,2015-11-09,1.0,Bethesda Game Studios,...,RPG,Open World;Post-apocalyptic;Exploration,84.0,110376.0,45377.0,4822.0,2904.0,5000000-10000000,19.99,The Witcher® 3: Wild Hunt;Fallout 2: A Post Nu...
1,1395,151603712.0,Fallout 4,play,87.0,0.0,377160.0,2015-11-09,1.0,Bethesda Game Studios,...,RPG,Open World;Post-apocalyptic;Exploration,84.0,110376.0,45377.0,4822.0,2904.0,5000000-10000000,19.99,The Witcher® 3: Wild Hunt;Fallout 2: A Post Nu...
2,1396,87445402.0,Fallout 4,purchase,1.0,0.0,377160.0,2015-11-09,1.0,Bethesda Game Studios,...,RPG,Open World;Post-apocalyptic;Exploration,84.0,110376.0,45377.0,4822.0,2904.0,5000000-10000000,19.99,The Witcher® 3: Wild Hunt;Fallout 2: A Post Nu...
3,1397,87445402.0,Fallout 4,play,83.0,0.0,377160.0,2015-11-09,1.0,Bethesda Game Studios,...,RPG,Open World;Post-apocalyptic;Exploration,84.0,110376.0,45377.0,4822.0,2904.0,5000000-10000000,19.99,The Witcher® 3: Wild Hunt;Fallout 2: A Post Nu...
4,1398,25096601.0,Fallout 4,purchase,1.0,0.0,377160.0,2015-11-09,1.0,Bethesda Game Studios,...,RPG,Open World;Post-apocalyptic;Exploration,84.0,110376.0,45377.0,4822.0,2904.0,5000000-10000000,19.99,The Witcher® 3: Wild Hunt;Fallout 2: A Post Nu...


In [20]:
origin_data.to_csv("content_recommendation_result.csv", index = False)