In [3]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [4]:
games = pd.read_csv('games.csv') # Load the data

In [5]:
games.shape # Check the shape of the data

(85103, 39)

In [6]:
games = games[['AppID', 'Name', 'Release date', 'Required age', 'DLC count', 'About the game','Reviews', 'Header image', 'Website', 'Windows', 'Mac', 'Linux', 'Developers', 'Publishers', 'Categories', 'Genres', 'Tags', 'Screenshots']] # Select the columns we need

In [7]:
games.isnull().sum() # Check for missing values

AppID                 0
Name                  6
Release date          0
Required age          0
DLC count             0
About the game     3567
Reviews           75360
Header image          0
Website           45651
Windows               0
Mac                   0
Linux                 0
Developers         3587
Publishers         3867
Categories         4598
Genres             3555
Tags              21100
Screenshots        2006
dtype: int64

In [8]:
games.dropna(inplace=True) # Drop missing values

In [9]:
games.duplicated().sum() # Check for duplicate rows

0

In [10]:
games.head() # Check the first few rows

Unnamed: 0,AppID,Name,Release date,Required age,DLC count,About the game,Reviews,Header image,Website,Windows,Mac,Linux,Developers,Publishers,Categories,Genres,Tags,Screenshots
10,1026420,WARSAW,"Oct 2, 2019",0,0,Use everything at your disposal to help a team...,“New WW2 Strategy Game Offers A Harrowing Look...,https://cdn.akamai.steamstatic.com/steam/apps/...,http://warsawthegame.com,True,False,False,Pixelated Milk,"Pixelated Milk,gaming company","Single-player,Steam Achievements,Steam Trading...","Indie,RPG","Tactical RPG,Turn-Based Strategy,Wargame,Histo...",https://cdn.akamai.steamstatic.com/steam/apps/...
11,485000,Cthulhu Realms,"Jul 1, 2016",0,1,Star Realms has gone insane! Introducing Cthul...,“The art in Cthulhu Realms is hilarious and be...,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.cthulhurealms.com/,True,True,False,"Wise Wizard Games, LLC","Wise Wizard Games, LLC","Single-player,Multi-player,Shared/Split Screen...",Strategy,"Card Game,Strategy,Deckbuilding,Lovecraftian,B...",https://cdn.akamai.steamstatic.com/steam/apps/...
17,346560,Hero of the Kingdom II,"Feb 20, 2015",0,0,Sail to the farthest islands to save your sist...,“Hero of the Kingdom II is a title that casual...,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.lonelytroops.com/hotk2/index.htm,True,True,True,Lonely Troops,Lonely Troops,"Single-player,Steam Achievements,Steam Trading...","Adventure,Casual,Indie,RPG","Adventure,Casual,Point & Click,RPG,Indie,Isome...",https://cdn.akamai.steamstatic.com/steam/apps/...
44,897820,Reigns: Game of Thrones,"Oct 18, 2018",0,0,Reigns: Game of Thrones is the heir to the awa...,“Reigns: Game of Thrones is so much better tha...,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.reignsgame.com,True,True,True,Nerial,Devolver Digital,"Single-player,Steam Achievements,Full controll...","Adventure,Indie,RPG","RPG,Indie,Card Game,Adventure,Choices Matter,2...",https://cdn.akamai.steamstatic.com/steam/apps/...
71,541570,Sally Face - Episode One,"Dec 14, 2016",0,1,A DARK MYSTERY IS UNFOLDING... Delve into an u...,“Wholly unique gaming experience that has to b...,https://cdn.akamai.steamstatic.com/steam/apps/...,http://portablemoose.com/,True,True,True,Portable Moose,Portable Moose,"Single-player,Steam Achievements,Full controll...","Adventure,Indie","Story Rich,Psychological Horror,Dark,Horror,In...",https://cdn.akamai.steamstatic.com/steam/apps/...


In [11]:
games.shape # Check the shape of the data

(7419, 18)

In [12]:
games['Tags'] = games['Publishers'] + ' ' + games['Developers'] + ' ' + games['Tags'] + ' ' + games['Categories'] # Combine the text columns
games['Tags'] = games['Tags'].astype(str)

In [13]:
new_df = games[['Name', 'Release date', 'DLC count', 'Header image', 'Website', 'Screenshots', 'Genres', 'Tags']] # Select the columns we need 

In [14]:
new_df.head() # Check the first few rows

Unnamed: 0,Name,Release date,DLC count,Header image,Website,Screenshots,Genres,Tags
10,WARSAW,"Oct 2, 2019",0,https://cdn.akamai.steamstatic.com/steam/apps/...,http://warsawthegame.com,https://cdn.akamai.steamstatic.com/steam/apps/...,"Indie,RPG","Pixelated Milk,gaming company Pixelated Milk T..."
11,Cthulhu Realms,"Jul 1, 2016",1,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.cthulhurealms.com/,https://cdn.akamai.steamstatic.com/steam/apps/...,Strategy,"Wise Wizard Games, LLC Wise Wizard Games, LLC ..."
17,Hero of the Kingdom II,"Feb 20, 2015",0,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.lonelytroops.com/hotk2/index.htm,https://cdn.akamai.steamstatic.com/steam/apps/...,"Adventure,Casual,Indie,RPG","Lonely Troops Lonely Troops Adventure,Casual,P..."
44,Reigns: Game of Thrones,"Oct 18, 2018",0,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.reignsgame.com,https://cdn.akamai.steamstatic.com/steam/apps/...,"Adventure,Indie,RPG","Devolver Digital Nerial RPG,Indie,Card Game,Ad..."
71,Sally Face - Episode One,"Dec 14, 2016",1,https://cdn.akamai.steamstatic.com/steam/apps/...,http://portablemoose.com/,https://cdn.akamai.steamstatic.com/steam/apps/...,"Adventure,Indie","Portable Moose Portable Moose Story Rich,Psych..."


In [15]:
new_df.shape # Check the shape of the data

(7419, 8)

In [16]:
import nltk # Natural Language Toolkit

In [17]:
from nltk.stem.porter import PorterStemmer 
ps = PorterStemmer() # Create a stemmer object

In [18]:
def stem(text): 
    y = [] 
    for i in text.split(): 
        y.append(ps.stem(i)) 

    return ' '.join(y) 

In [19]:
from sklearn.feature_extraction.text import CountVectorizer # Import CountVectorizer
cv = CountVectorizer(max_features=8000, stop_words='english') # Create a CountVectorizer object

In [20]:
vectors = cv.fit_transform(new_df['Tags']).toarray() # Fit the data to the CountVectorizer object

In [21]:
new_df['Tags'] = new_df['Tags'].apply(stem) # Apply the stem function to the tags column    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Tags'] = new_df['Tags'].apply(stem) # Apply the stem function to the tags column


In [22]:
cv.get_feature_names_out() # Get the feature names

array(['07th', '10', '100', ..., '純白花園weissgarden', '阿龟与阿飞', '高考恋爱委员会'],
      dtype=object)

In [23]:
from sklearn.metrics.pairwise import cosine_similarity # Import cosine_similarity   

In [24]:
similarity = cosine_similarity(vectors) # Compute the cosine similarity matrix

In [25]:
def recommend(name):
    # Check if the game exists in the DataFrame
    if name not in new_df['Name'].values:
        return f"Game '{name}' not found in the dataset."
    
    game_index = new_df[new_df['Name'] == name].index[0]  # Get the index of the game
    distances = similarity[game_index]  # Get the similarity values
    games_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x: x[1])[1:]  # Get the top 10 similar games
    
    recommended_games = [new_df.iloc[i[0]]['Name'] for i in games_list[:10]]
    return recommended_games

In [26]:
new_df.head(20)

Unnamed: 0,Name,Release date,DLC count,Header image,Website,Screenshots,Genres,Tags
10,WARSAW,"Oct 2, 2019",0,https://cdn.akamai.steamstatic.com/steam/apps/...,http://warsawthegame.com,https://cdn.akamai.steamstatic.com/steam/apps/...,"Indie,RPG","pixel milk,gam compani pixel milk tactic rpg,t..."
11,Cthulhu Realms,"Jul 1, 2016",1,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.cthulhurealms.com/,https://cdn.akamai.steamstatic.com/steam/apps/...,Strategy,"wise wizard games, llc wise wizard games, llc ..."
17,Hero of the Kingdom II,"Feb 20, 2015",0,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.lonelytroops.com/hotk2/index.htm,https://cdn.akamai.steamstatic.com/steam/apps/...,"Adventure,Casual,Indie,RPG","lone troop lone troop adventure,casual,point &..."
44,Reigns: Game of Thrones,"Oct 18, 2018",0,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.reignsgame.com,https://cdn.akamai.steamstatic.com/steam/apps/...,"Adventure,Indie,RPG","devolv digit nerial rpg,indie,card game,advent..."
71,Sally Face - Episode One,"Dec 14, 2016",1,https://cdn.akamai.steamstatic.com/steam/apps/...,http://portablemoose.com/,https://cdn.akamai.steamstatic.com/steam/apps/...,"Adventure,Indie","portabl moos portabl moos stori rich,psycholog..."
80,Good Knight,"Sep 17, 2021",0,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.doublethinkgames.com,https://cdn.akamai.steamstatic.com/steam/apps/...,"Action,Casual,Indie,Early Access",doublethink game team good knight earli access...
89,Aurion: Legacy of the Kori-Odan,"Apr 14, 2016",1,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.aurionthegame.com/,https://cdn.akamai.steamstatic.com/steam/apps/...,"Action,Adventure,Indie,RPG","dear villag kiro'o game action rpg,retro,adven..."
107,Fat City,"Oct 22, 2018",0,https://cdn.akamai.steamstatic.com/steam/apps/...,http://www.fatcitygame.com/index.html,https://cdn.akamai.steamstatic.com/steam/apps/...,"Casual,Indie,Strategy","heavi iron studios, inc. heavi iron studio str..."
119,Ragnarock,"Jul 15, 2021",17,https://cdn.akamai.steamstatic.com/steam/apps/...,https://www.ragnarock-vr.com/,https://cdn.akamai.steamstatic.com/steam/apps/...,"Casual,Indie,Racing,Sports","wanadevstudio wanadevstudio rhythm,vr,multipla..."
124,Go For Launch: Mercury,"Jul 4, 2021",0,https://cdn.akamai.steamstatic.com/steam/apps/...,http://goforlaunch.co.uk,https://cdn.akamai.steamstatic.com/steam/apps/...,"Action,Indie,Simulation","joe chisholm joe chisholm simulation,action,in..."


In [27]:
recommend('Valhalla Hills') # Recommend games similar to Grand Theft Auto V

['Tennis Manager 2021',
 'Brewpub Simulator',
 'Recipe for Disaster',
 'SYMMETRY',
 'Industry Giant',
 'Bronze Age - HD Edition',
 'Computer Tycoon',
 'Sheltered 2',
 'SPE:X',
 'Moviehouse – The Film Studio Tycoon']

In [28]:
import pickle # Import pickle

In [29]:
pickle.dump(new_df.to_dict(), open('new_df_dict.pkl', 'wb')) # Save the new_df dataframe to a pickle file

In [30]:
pickle.dump(similarity, open('similarity.pkl', 'wb')) # Save the similarity matrix to a pickle file