In [1]:
from ast import literal_eval
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [2]:
# "boardgames.csv" must be uploaded to the session's files before this cell
# runs; per the original note, the upload has to be repeated every session.
file_path = 'boardgames.csv'
boardgame_df = pd.read_csv(file_path)
# NOTE: the original author flagged a Unicode problem on row 24 of this file;
# it is not visible in the five-row preview below.
boardgame_df.head()

Unnamed: 0,objectid,name,average,avgweight,boardgamecategory,boardgamemechanic
0,174430,Gloomhaven,8.85292,3.8078,"['Adventure', 'Exploration', 'Fantasy', 'Fight...","['Campaign / Battle Card Driven', 'Cooperative..."
1,161936,Pandemic Legacy Season 1,8.62499,2.8301,"['Environmental', 'Medical']","['Action Points', 'Cooperative Game', 'Hand Ma..."
2,167791,Terraforming Mars,8.42299,3.2313,"['Economic', 'Environmental', 'Industry / Manu...","['Card Drafting', 'End Game Bonuses', 'Hand Ma..."
3,182028,Through the Ages A New Story of Civilization,8.49419,4.385,"['Card Game', 'Civilization', 'Economic']","['Action Points', 'Auction/Bidding', 'Auction:..."
4,224517,Brass Birmingham,8.62031,3.9122,"['Economic', 'Industry / Manufacturing', 'Tran...","['Hand Management', 'Income', 'Loans', 'Market..."


In [3]:
def decode_lambda(x):
    r"""Turn literal escape sequences inside ``x`` (e.g. ``\u2013``) into characters.

    The name is kept for compatibility with existing callers even though this
    is now a regular function.

    Parameters
    ----------
    x : object
        Normally a string. Any non-string value (for example a missing cell)
        is returned unchanged.

    Returns
    -------
    object
        The decoded string, or ``x`` itself when it is not a string or when it
        contains a malformed escape sequence.
    """
    if not isinstance(x, str):
        return x
    try:
        # The unicode-escape codec reads its input as Latin-1. Encoding with
        # UTF-8 first would therefore garble genuine non-ASCII characters;
        # Latin-1 plus backslashreplace round-trips them instead.
        return x.encode('latin-1', 'backslashreplace').decode('unicode-escape')
    except UnicodeDecodeError:
        # Malformed escape (e.g. a trailing backslash): keep the original text.
        return x
# Run the decoder over every entry of the 'name' column.
boardgame_df['name'] = boardgame_df['name'].apply(decode_lambda)
# Preview the frame after decoding the names.
boardgame_df.head()

  
  
  
  
  
  
  


Unnamed: 0,objectid,name,average,avgweight,boardgamecategory,boardgamemechanic
0,174430,Gloomhaven,8.85292,3.8078,"['Adventure', 'Exploration', 'Fantasy', 'Fight...","['Campaign / Battle Card Driven', 'Cooperative..."
1,161936,Pandemic Legacy Season 1,8.62499,2.8301,"['Environmental', 'Medical']","['Action Points', 'Cooperative Game', 'Hand Ma..."
2,167791,Terraforming Mars,8.42299,3.2313,"['Economic', 'Environmental', 'Industry / Manu...","['Card Drafting', 'End Game Bonuses', 'Hand Ma..."
3,182028,Through the Ages A New Story of Civilization,8.49419,4.385,"['Card Game', 'Civilization', 'Economic']","['Action Points', 'Auction/Bidding', 'Auction:..."
4,224517,Brass Birmingham,8.62031,3.9122,"['Economic', 'Industry / Manufacturing', 'Tran...","['Hand Management', 'Income', 'Loans', 'Market..."


In [4]:
# Replace the raw CSV headers with readable column labels.
boardgame_df = boardgame_df.rename(
    columns={
        'objectid': 'ID',
        'name': 'Name',
        'average': 'Avg Rating',
        'avgweight': 'Complexity',
        'boardgamecategory': 'Category',
        'boardgamemechanic': 'Mechanic',
    }
)

# boardgame_df['Category'] = boardgame_df['Category'].apply(lambda x: x[1:len(x)-1].split(', '))

# boardgame_df['Mechanic'] = boardgame_df['Mechanic'].apply(lambda x: x[1:len(x)-1].split(', '))

In [5]:
# Display the renamed frame; the rendered output shows only the first and
# last rows with an ellipsis row in between.
boardgame_df

Unnamed: 0,ID,Name,Avg Rating,Complexity,Category,Mechanic
0,174430,Gloomhaven,8.85292,3.8078,"['Adventure', 'Exploration', 'Fantasy', 'Fight...","['Campaign / Battle Card Driven', 'Cooperative..."
1,161936,Pandemic Legacy Season 1,8.62499,2.8301,"['Environmental', 'Medical']","['Action Points', 'Cooperative Game', 'Hand Ma..."
2,167791,Terraforming Mars,8.42299,3.2313,"['Economic', 'Environmental', 'Industry / Manu...","['Card Drafting', 'End Game Bonuses', 'Hand Ma..."
3,182028,Through the Ages A New Story of Civilization,8.49419,4.3850,"['Card Game', 'Civilization', 'Economic']","['Action Points', 'Auction/Bidding', 'Auction:..."
4,224517,Brass Birmingham,8.62031,3.9122,"['Economic', 'Industry / Manufacturing', 'Tran...","['Hand Management', 'Income', 'Loans', 'Market..."
...,...,...,...,...,...,...
19995,5154,Franchise,6.00000,0.0000,"['Economic', 'Negotiation']","['Simulation', 'Trading']"
19996,5158,Punto y Raya,4.00000,0.0000,['Abstract Strategy'],['Enclosure']
19997,5159,3 Up,6.00000,0.0000,"['Abstract Strategy', 'Childrens Game']",['Pattern Building']
19998,5160,Dino-Checkers,5.77500,0.0000,"['Abstract Strategy', 'Childrens Game']",['Area Majority / Influence']


In [6]:
# 'Category' and 'Mechanic' hold stringified Python lists such as
# "['Economic', 'Negotiation']". Parse them with literal_eval instead of
# slicing off the brackets and splitting on ', ': the split approach leaves the
# quote characters inside every tag, breaks tags that themselves contain ', ',
# and turns an empty list into [''].
categories_df = boardgame_df[['ID', 'Category']].copy()
categories_df['Category'] = categories_df['Category'].apply(literal_eval)

mechanics_df = boardgame_df[['ID', 'Mechanic']].copy()
mechanics_df['Mechanic'] = mechanics_df['Mechanic'].apply(literal_eval)

In [7]:
# Keep only the scalar columns; the two list-like columns are dropped here.
boardgame_trunc = boardgame_df.drop(columns=['Category', 'Mechanic'])
boardgame_trunc.head()

Unnamed: 0,ID,Name,Avg Rating,Complexity
0,174430,Gloomhaven,8.85292,3.8078
1,161936,Pandemic Legacy Season 1,8.62499,2.8301
2,167791,Terraforming Mars,8.42299,3.2313
3,182028,Through the Ages A New Story of Civilization,8.49419,4.385
4,224517,Brass Birmingham,8.62031,3.9122


In [8]:
# Attach the parsed categories to the trimmed frame, matching rows on 'ID'.
boardgames = boardgame_trunc.merge(right=categories_df, how='inner', on='ID')


In [9]:
# Build the combined frame from boardgame_trunc rather than from `boardgames`
# itself. Merging `boardgames` into itself made this cell non-idempotent:
# running it a second time merged 'Mechanic' again and produced suffixed
# duplicate columns. On a top-to-bottom run the result is unchanged.
boardgames = (
    boardgame_trunc
    .merge(categories_df, on='ID')
    .merge(mechanics_df, on='ID')
)


In [10]:
# Preview the first five rows of the merged frame.
boardgames.head(5)

Unnamed: 0,ID,Name,Avg Rating,Complexity,Category,Mechanic
0,174430,Gloomhaven,8.85292,3.8078,"['Adventure', 'Exploration', 'Fantasy', 'Fight...","['Campaign / Battle Card Driven', 'Cooperative..."
1,161936,Pandemic Legacy Season 1,8.62499,2.8301,"['Environmental', 'Medical']","['Action Points', 'Cooperative Game', 'Hand Ma..."
2,167791,Terraforming Mars,8.42299,3.2313,"['Economic', 'Environmental', 'Industry / Manu...","['Card Drafting', 'End Game Bonuses', 'Hand Ma..."
3,182028,Through the Ages A New Story of Civilization,8.49419,4.385,"['Card Game', 'Civilization', 'Economic']","['Action Points', 'Auction/Bidding', 'Auction:..."
4,224517,Brass Birmingham,8.62031,3.9122,"['Economic', 'Industry / Manufacturing', 'Tran...","['Hand Management', 'Income', 'Loans', 'Market..."


In [11]:
from ast import literal_eval

# Convert the stringified lists in both tag columns into real Python lists.
# Only string cells are parsed, so running this cell again after the columns
# already hold lists is a no-op instead of a ValueError from literal_eval.
features = ['Category', 'Mechanic']
for feature in features:
    boardgame_df[feature] = boardgame_df[feature].apply(
        lambda cell: literal_eval(cell) if isinstance(cell, str) else cell
    )

In [12]:
import numpy as np

def clean_data(x):
    """Lower-case ``x`` and remove its spaces.

    Parameters
    ----------
    x : object
        A list of strings, a single string, or anything else.

    Returns
    -------
    list or str
        A new list with every element lower-cased and stripped of spaces when
        ``x`` is a list; the lower-cased, space-free string when ``x`` is a
        string; an empty string for any other value.
    """
    if isinstance(x, list):
        return [item.replace(" ", "").lower() for item in x]
    if isinstance(x, str):
        return x.replace(" ", "").lower()
    # Neither a list nor a string (presumably a missing value): fall back to ''.
    return ''

# Normalise both tag columns of boardgame_df in place: every cell is passed
# through clean_data (lower-cased, spaces removed).
features = ['Category', 'Mechanic']
for feature in features:
    boardgame_df[feature] = boardgame_df[feature].apply(func=clean_data)


In [13]:
def create_soup(x):
    """Build one space-separated string from a row's categories and mechanics.

    Parameters
    ----------
    x : mapping
        Anything indexable by ``'Category'`` and ``'Mechanic'`` (e.g. a
        DataFrame row), each holding an iterable of strings.

    Returns
    -------
    str
        The categories joined by spaces, one space, then the mechanics joined
        by spaces.
    """
    category_text = ' '.join(x['Category'])
    mechanic_text = ' '.join(x['Mechanic'])
    return f'{category_text} {mechanic_text}'

In [14]:
# Build the 'soup' text for every game by calling create_soup once per row.
boardgame_df['soup'] = boardgame_df.apply(create_soup, axis='columns')


In [15]:
# Import CountVectorizer and create the count matrix
from sklearn.feature_extraction.text import CountVectorizer

# Token-count matrix over the 'soup' strings: one row per game, one column per
# distinct token. stop_words='english' applies scikit-learn's built-in English
# stop-word list.
count = CountVectorizer(stop_words='english')
count_matrix = count.fit_transform(raw_documents=boardgame_df['soup'])

In [16]:
# Matrix dimensions: (rows of boardgame_df['soup'], distinct tokens kept).
count_matrix.shape


(20000, 589)

In [17]:
# Compute the Cosine Similarity matrix based on the count_matrix
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity of every game against every other game. With a
# single argument scikit-learn compares the matrix with itself.
# NOTE(review): the result is a square matrix with one row and column per game
# (20000 x 20000 for the shape shown above), so it is memory-hungry.
cosine_sim = cosine_similarity(count_matrix)

In [18]:
# Reverse lookup: game name -> index label in boardgame_df.
# NOTE(review): nothing here removes duplicate names, so a name shared by
# several games would map to several labels.
indices = pd.Series(data=boardgame_df.index, index=boardgame_df['Name'])


In [19]:
def get_recommendations(name, cosine_sim=cosine_sim, top_n=10):
    """Return the names of the games most similar to ``name``.

    Parameters
    ----------
    name : str
        Game title, looked up in the module-level ``indices`` Series.
    cosine_sim : array-like, optional
        Square similarity matrix; row ``i`` holds the similarity of game ``i``
        to every game. Defaults to the module-level ``cosine_sim``.
    top_n : int, optional
        How many recommendations to return (default 10).

    Returns
    -------
    pandas.Series
        Up to ``top_n`` entries of ``boardgame_df['Name']``, most similar
        first. The queried game itself is never included.

    Raises
    ------
    KeyError
        If ``name`` is not present in ``indices``.
    """
    idx = indices[name]
    # When several games share a name the lookup yields a Series of matches;
    # use the first one rather than failing on the row lookup below.
    if isinstance(idx, pd.Series):
        idx = idx.iloc[0]

    # NOTE(review): idx is an index label used as a row position; this assumes
    # boardgame_df keeps its default 0..n-1 index - confirm.
    scores = np.asarray(cosine_sim[idx])

    # Stable descending order: ties keep ascending row order.
    ranked = np.argsort(-scores, kind='stable')

    # Remove the queried game explicitly. Skipping the first ranked entry is
    # wrong when an earlier row ties with it at the top score.
    ranked = ranked[ranked != idx][:top_n]

    return boardgame_df['Name'].iloc[ranked]

In [20]:
# Example query: the games closest to Gloomhaven by category/mechanic overlap.
get_recommendations(name='Gloomhaven', cosine_sim=cosine_sim)


11211                                           Frosthaven
1336                  Middara  Unintentional Malum – Act 1
4453                                             Star Saga
12281                             Tower of the Wizard King
95          Descent  Journeys in the Dark (Second Edition)
651      Dungeons & Dragons  Wrath of Ashardalon Board ...
758                                       Massive Darkness
2119                                      Dungeon Alliance
2592              Shadows of Brimstone  Forbidden Fortress
3693                                     Champions of Hara
Name: Name, dtype: object