# Hybrid Model using game tags

In [1]:
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import NearestCentroid
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
import pandas as pd

First only content based filtering

In [2]:
#get the data 
csvPath = "../Datasets/FinalRatings.csv"

In [3]:
df = pd.read_csv (csvPath)
df = df.drop_duplicates(['user_id', 'name'], keep="last")
df = df.reset_index(drop=True)

### Select a random user

In [4]:
#select a random user that has played more that minHours #JUST LIKE LAB1
minHours = 500
df['hours'] = df['hours']
df['hours'] = df['hours'].astype(float)
users_ratings = df.groupby('user_id').hours.agg(['sum']).reindex(df.user_id).reset_index()
# users_ratings = df.groupby('user_id').total.agg(['sum']).reindex(df.idx).reset_index()
display(users_ratings)
selected = users_ratings['sum'] > minHours

selected_users = df.loc[selected]

# random_selected = selected_users.sample() 
# select_column_df = random_selected.reset_index()['user_id'] 
# selected_user = select_column_df.iloc[0] 
# print("Selected user: " + str(selected_user))

Unnamed: 0,user_id,sum
0,151603712,134.5
1,87445402,87.7
2,25096601,208.0
3,211925330,848.0
4,115396529,365.7
...,...,...
32626,154230723,923.1
32627,116564064,489.0
32628,135400225,1205.5
32629,135400225,1205.5


### Rated games by the user

In [5]:
def get_rated_games_user(selected_user):
    selected_user_ratings = df.loc[df['user_id'] == selected_user]
    rated_games_df = selected_user_ratings
    rated_games_df = rated_games_df[['name', 'game_description', 'Rating M1', 'Rating M2']]
    return rated_games_df


### Unrated games by the user

In [6]:
def get_unrated_games_user(rated_games_df):
    diff = set(df.index) - set(rated_games_df.index)
    unrated_games_df = df.loc[diff]
    unrated_games_df = unrated_games_df[['name', 'game_description']]
    unrated_games_df = unrated_games_df.drop_duplicates(subset ="name") #Had to remove duplicates 
    return unrated_games_df

## Content Based Filtering
#### Remove users who dont have at least 30 games rated

In [7]:
import numpy as np
users = np.array(selected_users['user_id'])
unique, counts = np.unique(users, return_counts=True)
main_users =list()
for i, val in enumerate(unique):
    if counts[i] >= 10:
        main_users.append(val)

#### Train KNN for each user using Rating M1 and Rating M2 seperately for all users

In [8]:
import sklearn
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import NearestCentroid
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Perceptron
allUsersList = []
pd.options.mode.chained_assignment = None  # default='warn'
# initialise the vectorizer
vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
# vectorizer = CountVectorizer()
full_predictions = pd.DataFrame()
# for each gamer in the selected user list (min gameplay time of 300)
for gamer in main_users:
    rated_games_df = get_rated_games_user(gamer)
#     display(rated_games_df)
    unrated_games_df = get_unrated_games_user(rated_games_df)
        
    try:
        X = vectorizer.fit_transform(rated_games_df['game_description'])
        y = rated_games_df['Rating M1']
        indices = np.arange(X.shape[0])
        (
            X_train,
            X_test,
            y_train,
            y_test,
            indices_train,
            indices_test,
        ) = train_test_split(X, y, indices, test_size=0.3, random_state=101)

        linReg = LinearRegression().fit(X_train, y_train)
        neigh = KNeighborsRegressor(n_neighbors=10).fit(X_train, y_train)
        nearCentroid = NearestCentroid().fit(X_train, y_train)
        lr = LogisticRegression().fit(X_train, y_train)
        per = Perceptron().fit(X_train, y_train)
    
        X_unrated = vectorizer.transform(unrated_games_df['game_description'].values.astype('U'))

        # predicting already existing rated values 
        y_pred1_reg = linReg.predict(X_test)
        y_pred1_neigh = neigh.predict(X_test)
        y_pred1_nearCentroid = nearCentroid.predict(X_test)
        y_pred1_lr = lr.predict(X_test)
        y_pred1_per = per.predict(X_test)


        # predicting unrated games
        y_pred1_unrated_reg = linReg.predict(X_unrated)
        y_pred1_unrated_neigh = neigh.predict(X_unrated)
        y_pred1_unrated_nearCentroid = nearCentroid.predict(X_unrated)
        y_pred1_unrated_lr = lr.predict(X_unrated)
        y_pred1_unrated_per = per.predict(X_unrated)


        unrated_games_df['predicted_ratings_KNN M1'] = y_pred1_unrated_neigh
        unrated_games_df['predicted_ratings_Reg M1'] = y_pred1_unrated_reg
        unrated_games_df['predicted_ratings_nearCent M1'] = y_pred1_unrated_nearCentroid
        unrated_games_df['predicted_ratings_lr M1'] = y_pred1_unrated_lr
        unrated_games_df['predicted_ratings_per M1'] = y_pred1_unrated_per


        tt = rated_games_df.iloc[indices_test]

        tt['y_test_KNN M1'] = y_pred1_neigh
        tt['y_test_Reg M1'] = y_pred1_reg
        tt['y_test_nearCent M1'] = y_pred1_nearCentroid
        tt['y_test_lr M1'] = y_pred1_lr
        tt['y_test_per M1'] = y_pred1_per


        #                                                   ''' Second Algorithms '''

        y2 = rated_games_df['Rating M2']
        indices2 = np.arange(X.shape[0])
        (
            X_train2,
            X_test2,
            y_train2,
            y_test2,
            indices_train2,
            indices_test2,
        ) = train_test_split(X, y2, indices2, test_size=0.3, random_state=101)

        linReg2 = LinearRegression().fit(X_train, y_train2)
        neigh2 = KNeighborsRegressor(n_neighbors=10).fit(X_train, y_train2)
        nearCentroid2 = NearestCentroid().fit(X_train, y_train2)
        lr2 = LogisticRegression().fit(X_train, y_train2)
        per2 = Perceptron().fit(X_train, y_train2)

        y_unrated2_Reg = linReg2.predict(X_unrated)
        y_pred2_Reg = linReg2.predict(X_test)
    
        y_unrated2_neigh = neigh2.predict(X_unrated)
        y_pred2_neigh = neigh2.predict(X_test)

        y_unrated2_nearCentroid = nearCentroid2.predict(X_unrated)
        y_pred2_nearCentroid = nearCentroid2.predict(X_test)

        y_unrated2_lr = lr2.predict(X_unrated)
        y_pred2_lr = lr2.predict(X_test)

        y_unrated2_per = per2.predict(X_unrated)
        y_pred2_per = per2.predict(X_test)

        unrated_games_df['predicted_ratings_KNN M2'] = y_unrated2_neigh
        unrated_games_df['predicted_ratings_Reg M2'] = y_unrated2_Reg
        unrated_games_df['predicted_ratings_nearCent M2'] = y_unrated2_nearCentroid
        unrated_games_df['predicted_ratings_lr M2'] = y_unrated2_lr
        unrated_games_df['predicted_ratings_per M2'] = y_unrated2_per

        
        unrated_games_df_KNN_ordering = unrated_games_df.sort_values(by='predicted_ratings_KNN M2', ascending=False)
        unrated_games_df_Reg_ordering = unrated_games_df.sort_values(by='predicted_ratings_Reg M2', ascending=False)
        unrated_games_df_nearCent_ordering = unrated_games_df.sort_values(by='predicted_ratings_nearCent M2', ascending=False)
        unrated_games_df_lr_ordering = unrated_games_df.sort_values(by='predicted_ratings_lr M2', ascending=False)
        unrated_games_df_per_ordering = unrated_games_df.sort_values(by='predicted_ratings_per M2', ascending=False)
        

    #     display(unrated_games_df_KNN_ordering.head(10))

        tt['y_test_KNN M2'] = y_pred2_neigh
        tt['y_test_Reg M2'] = y_pred2_Reg
        tt['y_test_nearCent M2'] = y_pred2_nearCentroid
        tt['y_test_lr M2'] = y_pred2_lr
        tt['y_test_per M2'] = y_pred2_per

        full_predictions = full_predictions.append(tt)
        
        
    except:
        continue

In [88]:
userss = []
for ind, fp in full_predictions.iterrows():
    userss.append(df.iloc[ind]['user_id'])

In [62]:
full_predictions.insert(loc=0, column='user_id', value=userss)
display(full_predictions)

Unnamed: 0,user_id,name,game_description,Rating M1,Rating M2,y_test_KNN M1,y_test_Reg M1,y_test_nearCent M1,y_test_lr M1,y_test_per M1,y_test_KNN M2,y_test_Reg M2,y_test_nearCent M2,y_test_lr M2,y_test_per M2
20684,298950,The Walking Dead,About This Series When the world is ravaged b...,4,5,1.9,1.852272,1,1,2,2.4,2.552343,1,1,5
5084,298950,Dota 2,About This Game The most-played game on Stea...,1,1,2.3,1.921009,1,1,2,2.6,2.467740,1,1,3
20760,298950,The Testament of Sherlock Holmes,"About This Game YOU ARE SHERLOCK HOLMES, AND...",1,1,1.7,1.754736,1,1,1,2.6,2.568771,1,1,5
21039,298950,Surgeon Simulator,About This Game Malpractice Makes Perfect. D...,1,2,1.7,1.752004,1,1,1,2.0,2.425019,1,1,1
20965,298950,"Papers, Please",About This Game Congratulations. The October...,1,1,1.2,1.768093,1,1,1,1.9,2.456534,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28301,216785107,Stranded Deep,About This Game Take the role of a plane cras...,3,5,2.9,3.090481,5,5,1,3.4,3.540153,5,5,5
6175,216785107,Dota 2,About This Game The most-played game on Stea...,1,1,2.9,3.141188,5,5,5,3.4,3.546267,5,5,5
15438,216785107,Survarium,About This Game Join SURVARIUM Beta Now and ...,3,5,2.9,3.018844,5,5,1,3.4,3.462722,5,5,1
26043,216785107,Besiege,About This Game Besiege is a physics based bu...,5,5,2.9,3.114093,5,5,5,3.4,3.553784,5,5,5


In [115]:
from sklearn.metrics import mean_squared_error
import math
rmse_M1_KNN = math.sqrt(mean_squared_error(tt['Rating M1'], tt['y_test_KNN M1']))
rmse_M2_KNN = math.sqrt(mean_squared_error(tt['Rating M2'], tt['y_test_KNN M2']))

rmse_M1_Reg = math.sqrt(mean_squared_error(tt['Rating M1'], tt['y_test_Reg M1']))
rmse_M2_Reg = math.sqrt(mean_squared_error(tt['Rating M2'], tt['y_test_Reg M2']))

rmse_M1_nearCent = math.sqrt(mean_squared_error(tt['Rating M1'], tt['y_test_nearCent M1']))
rmse_M2_nearCent = math.sqrt(mean_squared_error(tt['Rating M2'], tt['y_test_nearCent M2']))

rmse_M1_lr = math.sqrt(mean_squared_error(tt['Rating M1'], tt['y_test_lr M1']))
rmse_M2_lr = math.sqrt(mean_squared_error(tt['Rating M2'], tt['y_test_lr M2']))

print('RMSE for KNN M1: ' + str(rmse_M1_KNN))
print('RMSE for KNN M2: ' + str(rmse_M2_KNN))

print('RMSE for Reg M1: ' + str(rmse_M1_Reg))
print('RMSE for Reg M2: ' + str(rmse_M2_Reg))

print('RMSE for nearCent M1: ' + str(rmse_M1_nearCent))
print('RMSE for nearCent M2: ' + str(rmse_M2_nearCent))

print('RMSE for lr M1: ' + str(rmse_M1_lr))
print('RMSE for lr M2: ' + str(rmse_M2_lr))



RMSE for KNN M1: 1.3304134695650072
RMSE for KNN M2: 1.7888543819998317
RMSE for Reg M1: 1.3546560950956983
RMSE for Reg M2: 1.7552278889182553
RMSE for nearCent M1: 2.569046515733026
RMSE for nearCent M2: 1.7888543819998317
RMSE for lr M1: 2.569046515733026
RMSE for lr M2: 1.7888543819998317


## General collaborative filtering

In [43]:
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
ratings_df1 = df[['user_id','name','Rating M1']]
ratings_df1 = ratings_df1.rename(columns={'user_id':'user', 'name': 'item', 'Rating M1': 'rating'})
user_user = UserUser(15, min_nbrs=3)  # Minimum (3) and maximum (15) number of neighbors to consider
user_user.fit(ratings_df1)

#Generating cf predictions using lenskit for all users
colabPredDict = dict()
users = []
games = []
predRatings = []
for gamer in main_users:
    rated_games_df = get_rated_games_user(gamer)
    selected_games_useruser1 = user_user.predict_for_user(gamer, list(rated_games_df['name'])) 
    for item in list(rated_games_df['name']):
        users.append(gamer)
        games.append(item)        
        index = list(rated_games_df['name']).index(item)
        gameRating = selected_games_useruser1[index] 
        predRatings.append(gameRating)

In [80]:
#create visualization
userSeries = pd.Series(users,name="user_id")
gamesSeries = pd.Series(games,name="name")
predRatingsSeries = pd.Series(predRatings,name="collabRating")


df_item= pd.DataFrame(userSeries)
df_item = df_item.rename(columns = {0:'user_id'})

df_brand = pd.DataFrame(gamesSeries)
df_brand = df_brand.rename(columns = {0:'name'})

df_price = pd.DataFrame(predRatingsSeries)
df_price = df_price.rename(columns = {0:'collabRating'})

df_all = pd.concat([df_item, df_brand, df_price ], axis=1)
df_all = df_all.fillna(0)
display(df_all)

Unnamed: 0,user_id,name,collabRating
0,298950,Fallout 4,4.328121
1,298950,Left 4 Dead 2,3.382511
2,298950,Left 4 Dead,2.315984
3,298950,Team Fortress 2,4.120956
4,298950,Tomb Raider,3.085204
...,...,...,...
10310,262861632,Warface,0.000000
10311,262861632,Cubic Castles,0.512883
10312,262861632,Creativerse,1.621550
10313,262861632,Trove,0.642872


In [110]:
#generating the hybrid ratings
hybridRatings = []
actualRatings = []
for ind, dff in full_predictions.iterrows():
    user = dff['user_id']
    game = dff['name']
    contentRating = dff['y_test_KNN M1']
    actualRatings.append(dff['Rating M1'])
    
    #to get the rating generated by content based
    x = df_all[df_all['user_id']==user]
    xx = x[x['name'] == game]
    collabRating = xx['collabRating']
    
    #calculate Hybrid
    hybridRating = 0.5*contentRating + 0.5*collabRating
    hybridRatings.append(float(hybridRating))

In [111]:
#creating the dataframe to visualize
hybRating = pd.Series(hybridRatings,name="hybrid")
df_hyb= pd.DataFrame(hybRating)
df_hyb = df_hyb.rename(columns = {0:'hybridRating'})

actRating = pd.Series(actualRatings,name="actual")
df_act= pd.DataFrame(actRating)
df_act = df_act.rename(columns = {0:'actualRating'})


df_all = pd.concat([df_item, df_brand, df_price, df_hyb, df_act], axis=1)
df_all = df_all.fillna(0)
display(df_all)

Unnamed: 0,user_id,name,collabRating,hybrid,actual
0,298950,Fallout 4,4.328121,2.273793,4.0
1,298950,Left 4 Dead 2,3.382511,0.690000,1.0
2,298950,Left 4 Dead,2.315984,1.769841,1.0
3,298950,Team Fortress 2,4.120956,1.204601,1.0
4,298950,Tomb Raider,3.085204,1.019334,1.0
...,...,...,...,...,...
10310,262861632,Warface,0.000000,0.000000,0.0
10311,262861632,Cubic Castles,0.512883,0.000000,0.0
10312,262861632,Creativerse,1.621550,0.000000,0.0
10313,262861632,Trove,0.642872,0.000000,0.0


In [113]:
import math
rmse_Hybrid = mean_squared_error(df_all['actual'], df_all['hybrid'])
math.sqrt(rmse_Hybrid)

0.6990416374633993

## Collaborative Filtering 
#### UserUser with Lenskit

In [11]:
from lenskit.algorithms import Recommender
from lenskit.algorithms.user_knn import UserUser
ratings_df1 = df[['user_id','name','Rating M1']]
ratings_df1 = ratings_df1.rename(columns={'user_id':'user', 'name': 'item', 'Rating M1': 'rating'})
user_user = UserUser(15, min_nbrs=3)  # Minimum (3) and maximum (15) number of neighbors to consider
user_user.fit(ratings_df1)

OMP: Info #271: omp_set_nested routine deprecated, please use omp_set_max_active_levels instead.
Numba is using threading layer omp - consider TBB
BLAS using multiple threads - can cause oversubscription
found 2 potential runtime problems - see https://boi.st/lkpy-perf


<lenskit.algorithms.user_knn.UserUser at 0x12bdad8e0>

# Group Recommendations

Select random users to form a group

In [138]:
import random
group1 = random.choices(main_users, k = 5)
print(group1)

[20772968, 118852041, 108264287, 101695880, 152959594]


Get the games that they have been rated

In [139]:
#rated Games
groupdf = pd.DataFrame()
for val in group1:
    groupdf = groupdf.append(df.loc[df["user_id"] == val])
# display(groupdf)

Get the games that none of the group members rated

In [140]:
games = set(list(df[["name"]]))
unrated_groupdf = get_unrated_games_user(groupdf)
# (unrated_groupdf)

For each user in the group, train the model and then predict for the unrated games

In [141]:
vectorizer = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=0, stop_words='english')
predDict = dict()

for gamer in group1:
    #Training
    groupdf = get_rated_games_user(gamer)
    X = vectorizer.fit_transform(groupdf['game_description'])
    y = groupdf['Rating M1']

    indices = np.arange(X.shape[0])
    (
        X_train,
        X_test,
        y_train,
        y_test,
        indices_train,
        indices_test,
    ) = train_test_split(X, y, indices, test_size=0.3, random_state=101)

    linReg = LinearRegression().fit(X_train, y_train)
#     neigh = KNeighborsRegressor(n_neighbors=10).fit(X_train, y_train)
    nearCentroid = NearestCentroid().fit(X_train, y_train)
    lr = LogisticRegression().fit(X_train, y_train)
    per = Perceptron().fit(X_train, y_train)

    #Predicting
    X_unrated = vectorizer.transform(unrated_groupdf['game_description'].values.astype('U'))
    y_pred1_unrated_reg = linReg.predict(X_unrated)
#     y_pred1_unrated_neigh = neigh.predict(X_unrated)
    y_pred1_unrated_nearCentroid = nearCentroid.predict(X_unrated)
    y_pred1_unrated_lr = lr.predict(X_unrated)
    y_pred1_unrated_per = per.predict(X_unrated)

    #add them to dictionary
    if group1.index(gamer)==0:
        predDict['reg'] = [y_pred1_unrated_reg]
#         predDict['neigh'] = [y_pred1_unrated_neigh]
        predDict['nearCentroid'] = [y_pred1_unrated_nearCentroid]
        predDict['lr'] = [y_pred1_unrated_lr]
        predDict['per'] = [y_pred1_unrated_per]
    else:
        predDict['reg'].append(y_pred1_unrated_reg)
#         predDict['neigh'].append(y_pred1_unrated_neigh)
        predDict['nearCentroid'].append(y_pred1_unrated_nearCentroid)
        predDict['lr'].append(y_pred1_unrated_lr)
        predDict['per'].append(y_pred1_unrated_per)

Create a dataframe for the content based ratings 

In [142]:
gameDict = dict()
for user in group1:
    userInd = group1.index(user)
    for game in unrated_groupdf['name']:
        index = list(unrated_groupdf['name']).index(game)
        gameRating = predDict['reg'][userInd][index]
        if game in gameDict.keys():
            gameDict[game].append(gameRating)
        else:
            gameDict[game] = [gameRating]
    

In [143]:
#Only the linear regression
data = (gameDict)
dfContent = pd.DataFrame(data)
dfContent.insert(loc=0, column='user_id', value=group1)


Collaborative Filtering for each user and predict for the unrated games

In [144]:
colabPredDict = dict()
for gamer in group1:
    selected_games_useruser1 = user_user.predict_for_user(gamer, list(unrated_groupdf['name']))
    print
    for item in list(unrated_groupdf['name']):
        index = list(unrated_groupdf['name']).index(item)
        gameRating = selected_games_useruser1[index] 
        if item in colabPredDict.keys():
            colabPredDict[item].append(gameRating)
        else:
            colabPredDict[item] = [gameRating]

Create a dataframe for the collaborative filtering ratings 

In [145]:
#Only the linear regression
data = (colabPredDict)
dfCollab = pd.DataFrame(data)
dfCollab.insert(loc=0, column='user_id', value=group1)
dfCollab = dfCollab.fillna(0)

In [146]:
unratedGames = list(unrated_groupdf['name'])

In [147]:
hybridDict = dict()
for game in unratedGames:
    for userRating in dfCollab[game]:
        ind = list(dfCollab[game]).index(userRating)
        collabScore = userRating
        contentScore = dfContent[game][ind]
        gameRating = (0.5*collabScore) + (0.5*contentScore)
        if game in hybridDict.keys():
            hybridDict[game].append(gameRating)
        else:
            hybridDict[game] = [gameRating]

In [148]:
data = (hybridDict)
dfHybrid = pd.DataFrame(data)
dfHybrid.insert(loc=0, column='user_id', value=group1)
dfHybrid = dfHybrid.fillna(0)

Print them all under each other so we can see whats happening 

In [149]:
display(dfCollab)
display(dfContent)
display(dfHybrid)

Unnamed: 0,user_id,Fallout 4,Left 4 Dead 2,HuniePop,Path of Exile,Poly Bridge,Left 4 Dead,Team Fortress 2,Tomb Raider,The Banner Saga,...,Kuros,Back to Bed,Legend of Fae,DinerTown Tycoon,The Impossible Game,Khet 2.0,SAMURAI WARRIORS 4-II,Life is Hard,Executive Assault,MirrorMoon EP
0,20772968,2.848712,1.900708,3.243167,1.13029,0.0,3.926999,1.328863,0.0,1.410317,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,118852041,4.547641,4.10757,4.335964,2.742136,0.0,3.010566,2.653609,0.0,2.359312,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,108264287,5.134715,1.818212,4.6267,2.855587,0.0,2.625694,2.606662,0.0,2.79889,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,101695880,1.983465,1.010758,3.462122,0.0,0.0,1.926996,0.608891,0.0,1.142382,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,152959594,3.572193,2.40214,3.631165,1.600229,0.0,1.607051,4.812219,0.0,1.719863,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Unnamed: 0,user_id,Fallout 4,Left 4 Dead 2,HuniePop,Path of Exile,Poly Bridge,Left 4 Dead,Team Fortress 2,Tomb Raider,The Banner Saga,...,Kuros,Back to Bed,Legend of Fae,DinerTown Tycoon,The Impossible Game,Khet 2.0,SAMURAI WARRIORS 4-II,Life is Hard,Executive Assault,MirrorMoon EP
0,20772968,2.02369,2.0,1.96119,2.236446,1.807031,1.965214,1.0,1.972723,2.259776,...,2.016631,1.886803,2.168114,2.066551,1.856724,1.92912,2.014982,2.030082,1.888497,1.935256
1,118852041,4.0,2.484664,2.510367,2.483841,2.900598,2.328774,2.553578,2.465413,2.549534,...,2.567259,2.589729,2.274274,2.457273,2.538814,2.517075,2.477077,2.599631,2.827534,2.68333
2,108264287,5.0,1.0,2.732204,2.760441,2.834169,2.129811,2.48258,3.018323,2.722074,...,2.594812,2.854299,2.623617,2.684686,2.616585,2.557243,2.714829,2.755945,2.820515,2.650557
3,101695880,1.557636,1.558889,1.672134,1.808044,1.588608,1.576505,1.775259,1.468253,1.659456,...,1.690496,1.438083,1.430222,1.589313,1.526264,1.66907,1.471377,1.683049,1.504779,1.557082
4,152959594,2.218061,3.0,2.039889,2.077548,2.15852,2.204148,5.0,2.078941,2.142891,...,2.18469,1.905091,1.906519,1.930741,1.903651,2.028185,2.015854,2.101305,1.916938,2.030825


Unnamed: 0,user_id,Fallout 4,Left 4 Dead 2,HuniePop,Path of Exile,Poly Bridge,Left 4 Dead,Team Fortress 2,Tomb Raider,The Banner Saga,...,Kuros,Back to Bed,Legend of Fae,DinerTown Tycoon,The Impossible Game,Khet 2.0,SAMURAI WARRIORS 4-II,Life is Hard,Executive Assault,MirrorMoon EP
0,20772968,2.436201,1.950354,2.602179,1.683368,0.903515,2.946106,1.164432,0.986361,1.835046,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628
1,118852041,4.273821,3.296117,3.423165,2.612988,0.903515,2.66967,2.603593,0.986361,2.454423,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628
2,108264287,5.067358,1.409106,3.679452,2.808014,0.903515,2.377752,2.544621,0.986361,2.760482,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628
3,101695880,1.77055,1.284824,2.567128,0.904022,0.903515,1.75175,1.192075,0.986361,1.400919,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628
4,152959594,2.895127,2.70107,2.835527,1.838888,0.903515,1.905599,4.906109,0.986361,1.931377,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628


Group Aggregation

In [150]:
aggf = ['Addition', 'Least_Misery', 'Most_Pleasure', 'Least_Misery+Most_Pleasure']
AggregationResult = pd.DataFrame(index = aggf, columns= unratedGames).fillna(0).astype(float)
for game in unratedGames:
    AggregationResult.at['Addition', game] = sum(dfHybrid[game])
    AggregationResult.at['Least_Misery', game] = min(dfHybrid[game])
    AggregationResult.at['Most_Pleasure', game] = max(dfHybrid[game])
    AggregationResult.at['Least_Misery+Most_Pleasure', game] = min(dfHybrid[game])+max(dfHybrid[game])

In [151]:
display(AggregationResult)

Unnamed: 0,Fallout 4,Left 4 Dead 2,HuniePop,Path of Exile,Poly Bridge,Left 4 Dead,Team Fortress 2,Tomb Raider,The Banner Saga,BioShock Infinite,...,Kuros,Back to Bed,Legend of Fae,DinerTown Tycoon,The Impossible Game,Khet 2.0,SAMURAI WARRIORS 4-II,Life is Hard,Executive Assault,MirrorMoon EP
Addition,16.443056,10.64147,15.107451,9.847281,4.517577,11.650879,12.410831,4.931807,10.382247,10.623028,...,5.041578,4.717006,5.420284,5.166377,4.641811,4.8228,5.037454,5.075205,4.721242,4.83814
Least_Misery,1.77055,1.284824,2.567128,0.904022,0.903515,1.75175,1.164432,0.986361,1.400919,1.174069,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628
Most_Pleasure,5.067358,3.296117,3.679452,2.808014,0.903515,2.946106,4.906109,0.986361,2.760482,2.829387,...,1.008316,0.943401,1.084057,1.033275,0.928362,0.96456,1.007491,1.015041,0.944248,0.967628
Least_Misery+Most_Pleasure,6.837908,4.580941,6.24658,3.712036,1.807031,4.697857,6.070541,1.972723,4.161401,4.003456,...,2.016631,1.886803,2.168114,2.066551,1.856724,1.92912,2.014982,2.030082,1.888497,1.935256


Sort Accordingly

In [152]:
AggregationResult = AggregationResult.sort_values(by ='Least_Misery+Most_Pleasure', axis=1, ascending=False)

In [153]:
# AggregationResult = AggregationResult.sort_values(by ='Addition', axis=1, ascending=False)

Choose top 5 games

In [154]:
top5Games  = AggregationResult.columns[:5].values.ravel()
print(top5Games)

['Fallout 4' 'Mass Effect 2' "Garry's Mod" 'Elite Dangerous' 'HuniePop']


In [155]:
new = dfHybrid[top5Games].copy()
new = new.set_axis(top5Games, axis=1)
display(new)

Unnamed: 0,Fallout 4,Mass Effect 2,Garry's Mod,Elite Dangerous,HuniePop
0,2.436201,4.608836,1.748304,2.867617,2.602179
1,4.273821,3.115568,4.844873,3.51309,3.423165
2,5.067358,3.264423,3.813812,3.752626,3.679452
3,1.77055,2.047184,1.377023,2.497487,2.567128
4,2.895127,2.571167,4.933541,3.03575,2.835527


Generate Explanations for the recommended games

In [156]:
from expGenerator import ExpGenerator

exp = ExpGenerator()
exp.generateExp(new)

Fallout 4 is not the top choice for majority, but some will love it. Why not give it a try?
Mass Effect 2 is not the top choice for majority, but some will love it. Why not give it a try?
The majority of the group will love the game: Garry's Mod.
The majority of the group will like Elite Dangerous.
The majority of the group will like HuniePop.
