In [1]:
import pandas as pd

In [2]:
# Load the per-game score table (one row per game) and preview the first rows.
GAME_SCORES_CSV = 'game_score_with_genres.csv'
merged_df = pd.read_csv(GAME_SCORES_CSV)
merged_df.head()

Unnamed: 0,appid,average_sentiment,game,positive_rate,composite_score,normalized_composite_score,Top 3 Genres,genre_list,review_count
0,10,3.637931,Counter-Strike,100.0,32.546552,97.093146,['Action'],"[""['Action']""]",116
1,20,3.083333,Team Fortress Classic,95.294118,30.746569,91.605392,['Action'],"[""['Action']""]",60
2,30,4.222222,Day of Defeat,100.0,32.955556,98.340108,['Action'],"[""['Action']""]",9
3,40,4.384615,Deathmatch Classic,95.0,31.569231,94.113508,['Action'],"[""['Action']""]",13
4,50,3.583333,Half-Life: Opposing Force,100.0,32.508333,96.976626,['Action'],"[""['Action']""]",12


In [3]:
# Keep only games that have strictly more than 20 reviews.
has_enough_reviews = merged_df['review_count'].gt(20)
games_with_reviews = merged_df.loc[has_enough_reviews]

# Rank the remaining games from highest to lowest normalized composite score
# and keep the first 20 rows of that ranking.
ranked_by_score = games_with_reviews.sort_values(
    by='normalized_composite_score',
    ascending=False,
)
top_20_games = ranked_by_score.head(20)

print(top_20_games.loc[:, ['game', 'normalized_composite_score']])


                                          game  normalized_composite_score
3306                       Blood: Fresh Supply                   99.314024
4370                      Milo and the Magpies                   99.126940
1309  This War of Mine - War Child Charity DLC                   98.844004
5198             Rock Life: The Rock Simulator                   98.577236
1688                                      Furi                   98.522514
999                                 Mini Metro                   98.475610
1574                                    Hylics                   98.463415
122         Supreme Commander: Forged Alliance                   98.434959
576              Brothers - A Tale of Two Sons                   98.427471
2933                      Little Nightmares II                   98.409852
60                                 Psychonauts                   98.399390
5160                                The Looker                   98.362167
5384                    S

In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Requires merged_df to be loaded with the 'game' and 'Top 3 Genres' columns.
#
# 'Top 3 Genres' stores each game's genres as the text of a Python list
# (e.g. "['Action']"), not as a ';'-separated string. Splitting that text on
# ';' yields a one-element list holding the whole raw string, so genre overlap
# could only ever be 0 or 1. Parse the literal into a real list of genre names.
def _parse_genres(raw):
    """Convert one 'Top 3 Genres' cell into a list of genre-name strings.

    Accepts an existing list/tuple/set, the text of a Python list literal
    (e.g. "['Action', 'Indie']"), or a ';'-separated string. Missing or
    non-string values produce an empty list.
    """
    import ast  # local import so this cell does not depend on another cell's imports

    if isinstance(raw, (list, tuple, set)):
        return [str(genre).strip() for genre in raw]
    if not isinstance(raw, str):
        # NaN / None: no genre information for this game.
        return []

    text = raw.strip()
    if text.startswith('['):
        try:
            parsed = ast.literal_eval(text)
        except (ValueError, SyntaxError):
            parsed = None  # not a valid literal; fall back to ';' splitting below
        if isinstance(parsed, (list, tuple, set)):
            return [str(genre).strip() for genre in parsed]

    # Fallback for plain ';'-separated genre strings; empty pieces are dropped.
    return [piece.strip() for piece in text.split(';') if piece.strip()]


merged_df['genre_list'] = merged_df['Top 3 Genres'].apply(_parse_genres)

# Vectorize game titles
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(merged_df['game'])

# Pairwise cosine similarity between title vectors; row/column i corresponds to
# the i-th row (by position) of merged_df.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

def genre_similarity(genre_list1, genre_list2):
    """Return how many distinct genres appear in both genre collections.

    Duplicates inside either input are ignored because both inputs are
    reduced to sets before they are compared.
    """
    first_genres = set(genre_list1)
    second_genres = set(genre_list2)
    shared_genres = first_genres.intersection(second_genres)
    return len(shared_genres)

def recommend_games(title, merged_df, cosine_sim, top_n=5):
    """Recommend games whose titles are most similar to ``title``.

    The ``top_n`` rows with the highest title cosine similarity to the query
    game (the query row itself is excluded) are selected, then ordered by
    genre overlap with the query game and, within equal overlap, by
    ``normalized_composite_score``; both descending.

    Parameters
    ----------
    title : str
        Exact value to look up in ``merged_df['game']``. If several rows
        share the title, the first one is used as the query.
    merged_df : pandas.DataFrame
        Must provide the 'game', 'genre_list' and
        'normalized_composite_score' columns.
    cosine_sim : 2-D array-like
        Pairwise similarity matrix; row/column ``i`` must correspond to the
        ``i``-th row of ``merged_df`` by position.
    top_n : int, default 5
        Number of nearest neighbours to return.

    Returns
    -------
    list of tuple
        ``(game, genre_overlap, title_similarity)`` for each recommendation.

    Raises
    ------
    IndexError
        If no row of ``merged_df`` has the requested title.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Resolve the title to a row *position*: cosine_sim and .iloc are both
    # positional, so an index label is only correct for a default RangeIndex.
    positions = (merged_df['game'] == title).to_numpy().nonzero()[0]
    if len(positions) == 0:
        raise IndexError(f"No game titled {title!r} found in merged_df['game']")
    idx = int(positions[0])

    # Drop the query row explicitly instead of assuming it sorts first; on a
    # similarity tie another row may sort ahead of it.
    candidates = [
        (pos, score)
        for pos, score in enumerate(cosine_sim[idx])
        if pos != idx
    ]
    nearest = sorted(candidates, key=lambda item: item[1], reverse=True)[:top_n]

    # Enhance recommendations by genre similarity. The composite score is read
    # from the candidate's own row so duplicate titles cannot pick up another
    # row's score.
    target_genres = merged_df.iloc[idx]['genre_list']
    enhanced_scores = []
    for pos, score in nearest:
        row = merged_df.iloc[pos]
        enhanced_scores.append((
            row['game'],
            genre_similarity(row['genre_list'], target_genres),
            score,
            row['normalized_composite_score'],
        ))

    # Sort by genre similarity and then by normalized_composite_score
    enhanced_scores.sort(key=lambda entry: (entry[1], entry[3]), reverse=True)

    # The composite score was only needed for ordering; return the 3-tuples
    # callers already expect.
    return [entry[:3] for entry in enhanced_scores]

# Example: list the titles recommended for one game.
recommended_games = recommend_games('Little Nightmares II', merged_df, cosine_sim)
for recommended_title, _genre_overlap, _title_similarity in recommended_games:
    print(recommended_title)

Little Nightmares
Little Nightmares II Digital Content Bundle
Neverending Nightmares
Hollow Knight - Gods & Nightmares
A Little to the Left
