In [2]:
import pandas as pd

In [3]:
reviews_df = pd.read_csv('all_clean.csv') # Make sure you have sentiment_score in your reviews dataframe

In [4]:
reviews_df.head()

Unnamed: 0,recommendationid,appid,game,author_steamid,author_playtime_forever,processed_review,sentiment_score
0,147263134,10,Counter-Strike,76561199013220131,2510,cs 1 . 6>cs2,3
1,146253831,10,Counter-Strike,76561198847956367,404175,#1 played pc game online since 1999,5
2,145002893,10,Counter-Strike,76561198995972388,81,"best played with a membrane keyboard , a rolle...",5
3,144308037,10,Counter-Strike,76561198893691388,89,my friend who i play counter-strike with said ...,1
4,142806143,10,Counter-Strike,76561198200827945,26583,counter-strike won't ever be canceled . counte...,5


In [6]:
from scipy.sparse import coo_matrix
from sklearn.neighbors import NearestNeighbors

# Assuming df is your DataFrame with the specified columns
# Map each user and game to a unique numeric value
reviews_df['user_id'] = reviews_df['author_steamid'].astype('category').cat.codes
reviews_df['item_id'] = reviews_df['appid'].astype('category').cat.codes

# Create a sparse matrix for user-item interactions
interaction_matrix = coo_matrix((reviews_df['sentiment_score'], (reviews_df['user_id'], reviews_df['item_id'])))

# Fit KNN model
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(interaction_matrix)

# Example: Get top 5 similar users for the first user
distances, indices = model_knn.kneighbors(interaction_matrix.getrow(0), n_neighbors=6)

# Reverse mapping from index to original user ID
index_to_user_id = {index: user_id for user_id, index in reviews_df['user_id'].items()}
recommended_user_ids = [index_to_user_id[index] for index in indices.flatten()[1:]]

print(f'Recommended users for the first user are: {recommended_user_ids}')


Recommended users for the first user are: [139645, 139653, 139660, 139685, 139671]


## hybrid system

In [8]:
games_df = pd.read_csv('game_score_with_genres.csv')

In [35]:
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd

# Assuming reviews_df and games_df are already defined as per your project's description

# Step 1: Prepare User-User Collaborative Filtering Model
# Convert author_steamid and appid to categorical codes for matrix creation
reviews_df['user_code'] = reviews_df['author_steamid'].astype('category').cat.codes
reviews_df['game_code'] = reviews_df['appid'].astype('category').cat.codes

# Create interaction matrix based on sentiment scores
interaction_matrix = coo_matrix((reviews_df['sentiment_score'], 
                                 (reviews_df['user_code'], reviews_df['game_code'])))

# Fit KNN model for collaborative filtering
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(interaction_matrix)

# Step 2: Prepare Content-Based Filtering Model
# Vectorize game titles for content similarity
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(games_df['game'])

# Function to find similar users
def find_similar_users(user_code, n_neighbors=6):
    distances, indices = model_knn.kneighbors(interaction_matrix.getrow(user_code), n_neighbors=n_neighbors)
    return indices.flatten()[1:]

# Function to find similar games based on content
def find_similar_games(game_index, n_neighbors=5):
    cosine_sim = cosine_similarity(tfidf_matrix[game_index], tfidf_matrix).flatten()
    similar_indices = cosine_sim.argsort()[:-n_neighbors-1:-1]
    return games_df.iloc[similar_indices]

# Hybrid Recommendation Function
def hybrid_recommend(user_id, n_neighbors=5):
    user_code = reviews_df.loc[reviews_df['author_steamid'] == user_id, 'user_code'].iloc[0]
    similar_users = find_similar_users(user_code, n_neighbors)
    
    recommended_games = set()
    for user_code in similar_users:
        # Identify games this user has interacted with
        user_games = reviews_df[reviews_df['user_code'] == user_code]['game_code'].unique()
        for game_code in user_games:
            similar_games = find_similar_games(game_code, n_neighbors)
            recommended_games.update(similar_games['game'].values)
            
    return recommended_games

# Example usage
user_id = 76561198343568022  # Replace with an actual author_steamid
recommendations = hybrid_recommend(user_id)
print(f"Recommended games: {recommendations}")


Recommended games: {'Bloodstained: Ritual of the Night', 'Night of the Dead', 'Bloodstained: Curse of the Moon', 'Bloodstained: Ritual of the Night - "Iga\'s Back Pack" DLC', 'At Dead Of Night'}


In [34]:
row = reviews_df.iloc[2341]
row

recommendationid                                                    51298457
appid                                                                1041460
game                       Bloodstained: Ritual of the Night - "Iga's Bac...
author_steamid                                             76561198343568022
author_playtime_forever                                                    0
processed_review           look , i'm just getting this because i missed ...
sentiment_score                                                            5
user_id                                                               102834
item_id                                                                 3379
user_code                                                             102834
game_code                                                               3379
Name: 2341, dtype: object

In [39]:
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from scipy.sparse import coo_matrix
import numpy as np
import pandas as pd

# Assuming reviews_df and games_df are already defined as per your project's description

# Step 1: Prepare User-User Collaborative Filtering Model
# Convert author_steamid and appid to categorical codes for matrix creation
reviews_df['user_code'] = reviews_df['author_steamid'].astype('category').cat.codes
reviews_df['game_code'] = reviews_df['appid'].astype('category').cat.codes

# Create interaction matrix based on sentiment scores
interaction_matrix = coo_matrix((reviews_df['sentiment_score'], 
                                 (reviews_df['user_code'], reviews_df['game_code'])))

# Fit KNN model for collaborative filtering
model_knn = NearestNeighbors(metric='cosine', algorithm='brute')
model_knn.fit(interaction_matrix)

# Step 2: Prepare Content-Based Filtering Model
# Vectorize game titles for content similarity
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(games_df['game'])


def genre_similarity(genre_list1, genre_list2):
    # Convert genre lists to sets for easy comparison
    set1 = set(genre_list1)
    set2 = set(genre_list2)
    # Calculate intersection and union
    intersection = set1.intersection(set2)
    union = set1.union(set2)
    # Compute Jaccard similarity as the size of intersection divided by the size of union
    similarity = len(intersection) / len(union) if union else 0
    return similarity

# Function to find similar users
def find_similar_users(user_code, n_neighbors=6):
    distances, indices = model_knn.kneighbors(interaction_matrix.getrow(user_code), n_neighbors=n_neighbors)
    return indices.flatten()[1:]

# Function to find similar games based on content
def enhanced_find_similar_games(game_id, n_neighbors=5):
    # Assuming game_id is an int and corresponds to 'appid' in games_df
    target_genres = set(games_df.loc[games_df['appid'] == game_id, 'genre_list'].iloc[0])
    game_indices = range(len(games_df))
    
    # Compute cosine similarity for all games
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix[games_df.index[games_df['appid'] == game_id].tolist()[0]])
    
    # Include genre similarity in scores
    enhanced_scores = []
    for i in game_indices:
        genre_sim = genre_similarity(games_df.iloc[i]['genre_list'], target_genres)
        enhanced_scores.append((i, (cosine_sim[i] + genre_sim) / 2))  # Average content and genre similarities
    
    # Sort games based on enhanced score and normalized_composite_score
    sorted_games = sorted(enhanced_scores, key=lambda x: x[1], reverse=True)[:n_neighbors]
    recommended_indices = [i[0] for i in sorted_games]
    
    # Further sort by normalized_composite_score
    recommended_games = games_df.iloc[recommended_indices].sort_values('normalized_composite_score', ascending=False)
    
    return recommended_games['game'].values

def hybrid_recommend(user_id, n_neighbors=5):
    if user_id not in reviews_df['author_steamid'].values:
        return "User ID not found in the dataset."
    
    user_code = reviews_df.loc[reviews_df['author_steamid'] == user_id, 'user_code'].iloc[0]
    similar_users = find_similar_users(user_code, n_neighbors)
    
    recommended_games = set()
    for user_code in similar_users:
        user_games = reviews_df[reviews_df['user_code'] == user_code]['appid'].unique()
        for game_id in user_games:
            similar_games = enhanced_find_similar_games(game_id, n_neighbors)
            recommended_games.update(similar_games)
            
    return list(recommended_games)[:n_neighbors]


# Example usage
user_id = 76561198847956367  # Replace with an actual author_steamid
recommendations = hybrid_recommend(user_id)
print(f"Recommended games: {recommendations}")

Recommended games: ['First Strike', 'Halo: Spartan Strike', 'Counter-Strike: Condition Zero', 'Counter-Strike: Source', 'Counter-Strike']


In [38]:
reviews_df.head()

Unnamed: 0,recommendationid,appid,game,author_steamid,author_playtime_forever,processed_review,sentiment_score,user_id,item_id,user_code,game_code
0,147263134,10,Counter-Strike,76561199013220131,2510,cs 1 . 6>cs2,3,120670,0,120670,0
1,146253831,10,Counter-Strike,76561198847956367,404175,#1 played pc game online since 1999,5,113963,0,113963,0
2,145002893,10,Counter-Strike,76561198995972388,81,"best played with a membrane keyboard , a rolle...",5,119662,0,119662,0
3,144308037,10,Counter-Strike,76561198893691388,89,my friend who i play counter-strike with said ...,1,116601,0,116601,0
4,142806143,10,Counter-Strike,76561198200827945,26583,counter-strike won't ever be canceled . counte...,5,88510,0,88510,0
