In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import euclidean_distances
from ast import literal_eval

A recommender osztály létrehozásakor megkapja a userid-kat, amik alapján létrehozzá a user objektumokat is. mind a recommender, mind a user eltárolja, azokat a filmeket, amiket kedvelt, amik nem tetszettek neki, amiket látott már, és az összes filmet amit feldobott neki korábban. egyelőre ezek még nincsenek pontosan kihasználva, eléggé  egy basic megoldás még csak. lentebb én kézzel hoztam létre a listákat, de futás közben a recommender addLiked metódusát meghívva az adott user-hez hozzá lehet adni a kedvelt filmet. ugyanígy az addDisliked és az addSeen metódusokkal. a reccommend metódus visszaad egy dictionary-t a userId : recommendedMovieId párosokkal, úgy hogy mindenkinek lehetőleg mást ajánljon. a match metódus pedig leellőrzi, hogy van a közös kedvelt film, ha igen visszaadja annak az indexét, ha nem, akkor False.

In [2]:
tags = pd.read_csv('tags.csv')

In [85]:
class Recommender:
    def __init__(self, ids):
        self.n_users = len(ids)
        self.ids = ids
        self.users = {userId:User() for userId in ids}
        self.tags = pd.read_csv('fit.csv', index_col = 0)

        self.tags.vector = self.tags.vector.apply(literal_eval)
        self.seen = []
        self.liked = []
        self.disliked = []
        self.all = []
        
    def calcSimilarity(self):
        likedAndSeen = pd.concat((self.tags.loc[self.liked]['vector'], self.tags.loc[self.seen]['vector']))
        liked_mean = np.array(likedAndSeen.tolist()).mean(axis=0)
        liked_mean = np.concatenate((liked_mean, [1]))
        vectors = np.array(self.tags['vector'].tolist())
        vectors = np.append(vectors, np.array([self.tags.liked.values.tolist()]).T, axis = 1)
        similarities = cosine_similarity([liked_mean], vectors)[0]
        similarities = pd.DataFrame(similarities, index = tags.index)
        return similarities

    def recommend(self):
        similarities = self.calcSimilarity()
        similarities = similarities.drop(index = self.disliked)
        recommended = {user:self.users[user].recommend(similarities, i+1) for i,user in enumerate(self.users)}
        return recommended
    
    def addLiked(self, movieId, userId):
        self.tags.loc[movieId, 'liked'] += 1/(self.n_users-1)
        self.liked += [movieId]
        self.users[userId].liked += [movieId]
        self.all += [movieId]
        self.users[userId].all += [movieId]
        
    def addDisliked(self, movieId, userId):
        self.tags.loc[movieId, 'liked'] -= 1/(self.n_users-1)
        self.disliked += [movieId]
        self.users[userId].disliked += [movieId]
        self.all += [movieId]
        self.users[userId].all += [movieId]
        
    def addSeen(self, movieId, userId):
        self.tags.loc[movieId, 'liked'] += 1/(self.n_users-1)
        self.seen += [movieId]
        self.users[userId].seen += [movieId]
        self.all += [movieId]
        self.users[userId].all += [movieId]
        
    def match(self):
        if self.n_users > 1:
            common = set(self.users[self.ids[0]].liked) & set(self.users[self.ids[1]].liked)
            for i in range(2, self.n_users):
                common = common & set(self.users[self.ids[i]].liked)
            return common.pop() if common else False
        
        return self.users[ids[0]].liked[0] if self.users[ids[0]].liked else False

    
class User:
    def __init__(self):
        self.liked = []
        self.disliked = []
        self.seen = []
        self.all = []
    
    def recommend(self, similarities, n = 1):
        similarities = similarities.drop(index = self.liked + self.seen, errors = 'ignore')
        return similarities.nlargest(n, 0).index.values[-1]


In [86]:
ids = range(3)
rec = Recommender(ids)

In [87]:
tags[tags['title'].str.contains("Basterds")]

Unnamed: 0,title,n_rating,rating_std,rating_mean,vector,years,popularity,year_normed,liked
614,Inglourious Basterds (2009),23077,0.894771,4.011397,"[0.2928795705634613, -1.1195572870269141, -0.0...",2009.0,0.776861,0.775281,0.0


In [88]:
primer = 864
inception = 654
fellowship = 202
twot = 36
rotk = 227
donnie = 1586
americanbeauty = 295
darkknight = 578
inglorious = 614
pulpfiction = 0
hp1 = 200
hp2 = 210
hp3 = 232

In [89]:
rec.addLiked(inception, 0)
rec.addLiked(fellowship, 0)
rec.addLiked(twot, 0)
rec.addLiked(inglorious, 1)
rec.addLiked(darkknight, 1)
rec.addLiked(pulpfiction, 1)
rec.addLiked(hp1, 2)
rec.addLiked(inception, 2)
rec.addLiked(donnie, 2)
rec.addDisliked(fellowship, 1)
rec.addDisliked(hp2, 2)
rec.addDisliked(americanbeauty, 0)


In [90]:
'''rec.users[0].liked = [primer, inception, donnie]
rec.users[1].liked = [fellowship, twot, inglorious, inception]
rec.users[2].liked = [americanbeauty, pulpfiction, inception]

rec.liked = [primer, inception, donnie, fellowship, twot, inglorious, americanbeauty, pulpfiction]'''

'rec.users[0].liked = [primer, inception, donnie]\nrec.users[1].liked = [fellowship, twot, inglorious, inception]\nrec.users[2].liked = [americanbeauty, pulpfiction, inception]\n\nrec.liked = [primer, inception, donnie, fellowship, twot, inglorious, americanbeauty, pulpfiction]'

In [91]:
rec.match()   #az inception mindbe benne van

False

In [92]:
recommendation = rec.recommend()

In [93]:
recommendation

{0: 1586, 1: 1586, 2: 533}

In [94]:
tags.loc[list(recommendation.values())]

Unnamed: 0,title,n_rating,rating_std,rating_mean,vector,years,popularity,year_normed,liked
1586,Donnie Darko (2001),27181,0.937298,3.954067,"[-0.47491378337318074, -0.9341390874969006, -0...",2001.0,0.80581,0.595506,0.0
1586,Donnie Darko (2001),27181,0.937298,3.954067,"[-0.47491378337318074, -0.9341390874969006, -0...",2001.0,0.80581,0.595506,0.0
533,"Pan's Labyrinth (Laberinto del fauno, El) (2006)",19868,0.90945,3.979917,"[-0.24315173546406835, -0.8739280331792609, -0...",2006.0,0.75038,0.707865,0.0
