In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors as NN


In [2]:
# Metrics

# Arguments: a pair of rating vectors.
# Each vector is a dictionary of the form {anime_id : rating}.
# Returns: the distance between the two vectors.

# Useful constants:
# Sentinel distance; normalized_dist returns it when two vectors share no anime_id.
infty = 1e100

In [3]:
def normalized_dist(x, y) :
    """Mean squared rating difference over the anime rated in both vectors.

    x, y -- rating dictionaries of the form {anime_id : rating}.

    Returns the sum of squared differences divided by the number of shared
    anime_ids, or the module-level `infty` sentinel when the two vectors have
    no anime_id in common.
    """
    shared = x.keys() & y.keys()
    count = len(shared)

    # Nothing to compare: report the "infinitely far" sentinel.
    if count == 0 :
        return infty

    # Iterate the same set twice so both arrays are aligned position by position.
    ratings_x = np.array([x[key] for key in shared])
    ratings_y = np.array([y[key] for key in shared])

    squared_gaps = np.square(ratings_x - ratings_y)
    return np.sum(squared_gaps) / count



In [4]:
def myKNN(x, vectors, metric, K) :
    """Return the indices (into `vectors`) of the K vectors closest to `x`.

    x       -- query vector, passed as the first argument of `metric`
    vectors -- indexable collection of candidate vectors
    metric  -- callable metric(x, candidate) returning a sortable distance
    K       -- number of neighbours wanted; capped at len(vectors)

    Indices are ordered by increasing distance; equal distances are ordered
    by increasing index because (distance, index) pairs are sorted as tuples.
    """
    total = len(vectors)

    # Pair every distance with its index so that sorting keeps track of origin.
    ranked = sorted((metric(x, vectors[pos]), pos) for pos in range(total))

    # range() of a non-positive bound is empty, so K <= 0 yields [].
    return [ranked[rank][1] for rank in range(min(K, total))]
    

In [5]:
# returns score based on nearest neighbours

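# anime_id - id of the anime whose score is being estimated
# user_vec - ratings of the target user, as a dictionary {anime_id : rating}
# vectors - list of other users' rating dictionaries; each must contain anime_id
# dist - metric used
# K - number of neighbours used to calculate the score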

def KNN_score(anime_id, user_vec, vectors, dist, K = 5) :
    """Average the ratings that the K nearest neighbours gave to `anime_id`.

    anime_id -- key looked up in every selected neighbour's rating dictionary
    user_vec -- rating dictionary of the user being scored
    vectors  -- list of candidate rating dictionaries; any selected neighbour
                must contain `anime_id`
    dist     -- metric passed through to myKNN
    K        -- number of neighbours to average over

    Prints the neighbours' ratings and returns their np.average.
    """
    # NOTE: an sklearn NearestNeighbors lookup was sketched here earlier
    # (commented out); the hand-written myKNN is what is actually used.
    nearest = myKNN(user_vec, vectors, dist, K)

    picked = [vectors[idx][anime_id] for idx in nearest]
    neigh_scores = np.array(picked)
    print("scores:", neigh_scores)

    return np.average(neigh_scores)
    

In [6]:
# Toy dataset of [user_id, anime_id, rating] triples.
users = 10
animes = 3

# Users 1-5 rate anime 1 and 3, each with a rating equal to their own id.
records = [[user, anime, user] for user in range(1, 6) for anime in (1, 3)]

# Users 6-10 rate all three anime with rating (user_id - 5).
records += [[user, anime, user - 5] for user in range(6, 11) for anime in (1, 2, 3)]


In [7]:
# ratings[user_id] is a dictionary {anime_id : rating}. The list has users + 1
# slots (index 0 included) so that user ids can index it directly.
ratings = [dict() for _ in range(users + 1)]

def parse_records(minus_one = -1) :
    """Copy every [user_id, anime_id, rating] triple of `records` into `ratings`.

    minus_one -- value substituted for a rating of -1 before it is stored.

    A rating equal to 0 (after the substitution) is not stored, so passing
    minus_one = 0 drops the -1 entries as well. Works on the module-level
    `records` and `ratings`; returns nothing.
    """
    for user_id, anime_id, rating in records :
        value = minus_one if rating == -1 else rating
        if value != 0 :
            ratings[user_id][anime_id] = value

def cut_records(anime_id) :
    """Return the rating dictionaries of every user who has rated `anime_id`.

    Users 1..users of the module-level `ratings` are scanned in order. The
    returned list holds the dictionaries themselves, not copies.
    """
    return [
        ratings[user]
        for user in range(1, users + 1)
        if anime_id in ratings[user]
    ]

# Fill the per-user rating dictionaries, then print them as a sanity check.
parse_records()
for uid, user_ratings in enumerate(ratings[1:users + 1], start = 1) :
    print(uid, user_ratings)
    

1 {1: 1, 3: 1}
2 {1: 2, 3: 2}
3 {1: 3, 3: 3}
4 {1: 4, 3: 4}
5 {1: 5, 3: 5}
6 {1: 1, 2: 1, 3: 1}
7 {1: 2, 2: 2, 3: 2}
8 {1: 3, 2: 3, 3: 3}
9 {1: 4, 2: 4, 3: 4}
10 {1: 5, 2: 5, 3: 5}


In [8]:
def estimate_score(user_id, anime_id, rounded = True, test = False, K = 3) :
    """Estimate the rating `user_id` would give `anime_id` from similar users.

    user_id  -- index into the module-level `ratings` list
    anime_id -- anime whose rating is estimated
    rounded  -- when true, the estimate is rounded with np.rint
    test     -- when true, the user's existing rating of `anime_id` is hidden
                for the duration of the estimate (leave-one-out check) and
                put back afterwards
    K        -- number of nearest neighbours averaged by KNN_score

    Returns the estimate. Two early exits print a hint instead of estimating:
    with test on and no existing rating, the sentinel 2137.0 is returned;
    with test off and an existing rating, that stored rating is returned.
    """
    user_ratings = ratings[user_id]

    if test :
        # Hide the known rating so it cannot influence the neighbour search.
        held_out = user_ratings.pop(anime_id, None)
        if held_out is None :
            print("You should test on existing records! (try test = False)")
            return 2137.0
    elif anime_id in user_ratings :
        print("Anime already rated (forgot test = True ?)")
        return user_ratings[anime_id]

    try :
        # Computed after the pop, so in test mode the user's own vector is
        # not among the candidates.
        has_watched = cut_records(anime_id)
        answer = KNN_score(anime_id, user_ratings, has_watched, normalized_dist, K = K)
    finally :
        # Restore the hidden rating even if the estimate raised; otherwise a
        # failure would silently delete a record from `ratings`.
        if test :
            user_ratings[anime_id] = held_out

    if rounded :
        answer = np.rint(answer)
    return answer

In [9]:
# Leave-one-out check: hide each user's rating of anime 1 and predict it back.
for u in range(1, users + 1) :
    predicted = estimate_score(u, 1, test = True)
    print(u, predicted)

# Predict anime 2 for users 1-5, who have no record for it in the toy data.
for u in range(1, 6) :
    predicted = estimate_score(u, 2, test = False)
    print(u, predicted)

scores: [1 2 2]
1 2.0
scores: [2 1 3]
2 2.0
scores: [3 2 4]
3 3.0
scores: [4 3 5]
4 4.0
scores: [5 4 4]
5 4.0
scores: [1 2 2]
6 2.0
scores: [2 1 3]
7 2.0
scores: [3 2 4]
8 3.0
scores: [4 3 5]
9 4.0
scores: [5 4 4]
10 4.0
scores: [1 2 3]
1 2.0
scores: [2 1 3]
2 2.0
scores: [3 2 4]
3 3.0
scores: [4 3 5]
4 4.0
scores: [5 4 3]
5 4.0
