In [1]:
import io
import json
import re
from collections import defaultdict

import requests
from SPARQLWrapper import SPARQLWrapper, JSON
from surprise import Dataset
from surprise import KNNWithMeans
from surprise import get_dataset_dir

In [2]:
# Neighbour-count setting handed to the KNNWithMeans constructor in the driver cell.
k = 4
# Passed as `min_support` in the similarity options of the driver cell.
min_movies = 5

In [3]:
def read_item_names():
    """Build a lookup from the first `|`-separated field of each `u.item` row.

    The file is read from `<dataset dir>/ml-100k/ml-100k/u.item` using
    ISO-8859-1 decoding.

    Returns:
        dict: first field of each row -> tuple of its second and third fields.
        The driver cell uses element 0 of the tuple as the movie title;
        element 1 is presumably the release date (verify against the
        MovieLens `u.item` layout).
    """
    item_path = get_dataset_dir() + '/ml-100k/ml-100k/u.item'
    with io.open(item_path, 'r', encoding='ISO-8859-1') as handle:
        rows = (raw_row.split('|') for raw_row in handle)
        return {fields[0]: (fields[1], fields[2]) for fields in rows}


In [4]:
def get_top_n(predictions, n=5):
    """Group predictions by user and keep each user's `n` best estimates.

    Args:
        predictions: iterable of 5-element records unpacked as
            (uid, iid, true rating, estimate, extra); only uid, iid and the
            estimate are used.
        n: maximum number of entries kept per user.

    Returns:
        defaultdict(list): uid -> list of (iid, estimate rounded to 3
        decimals), ordered by estimate from highest to lowest. Looking up a
        uid that had no predictions yields an empty list.
    """
    per_user = defaultdict(list)
    for record in predictions:
        uid, iid, _true_r, est, _extra = record
        per_user[uid].append((iid, round(est, 3)))

    # Snapshot the keys so entries can be reassigned while looping.
    for uid in list(per_user):
        ranked = sorted(per_user[uid], key=lambda pair: pair[1], reverse=True)
        per_user[uid] = ranked[:n]
    return per_user

In [5]:
def print_to_console(user_id, predictions):
    """Print the user id followed by one aligned row per prediction.

    Args:
        user_id: value shown on the header line.
        predictions: iterable of dicts with string 'id' and 'name' entries
            and a 'rating' entry.
    """
    print('User ID: ', user_id)
    for entry in predictions:
        # id padded to 4 columns, name left-aligned in 60, then the rating.
        row = f"{entry['id']:4s} {entry['name']:<60s} {entry['rating']}"
        print(row)

In [6]:
def print_to_file(user_id, predictions):
    """Write the user id and predictions to `result.json` in the working directory.

    The file is overwritten on every call and holds a JSON object with the
    keys 'user_id' and 'predictions', indented by two spaces.
    """
    payload = dict(user_id=user_id, predictions=predictions)
    with open('result.json', 'w') as sink:
        json.dump(payload, sink, indent=2)

In [7]:
def load_ides(movies, timeout=10):
    """Look up a Wikidata entity id for each movie by its English-Wikipedia title.

    One request per movie is sent to Wikidata's `Special:ItemByTitle` page
    with redirects disabled. A response carrying a `Location` header whose
    last path segment looks like an entity id (`Q` followed by digits) is
    treated as a hit; anything else is skipped.

    Args:
        movies: iterable of dicts; each must have a 'name' entry, which is
            used as the page title.
        timeout: seconds to wait for each HTTP request. Without a timeout a
            stalled connection would block the notebook indefinitely.

    Returns:
        dict: movie name -> Wikidata entity id, for the movies that resolved.

    Raises:
        requests.RequestException: propagated from `requests.get` on
            network failure or timeout.
    """
    API_ENDPOINT = "https://www.wikidata.org/wiki/Special:ItemByTitle"
    ides = {}

    for movie in movies:
        name = movie['name']
        # Fresh params per request so no state leaks between iterations.
        params = {'site': 'enwiki', 'page': name}

        res = requests.get(API_ENDPOINT, params=params,
                           allow_redirects=False, timeout=timeout)
        location = res.headers.get('Location')
        if not location:
            continue

        # Take the id from the final path segment instead of slicing off a
        # hard-coded prefix, so http://, relative, or query-suffixed targets
        # still resolve and non-entity redirect targets are ignored.
        match = re.search(r'/(Q[1-9][0-9]*)(?:[?#].*)?$', location)
        if match:
            ides[name] = match.group(1)
    return ides

In [8]:
def request_to_wikidata(id):
    """Query Wikidata for films published in the same year as the given entity.

    The query takes the earliest publication year (property P577) of
    `wd:<id>` and selects every instance of film (Q11424) that has a
    publication date in that year.

    Args:
        id: Wikidata entity id such as 'Q42' (the name shadows the builtin
            but is kept for backward compatibility with existing callers).

    Returns:
        The converted JSON result of the SPARQL query, as produced by
        `SPARQLWrapper.query().convert()`.

    Raises:
        ValueError: if `id` is not a string of the form `Q<digits>`.
    """
    # The id is spliced straight into the query text, so reject anything
    # that is not a plain entity id before building or sending the query.
    if not isinstance(id, str) or not re.fullmatch(r'Q[1-9][0-9]*', id):
        raise ValueError('Invalid Wikidata entity id: {!r}'.format(id))

    sparql = SPARQLWrapper("https://query.wikidata.org/sparql")

    sparql_query = """
    SELECT ?movie ?publication_date
    WHERE 
    {
        {SELECT ?required_year
        WHERE
        {
          wd:""" + id + """ wdt:P577 ?publication_date.
          BIND(str(YEAR(?publication_date)) AS ?required_year).
        }
        ORDER BY ASC(?required_year) LIMIT 1}

      ?movie wdt:P31 wd:Q11424;
             wdt:P577 ?publication_date.

      BIND(str(YEAR(?publication_date)) AS ?publication_year).
      FILTER (?publication_year = ?required_year).
    }
    """

    sparql.setQuery(sparql_query)
    sparql.setReturnFormat(JSON)
    return sparql.query().convert()

In [9]:
def load_movies_with_same_year(id):
    """Run `request_to_wikidata` for `id` and print the raw response.

    Nothing is returned; the response is only written to stdout.
    """
    print(request_to_wikidata(id))

In [None]:
# The typed id is compared against the anti-testset entries as a string;
# strip stray whitespace so "15 " still matches user "15".
user_id = input('ID: ').strip()

data = Dataset.load_builtin('ml-100k')
trainset = data.build_full_trainset()
sim_options = {
    'name': 'cosine',
    'user_based': True,
    'min_support': min_movies
}
# Use the configured `k` for the neighbourhood size instead of repeating the
# literal 4, so changing the config cell actually changes the model.
algo = KNNWithMeans(k=k, min_k=k, sim_options=sim_options, verbose=True)
algo.fit(trainset)

# Every (user, item) pair missing from the training data, narrowed to the
# requested user. An unknown id simply leaves this list empty.
testset = [entry for entry in trainset.build_anti_testset() if entry[0] == user_id]
raw_predictions = algo.test(testset)

# get_top_n returns a defaultdict, so a user without predictions yields [].
top_n = get_top_n(raw_predictions)[user_id]

movie_names = read_item_names()

# Plain dicts (id / name / rating) are what the print helpers and
# load_ides expect.
predictions = [
    {
        'id': movie_rid,
        'name': str(movie_names[movie_rid][0]),
        'rating': rating,
    }
    for movie_rid, rating in top_n
]

print_to_console(user_id, predictions)
print_to_file(user_id, predictions)

ides = load_ides(predictions)
print(ides)
# `ides` maps movie name -> Wikidata id. Iterate the ids directly: unpacking
# each value of `.values()` into two names raised ValueError, and the loop
# variable `id` shadowed the builtin.
for wikidata_id in ides.values():
    load_movies_with_same_year(wikidata_id)

ID: 15
Computing the cosine similarity matrix...
Done computing similarity matrix.
User ID:  15
1512 World of Apu, The (Apur Sansar) (1959)                       4.287
853  Braindead (1992)                                             4.285
530  Man Who Would Be King, The (1975)                            4.242
837  Meet John Doe (1941)                                         4.043
198  Nikita (La Femme Nikita) (1990)                              4.041
