<a href="https://colab.research.google.com/github/wizard339/education/blob/main/knnbasic_and_asr.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install surprise
!pip install apyori

In [None]:
import pandas as pd
import numpy as np
import surprise
import apyori

In [103]:
np.random.seed(829)

In [104]:
# загрузим встроенный датасет ml-100k
data = surprise.Dataset.load_builtin('ml-100k')
training_set = data.build_full_trainset()

In [105]:
test_set = training_set.build_testset()

In [106]:
# количество пользователей в training_set
print(f'Number of users in the training_set: {training_set.n_users}')

Number of users in the training_set: 943


In [107]:
# количество items в training_set
print(f'Number of items in the training_set: {training_set.n_items}')

Number of items in the training_set: 1682


In [108]:
# среднее значение рейтинга по всем показателям рейтинга в training_set
print(f'Global mean rating in the training_set: {training_set.global_mean}')

Global mean rating in the training_set: 3.52986


In [109]:
from surprise import KNNBasic

In [110]:
sim_options = {
    "name": "cosine",
    "user_based": True,  # compute similarities between users
}

knn = KNNBasic(sim_options=sim_options)

In [111]:
knn.fit(training_set)

Computing the cosine similarity matrix...
Done computing similarity matrix.


<surprise.prediction_algorithms.knns.KNNBasic at 0x7fc70264b6d0>

In [112]:
predictions = knn.test(test_set)

In [113]:
predictions[8].iid

'663'

In [114]:
from surprise import SVD

In [115]:
svd = SVD()

In [116]:
svd.fit(training_set)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7fc70264bb20>

In [117]:
predictions_ = svd.test(test_set)

In [118]:
predictions_[42]

Prediction(uid='186', iid='148', r_ui=4.0, est=3.575883908398154, details={'was_impossible': False})

In [119]:
print(f'Difference between predicted and actual: {np.round((predictions_[42].r_ui - predictions_[42].est), 3)}')

Difference between predicted and actual: 0.424


In [120]:
from surprise.accuracy import rmse

In [121]:
print(f'RMSE for KNNBasic:{rmse(predictions)}')
print(f'RMSE for SVD: {rmse(predictions_)}')

RMSE: 0.9012
RMSE for KNNBasic:0.9012175824620821
RMSE: 0.6747
RMSE for SVD: 0.6746987152322735


In [122]:
df = pd.DataFrame(data.raw_ratings)
df.columns = ['user_id', 'movie_id', 'rating', 3]
df.head()

Unnamed: 0,user_id,movie_id,rating,3
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [156]:
# создадим словарь для предсказанных movie_id с рейтингом 4 и выше для каждого user_id
pred_movie_dict = dict.fromkeys(df.user_id.unique(), '')

In [157]:
# заполняем словарь фильмами с оценкой 4 и выше
for pred in predictions_:
  if pred.est >= 4:
    pred_movie_dict[pred.uid] += ' ' + pred.iid
    pred_movie_dict[pred.uid] = pred_movie_dict[pred.uid].lstrip()

In [158]:
pred_movie_dict['100']

'313 272'

In [159]:
pred_movie_df = pd.DataFrame.from_dict(pred_movie_dict, orient='index', columns=['movie_id'])

In [160]:
pred_movie_df.head()

Unnamed: 0,movie_id
196,655 306 663 8 285 1007 153
186,566 470 385 71 939 300 79 98 887 159
22,258 510 79 511 173 186 96 435 176 550 648 238 ...
244,707 183 172 154 89 652 238 100 509 157 428 13 ...
166,328 300 347 313 315


In [167]:
# построим ассоциативные правила
association_rules = apyori.apriori(pred_movie_df.movie_id.apply(lambda r: r.split(' ')), 
                                   min_support=0.2, 
                                   min_confidence=0.3, min_lift=2, 
                                   min_length=2)

In [168]:
asr_df = pd.DataFrame(columns = ['from', 'to', 'confidence', 'support', 'lift'])
for item in association_rules:
    pair = item[0] 
    items = [x for x in pair]
    asr_df.loc[len(asr_df), :] =  ' '.join(list(item[2][0][0])), \
                                  ' '.join(list(item[2][0][1])),\
                                  item[2][0][2], item[1], item[2][0][3]

In [170]:
# выведем получившиеся ассоциативные правила
asr_df

Unnamed: 0,from,to,confidence,support,lift
0,172,174,0.85098,0.230117,2.613923
1,172,174 50,0.815686,0.220573,2.913607
