# 상품 기반 협력 필터링

## 참고 자료

* [Item-based Collaborative Filtering : Build Your own Recommender System!](https://www.analyticsvidhya.com/blog/2021/05/item-based-collaborative-filtering-build-your-own-recommender-system/), Saumyab271, 2023.11.08

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
header = ['user_id','item_id','rating','timestamp']
dataset = pd.read_csv('u.data',sep = '\t',names = header)
print(dataset.head())

   user_id  item_id  rating  timestamp
0      196      242       3  881250949
1      186      302       3  891717742
2       22      377       1  878887116
3      244       51       2  880606923
4      166      346       1  886397596


In [10]:
n_users = dataset.user_id.unique().shape[0]
n_items = dataset.item_id.unique().shape[0]
n_items = dataset['item_id'].max()
A = np.zeros((n_users,n_items))
for line in dataset.itertuples():
    A[line[1]-1,line[2]-1] = line[3]
    
print('n_users: ', n_users)
print('n_items: ', n_items)
print("Original rating matrix : ",A)

n_users:  943
n_items:  1682
Original rating matrix :  [[5. 3. 4. ... 0. 0. 0.]
 [4. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [5. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 5. 0. ... 0. 0. 0.]]


In [11]:
for i in range(len(A)):
  for j in range(len(A[0])):
    if A[i][j]>=3:
      A[i][j]=1
    else:
      A[i][j]=0

print(A)
print(A[0, 0:20])

[[1. 1. 1. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [1. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]
[1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [6]:
csr_sample = csr_matrix(A)
print(csr_sample)

  (0, 0)	1.0
  (0, 1)	1.0
  (0, 2)	1.0
  (0, 3)	1.0
  (0, 4)	1.0
  (0, 5)	1.0
  (0, 6)	1.0
  (0, 8)	1.0
  (0, 9)	1.0
  (0, 11)	1.0
  (0, 12)	1.0
  (0, 13)	1.0
  (0, 14)	1.0
  (0, 15)	1.0
  (0, 16)	1.0
  (0, 17)	1.0
  (0, 18)	1.0
  (0, 19)	1.0
  (0, 21)	1.0
  (0, 22)	1.0
  (0, 23)	1.0
  (0, 24)	1.0
  (0, 25)	1.0
  (0, 27)	1.0
  (0, 29)	1.0
  :	:
  (942, 624)	1.0
  (942, 654)	1.0
  (942, 671)	1.0
  (942, 684)	1.0
  (942, 716)	1.0
  (942, 720)	1.0
  (942, 721)	1.0
  (942, 731)	1.0
  (942, 738)	1.0
  (942, 762)	1.0
  (942, 764)	1.0
  (942, 793)	1.0
  (942, 795)	1.0
  (942, 807)	1.0
  (942, 815)	1.0
  (942, 823)	1.0
  (942, 824)	1.0
  (942, 839)	1.0
  (942, 927)	1.0
  (942, 942)	1.0
  (942, 1043)	1.0
  (942, 1073)	1.0
  (942, 1187)	1.0
  (942, 1227)	1.0
  (942, 1329)	1.0


## Items Similarity Computation

In [12]:
knn = NearestNeighbors(metric='cosine', algorithm='brute', n_neighbors=3, n_jobs=-1)
knn.fit(csr_sample)

NearestNeighbors(algorithm='brute', metric='cosine', n_jobs=-1, n_neighbors=3)

In [13]:
dataset_sort_des = dataset.sort_values(['user_id', 'timestamp'], ascending=[True, False])
filter1 = dataset_sort_des[dataset_sort_des['user_id'] == 1].item_id
filter1 = filter1.tolist()
filter1 = filter1[:20]
print("Items liked by user: ",filter1)

Items liked by user:  [74, 102, 256, 5, 171, 111, 242, 189, 32, 209, 270, 18, 6, 244, 221, 129, 20, 271, 272, 255]


In [14]:
distances1=[]
indices1=[]
for i in filter1:
  distances , indices = knn.kneighbors(csr_sample[i],n_neighbors=3)
  indices = indices.flatten()
  indices= indices[1:]
  indices1.extend(indices)
print("Items to be recommended: ",indices1)

Items to be recommended:  [356, 500, 758, 512, 883, 893, 473, 17, 311, 566, 771, 283, 614, 105, 904, 163, 510, 501, 642, 406, 473, 17, 578, 475, 312, 845, 688, 778, 681, 550, 275, 879, 365, 371, 685, 928, 719, 283, 373, 331]
