#**3.3 기본 CF 알고리즘**

In [1]:
import os
import pandas as pd
import numpy as np 
from sklearn.model_selection import train_test_split 

#user 데이터 
base_src = 'drive/MyDrive/RecoSys/Data'
u_user_src = os.path.join(base_src,'u.user')
u_cols = ["user_id","age",'sex',"occupation","zip_code"]
users = pd.read_csv(u_user_src,
                    sep = '|',
                    names = u_cols,
                    encoding='latin-1')
users = users.set_index('user_id')

#movie 데이터 
u_item_src = os.path.join(base_src,'u.item')
i_cols = ["movie_id","title","release date","video release date","IMDB URL","unknown","Action","Adventure","Animaiton",
          "Children", "Comedy", "Crime", "Documentary", "Drama", "Fantasy",
          "Film-Noir","Horror", "Musical",'Mystery','Romance','Sci-Fi',"Thriller","War",'Western']
movies = pd.read_csv(u_item_src,
                    sep = '|',
                    names = i_cols,
                    encoding='latin-1')
movies = movies.set_index('movie_id')

#rating 데이터 
#사용자 u.data 파일을 DataFrame으로 읽기 
u_data_src = os.path.join(base_src,'u.data')
r_cols = ["user_id","movie_id",'rating','timestamp']
ratings = pd.read_csv(u_data_src,
                    sep = '\t',
                    names = r_cols,
                    encoding='latin-1')

#RMSE 함수 
def RMSE(y_true, y_pred):
  return np.sqrt(np.mean((np.array(y_true)-np.array(y_pred))**2))

#score(RMSE) 계산 
def score(model):
  #테스트 데이터의 user_id와 movie_id 간 pair를 맞춰 투플형원소 리스트데이터를 만듦 
  id_pairs = zip(x_test['user_id'], x_test['movie_id'])
  #모든 사용자-영화 짝에 대해서 주어진 예측모델에 의해 예측값 계산 및 리스트형 데이터 생성 
  y_pred = np.array([model(user,movie) for (user,movie) in id_pairs])
  #실제 평점값 
  y_true = np.array(x_test['rating'])
  return RMSE(y_true, y_pred)

#데이터 셋 만들기 
x = ratings.copy() 
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,stratify=y)

rating_matrix = x_train.pivot(index='user_id', columns ='movie_id', values = 'rating')

In [2]:
rating_matrix

movie_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,1636,1637,1638,1639,1640,1641,1643,1644,1645,1646,1647,1648,1649,1651,1652,1653,1654,1655,1656,1658,1660,1661,1662,1663,1664,1665,1666,1667,1668,1669,1671,1672,1673,1674,1675,1678,1679,1680,1681,1682
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,,3.0,4.0,3.0,3.0,5.0,4.0,1.0,,3.0,2.0,,5.0,5.0,5.0,5.0,3.0,,5.0,4.0,1.0,4.0,4.0,3.0,4.0,3.0,2.0,4.0,1.0,3.0,3.0,5.0,,2.0,1.0,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,4.0,,,,,,,,,2.0,,,,4.0,,,,,3.0,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
5,4.0,3.0,,,,,,,,,,,,,,,4.0,,,,3.0,,,,,,,,4.0,,,,,,,,,,,4.0,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,,,,,,,,,5.0,,,,,,5.0,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
940,,,,2.0,,,4.0,,3.0,,,4.0,,3.0,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
941,5.0,,,,,,4.0,,,,,,,,4.0,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
942,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,5.0,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [3]:
#코사인 유사도 계산 
from sklearn.metrics.pairwise import cosine_similarity
matrix_dummy = rating_matrix.copy().fillna(0) #결측치를 0으로 대체 
#각 사용자간의 유사도 계산 
user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
user_similarity = pd.DataFrame(user_similarity, index=rating_matrix.index,
                               columns = rating_matrix.index)

In [4]:
user_similarity 

user_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.000000,0.120043,0.038711,0.051057,0.293520,0.336243,0.354743,0.204083,0.042086,0.296908,0.245676,0.200541,0.309153,0.206082,0.108564,0.277244,0.132440,0.364222,0.104647,0.166107,0.129379,0.243714,0.272997,0.252928,0.272403,0.182094,0.105996,0.206122,0.045983,0.100707,0.103900,0.170020,0.034577,0.002228,0.030014,0.011503,0.204342,0.120561,0.052915,0.024432,...,0.061519,0.090252,0.109169,0.163910,0.246254,0.111695,0.176631,0.238489,0.189478,0.311812,0.026226,0.051928,0.453622,0.094252,0.176740,0.271686,0.049416,0.169016,0.245380,0.074875,0.174136,0.065843,0.078819,0.176029,0.177365,0.244062,0.225417,0.087520,0.234957,0.349336,0.262868,0.067976,0.213285,0.124467,0.159263,0.092745,0.197699,0.040857,0.146039,0.327120
2,0.120043,1.000000,0.097333,0.111803,0.066876,0.230695,0.096684,0.086200,0.088128,0.087242,0.059662,0.065257,0.186541,0.166637,0.349741,0.072042,0.161002,0.085666,0.149832,0.048100,0.156656,0.016289,0.117851,0.168582,0.083939,0.249776,0.177350,0.058751,0.068701,0.190840,0.077615,0.097511,0.131195,0.239237,0.046881,0.098821,0.050882,0.066551,0.229759,0.233500,...,0.196446,0.149588,0.218103,0.200809,0.093761,0.131937,0.252301,0.035324,0.000000,0.107147,0.053509,0.156135,0.109783,0.222463,0.041456,0.216302,0.274846,0.125936,0.113010,0.170856,0.181211,0.064713,0.308327,0.061941,0.075847,0.089905,0.233096,0.324391,0.034125,0.081172,0.093039,0.248723,0.306357,0.299716,0.227615,0.197742,0.172002,0.095060,0.124977,0.074104
3,0.038711,0.097333,1.000000,0.306932,0.000000,0.068522,0.058077,0.083427,0.082112,0.055581,0.061700,0.037639,0.158656,0.052342,0.082027,0.029287,0.034824,0.025516,0.147781,0.062423,0.078650,0.042279,0.038236,0.081030,0.049515,0.068535,0.027482,0.039009,0.091944,0.080197,0.183826,0.121162,0.247360,0.111771,0.110273,0.131453,0.016508,0.013495,0.214814,0.258767,...,0.018049,0.101167,0.000000,0.144546,0.087458,0.000000,0.027866,0.012225,0.047229,0.090384,0.000000,0.188154,0.046233,0.000000,0.008406,0.196951,0.388472,0.059285,0.073331,0.169560,0.083394,0.189738,0.152994,0.027719,0.013124,0.048432,0.004654,0.180985,0.000000,0.048157,0.042201,0.037424,0.129563,0.057819,0.106535,0.014985,0.126439,0.088570,0.096691,0.034971
4,0.051057,0.111803,0.306932,1.000000,0.013577,0.057006,0.048655,0.112583,0.062189,0.021827,0.073118,0.023946,0.104416,0.056960,0.046386,0.038220,0.073849,0.000000,0.230917,0.011031,0.084967,0.065749,0.033785,0.125438,0.071752,0.070704,0.043709,0.075204,0.070901,0.112544,0.098791,0.088501,0.108999,0.042796,0.133052,0.054393,0.035008,0.061528,0.203973,0.194950,...,0.052628,0.069580,0.000000,0.102921,0.074589,0.000000,0.098490,0.025924,0.045525,0.079617,0.000000,0.030693,0.033014,0.000000,0.000000,0.127218,0.067402,0.059463,0.095208,0.084707,0.047749,0.115432,0.195498,0.105807,0.023193,0.018674,0.024673,0.062576,0.031971,0.039891,0.024859,0.000000,0.047139,0.125398,0.077018,0.039723,0.147306,0.080496,0.092601,0.031783
5,0.293520,0.066876,0.000000,0.013577,1.000000,0.206892,0.253521,0.132240,0.007928,0.164163,0.231546,0.106837,0.227387,0.203307,0.041392,0.191838,0.090373,0.190668,0.018923,0.194058,0.125963,0.335253,0.274944,0.085866,0.305187,0.097141,0.076148,0.280888,0.054229,0.121149,0.021783,0.107723,0.000000,0.003357,0.004112,0.000000,0.129417,0.187511,0.000000,0.007363,...,0.021956,0.029566,0.051010,0.104960,0.224088,0.000000,0.119273,0.243718,0.124192,0.292448,0.045449,0.000000,0.280058,0.061874,0.116349,0.142588,0.000000,0.208777,0.324454,0.100488,0.164117,0.075676,0.000000,0.175341,0.095793,0.163061,0.105679,0.034035,0.246057,0.314874,0.202812,0.034144,0.048760,0.080992,0.076798,0.049624,0.164570,0.090641,0.130691,0.255923
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,0.092745,0.197742,0.014985,0.039723,0.049624,0.073537,0.100031,0.121466,0.052185,0.032969,0.097309,0.127262,0.089080,0.113734,0.320482,0.104234,0.086758,0.072648,0.044291,0.135150,0.153163,0.059185,0.093443,0.145347,0.111610,0.276880,0.085583,0.056796,0.039664,0.100737,0.000000,0.137990,0.036586,0.000000,0.062253,0.000000,0.107225,0.111428,0.036515,0.000000,...,0.064237,0.163485,0.188043,0.217793,0.027067,0.073865,0.241331,0.000000,0.030562,0.104917,0.000000,0.000000,0.095369,0.262895,0.019148,0.219954,0.000000,0.173361,0.160852,0.234425,0.187578,0.000000,0.086563,0.213090,0.038925,0.039175,0.188824,0.226845,0.056339,0.122116,0.036715,0.346306,0.238697,0.246934,0.400991,1.000000,0.083333,0.159864,0.016651,0.099570
940,0.197699,0.172002,0.126439,0.147306,0.164570,0.289520,0.211799,0.149584,0.055664,0.245436,0.213850,0.152938,0.261238,0.298224,0.132994,0.241208,0.156990,0.216756,0.189159,0.066341,0.096827,0.179729,0.215464,0.217251,0.192870,0.231832,0.085583,0.222978,0.188405,0.222461,0.047798,0.186862,0.179121,0.044200,0.110522,0.045644,0.167693,0.077647,0.249138,0.169650,...,0.084311,0.287071,0.128720,0.132678,0.252622,0.116954,0.219016,0.237480,0.136255,0.297760,0.058535,0.135217,0.240101,0.163538,0.130643,0.287364,0.159206,0.159674,0.198846,0.063828,0.241397,0.040591,0.258293,0.145513,0.066172,0.141030,0.136649,0.247383,0.211271,0.262441,0.245459,0.073535,0.140144,0.163687,0.133089,0.083333,1.000000,0.060043,0.207216,0.208179
941,0.040857,0.095060,0.088570,0.080496,0.090641,0.089968,0.037921,0.125351,0.144526,0.064335,0.037809,0.045244,0.056720,0.155637,0.087643,0.047390,0.217669,0.017526,0.056096,0.168827,0.287187,0.072842,0.170054,0.165580,0.196412,0.207065,0.115619,0.089518,0.013396,0.102069,0.000000,0.288044,0.000000,0.014928,0.009141,0.000000,0.076893,0.072996,0.061663,0.090039,...,0.000000,0.092026,0.158774,0.136969,0.061705,0.000000,0.145150,0.029389,0.041288,0.172159,0.000000,0.000000,0.158781,0.046896,0.030314,0.137418,0.000000,0.198380,0.206872,0.217534,0.098236,0.000000,0.056586,0.123280,0.078879,0.000000,0.067130,0.094586,0.067957,0.124815,0.042273,0.154637,0.154211,0.161113,0.182385,0.159864,0.060043,1.000000,0.000000,0.078067
942,0.146039,0.124977,0.096691,0.092601,0.130691,0.209911,0.209054,0.132591,0.034758,0.181162,0.106826,0.084763,0.252485,0.084079,0.044290,0.208275,0.030956,0.156611,0.078359,0.160304,0.046610,0.084353,0.111031,0.097000,0.180955,0.140507,0.030537,0.130300,0.184929,0.213869,0.105955,0.070208,0.135168,0.029439,0.123941,0.079802,0.113730,0.097569,0.115903,0.171511,...,0.056155,0.113426,0.044731,0.113798,0.217459,0.038436,0.137618,0.172965,0.108140,0.182941,0.045485,0.090061,0.128606,0.023121,0.042843,0.160319,0.182081,0.135788,0.124164,0.027054,0.174950,0.053456,0.130190,0.067350,0.112778,0.163078,0.086878,0.225003,0.153561,0.123965,0.143110,0.042971,0.079040,0.119926,0.072701,0.016651,0.207216,0.000000,1.000000,0.148773


In [5]:
#주어진 영화의 가중평균 rating을 계산하는 함수 
def CF_simple(user_id, movie_id):
  if movie_id in rating_matrix.columns:
    sim_scores = user_similarity[user_id].copy()
    movie_ratings = rating_matrix[movie_id].copy()
    #주어진 영화에 대해 평가하지 않은 사람들을 제외
    none_rating_idx = movie_ratings[movie_ratings.isnull()].index 
    movie_ratings = movie_ratings.dropna()
    sim_scores = sim_scores.drop(none_rating_idx)
    #각 평점을 유사도로 가중평균 / 전체 유사도의 합 
    mean_rating = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
  else: 
    mean_rating = 3.0
  return mean_rating 

In [6]:
#정확도 계산 
score(CF_simple) #1.0168231929262246 : 성능이 좋아짐 

1.016349985826479

#**3.4 이웃을 고려한 CF**

In [7]:
#score(RMSE) 계산 : 유사집단의 크기를 미리 정하기 위해서 기존 score 함수에 neighbor_size 인자값 추가 
def score(model, neighbor_size=0):
  #테스트 데이터의 user_id와 movie_id 간 pair를 맞춰 투플형원소 리스트데이터를 만듦 
  id_pairs = zip(x_test['user_id'], x_test['movie_id'])
  #모든 사용자-영화 짝에 대해서 주어진 예측모델에 의해 예측값 계산 및 리스트형 데이터 생성 
  y_pred = np.array([model(user,movie,neighbor_size) for (user,movie) in id_pairs])
  #실제 평점값 
  y_true = np.array(x_test['rating'])
  return RMSE(y_true, y_pred)

#데이터 셋 만들기 
x = ratings.copy() 
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,stratify=y)

rating_matrix = x_train.pivot(index='user_id', columns ='movie_id', values = 'rating')

In [8]:
#KNN 
def CF_knn(user_id, movie_id, neighbor_size=0):
  if movie_id in rating_matrix.columns: 
    sim_scores = user_similarity[user_id].copy()
    movie_ratings = rating_matrix[movie_id].copy()
    none_rating_idx = movie_ratings[movie_ratings.isnull()].index
    movie_ratings = movie_ratings.dropna()
    sim_scores = sim_scores.drop(none_rating_idx)

    if neighbor_size == 0:
      mean_rating = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
    else: 
      if len(sim_scores) > 1: 
        neighbor_size = min(neighbor_size, len(sim_scores)) #neighbor_size < len(sim_scores)인 경우를 대비 
        sim_scores = np.array(sim_scores)
        movie_ratings = np.array(movie_ratings)
        user_idx = np.argsort(sim_scores) #오름차순으로 index 값을 뽑아냄 
        sim_scores = sim_scores[user_idx][-neighbor_size:] #최상위 n개 
        movie_ratings = movie_ratings[user_idx][-neighbor_size:]
        mean_rating = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
      else: 
        mean_rating = 3.0
  else: 
    mean_rating = 3.0
  return mean_rating

#정확도 계산 
score(CF_knn, neighbor_size=30) #1.0088218054656353

1.0081783998852112

In [9]:
#실제 주어진 사용자에 대해 추천을 받는 기능 구현
rating_matrix = ratings.pivot(index='user_id', columns ='movie_id', values = 'rating') #전체데이터 사용 : 실제 추천할 때에는 train, test 나눌 필요 X
matrix_dummy = rating_matrix.copy().fillna(0) 
user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
user_similarity = pd.DataFrame(user_similarity, index=rating_matrix.index,
                               columns = rating_matrix.index)

In [10]:
def recom_movie(user_id, n_items, neighbor_size=30):
  user_movie = rating_matrix.loc[user_id].copy() #특정 user의 영화 평가 
  for movie in rating_matrix.columns: 
    if pd.notnull(user_movie.loc[movie]): #null이 아닌경우(사용자가 영화를 평가한 경우)
      user_movie.loc[movie] = 0 #추천에서 제외 
    else: 
      user_movie.loc[movie] = CF_knn(user_id, movie, neighbor_size)
  
  movie_sort = user_movie.sort_values(ascending=False)[:n_items]
  recom_movies = movies.loc[movie_sort.index]
  recommendations = recom_movies['title']
  return recommendations

recom_movie(user_id=729,n_items=5, neighbor_size = 30)

movie_id
1189                      Prefontaine (1997)
1293                         Star Kid (1997)
1467    Saint of Fort Washington, The (1993)
1500               Santa with Muscles (1996)
22                         Braveheart (1995)
Name: title, dtype: object

#**3.5 최적의 이웃 크기 결정**

In [11]:
rating_matrix = x_train.pivot(index='user_id', columns ='movie_id', values = 'rating')
matrix_dummy = rating_matrix.copy().fillna(0) 
user_similarity = cosine_similarity(matrix_dummy, matrix_dummy)
user_similarity = pd.DataFrame(user_similarity, index=rating_matrix.index,
                               columns = rating_matrix.index)

In [12]:
#neighbor size가 10~60인 경우에 대해서 RMSE를 계산하고 이를 출력한다.
for neighbor_size in [10,20,30,40,50,60]:
  print('Neighbor size = %d : RMSE = %.4f'%(neighbor_size, score(CF_knn, neighbor_size)))

Neighbor size = 10 : RMSE = 1.0311
Neighbor size = 20 : RMSE = 1.0166
Neighbor size = 30 : RMSE = 1.0141
Neighbor size = 40 : RMSE = 1.0141
Neighbor size = 50 : RMSE = 1.0144
Neighbor size = 60 : RMSE = 1.0147


#**3.6 사용자의 평가경향을 고려한 CF**

In [13]:
#full matrix에서 각 사용자의 평점평균을 구한다. 
rating_mean = rating_matrix.mean(axis=1)
#영화 평점과 각 사용자의 평균과의 차이(평점 차이)를 구한다. 
rating_bias = (rating_matrix.T - rating_mean).T

#사용자 평가 경향을 고려한 함수
def CF_knn_bias(user_id, movie_id, neighbor_size=0):
  if movie_id in rating_bias.columns: 
    sim_scores = user_similarity[user_id].copy()
    movie_ratings = rating_bias[movie_id].copy()
    none_rating_idx = movie_ratings[movie_ratings.isnull()].index
    movie_ratings = movie_ratings.drop(none_rating_idx)
    sim_scores = sim_scores.drop(none_rating_idx)

    if neighbor_size == 0:
      prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
      prediction = prediction + rating_mean[user_id]
    else:
      if len(sim_scores) > 1:
        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        movie_ratings = np.array(movie_ratings)
        user_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[user_idx][-neighbor_size:]
        movie_ratings = movie_ratings[user_idx][-neighbor_size:]
        prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
        prediction = prediction + rating_mean[user_id]
      else: 
        prediction = rating_mean[user_id] #사용자의 평가경향 고려 
  else: 
    prediction = rating_mean[user_id]
  return prediction 

score(CF_knn_bias, 30) #0.9425141020069246

0.9473899306320235

#**3.7 그 외의 CF 정확도 개선 방법**

In [14]:
#평가값이 있으면 1, 평가값이 없으면 0으로 변환 
rating_binary_1 = np.array(rating_matrix > 0).astype(float)
rating_binary_2 = rating_binary_1.T
#대각선 : 각 유저가 평가한 영화 개수, 대각선X : A와 B가 공통으로 평가한 영화의 개수 
counts = np.dot(rating_binary_1, rating_binary_2)
counts = pd.DataFrame(counts, 
                      index = rating_matrix.index,
                      columns = rating_matrix.index).fillna(0)

In [15]:
counts.shape #(943, 943) train에 있는 user개수 

(943, 943)

In [16]:
counts

user_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,904,905,906,907,908,909,910,911,912,913,914,915,916,917,918,919,920,921,922,923,924,925,926,927,928,929,930,931,932,933,934,935,936,937,938,939,940,941,942,943
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,204.0,10.0,3.0,3.0,48.0,50.0,88.0,18.0,3.0,32.0,41.0,20.0,91.0,29.0,22.0,39.0,12.0,71.0,4.0,19.0,19.0,34.0,43.0,18.0,26.0,23.0,5.0,18.0,9.0,10.0,5.0,12.0,2.0,2.0,4.0,3.0,22.0,24.0,2.0,7.0,...,8.0,6.0,8.0,31.0,23.0,6.0,17.0,26.0,17.0,41.0,3.0,1.0,97.0,8.0,27.0,47.0,3.0,30.0,41.0,12.0,19.0,4.0,3.0,26.0,11.0,14.0,23.0,10.0,52.0,56.0,45.0,7.0,29.0,9.0,23.0,8.0,30.0,5.0,17.0,48.0
2,10.0,46.0,6.0,4.0,1.0,18.0,6.0,3.0,4.0,8.0,9.0,4.0,30.0,6.0,19.0,7.0,6.0,10.0,3.0,3.0,7.0,1.0,7.0,4.0,2.0,21.0,3.0,3.0,4.0,5.0,3.0,8.0,4.0,6.0,1.0,2.0,2.0,4.0,8.0,8.0,...,6.0,6.0,7.0,13.0,0.0,5.0,9.0,2.0,1.0,5.0,2.0,5.0,9.0,9.0,2.0,21.0,7.0,10.0,3.0,8.0,10.0,2.0,9.0,9.0,2.0,2.0,9.0,15.0,4.0,3.0,9.0,6.0,21.0,9.0,12.0,7.0,8.0,1.0,8.0,2.0
3,3.0,6.0,41.0,8.0,0.0,3.0,8.0,4.0,1.0,6.0,5.0,2.0,32.0,2.0,6.0,4.0,2.0,1.0,4.0,3.0,10.0,1.0,4.0,2.0,2.0,7.0,2.0,6.0,6.0,4.0,4.0,4.0,8.0,4.0,7.0,3.0,1.0,4.0,8.0,9.0,...,4.0,11.0,2.0,11.0,3.0,3.0,4.0,1.0,1.0,3.0,0.0,10.0,2.0,1.0,0.0,14.0,8.0,4.0,2.0,6.0,4.0,6.0,7.0,4.0,2.0,0.0,3.0,7.0,0.0,2.0,1.0,2.0,10.0,4.0,8.0,2.0,8.0,2.0,7.0,1.0
4,3.0,4.0,8.0,18.0,1.0,3.0,10.0,5.0,1.0,3.0,6.0,1.0,12.0,2.0,1.0,5.0,1.0,2.0,3.0,4.0,3.0,3.0,3.0,5.0,2.0,3.0,1.0,4.0,3.0,2.0,2.0,3.0,5.0,4.0,2.0,1.0,3.0,3.0,2.0,5.0,...,2.0,3.0,1.0,4.0,2.0,2.0,2.0,2.0,1.0,5.0,0.0,3.0,4.0,0.0,0.0,7.0,1.0,2.0,4.0,3.0,2.0,2.0,4.0,5.0,1.0,1.0,3.0,1.0,2.0,3.0,1.0,1.0,3.0,2.0,4.0,1.0,6.0,2.0,5.0,2.0
5,48.0,1.0,0.0,1.0,131.0,23.0,60.0,13.0,2.0,20.0,38.0,8.0,72.0,13.0,8.0,23.0,2.0,38.0,3.0,13.0,20.0,36.0,31.0,12.0,13.0,10.0,3.0,20.0,0.0,7.0,1.0,5.0,0.0,0.0,0.0,0.0,12.0,30.0,0.0,0.0,...,5.0,0.0,2.0,12.0,14.0,0.0,8.0,24.0,8.0,24.0,5.0,0.0,45.0,2.0,12.0,13.0,0.0,21.0,39.0,11.0,15.0,3.0,0.0,29.0,3.0,11.0,11.0,3.0,38.0,43.0,35.0,3.0,8.0,5.0,12.0,4.0,18.0,3.0,8.0,35.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
939,8.0,7.0,2.0,1.0,4.0,11.0,10.0,4.0,0.0,5.0,7.0,1.0,17.0,2.0,15.0,6.0,4.0,5.0,0.0,5.0,6.0,6.0,3.0,4.0,4.0,14.0,4.0,2.0,1.0,4.0,0.0,7.0,1.0,0.0,4.0,0.0,5.0,11.0,1.0,1.0,...,5.0,5.0,3.0,13.0,1.0,1.0,11.0,0.0,0.0,4.0,0.0,1.0,8.0,7.0,0.0,14.0,1.0,12.0,8.0,8.0,7.0,0.0,0.0,10.0,2.0,0.0,6.0,6.0,6.0,11.0,3.0,10.0,10.0,7.0,15.0,37.0,3.0,2.0,1.0,6.0
940,30.0,8.0,8.0,6.0,18.0,32.0,40.0,10.0,3.0,28.0,15.0,8.0,52.0,22.0,10.0,20.0,6.0,28.0,2.0,10.0,10.0,12.0,27.0,17.0,16.0,15.0,0.0,13.0,6.0,12.0,3.0,4.0,3.0,1.0,3.0,4.0,9.0,7.0,6.0,7.0,...,5.0,8.0,4.0,12.0,19.0,2.0,8.0,21.0,10.0,26.0,2.0,1.0,37.0,4.0,10.0,26.0,3.0,10.0,18.0,4.0,14.0,2.0,3.0,6.0,6.0,8.0,9.0,9.0,28.0,22.0,28.0,3.0,15.0,5.0,10.0,3.0,80.0,5.0,11.0,18.0
941,5.0,1.0,2.0,2.0,3.0,6.0,5.0,5.0,1.0,1.0,4.0,1.0,8.0,5.0,5.0,2.0,5.0,1.0,0.0,2.0,7.0,3.0,2.0,7.0,3.0,5.0,1.0,4.0,1.0,4.0,0.0,5.0,1.0,0.0,1.0,1.0,4.0,0.0,1.0,1.0,...,2.0,4.0,4.0,5.0,2.0,0.0,2.0,1.0,1.0,9.0,0.0,1.0,6.0,1.0,0.0,5.0,1.0,3.0,6.0,7.0,5.0,0.0,0.0,2.0,2.0,0.0,3.0,1.0,3.0,5.0,0.0,2.0,7.0,4.0,7.0,2.0,5.0,16.0,2.0,2.0
942,17.0,8.0,7.0,5.0,8.0,23.0,31.0,5.0,1.0,9.0,10.0,8.0,35.0,6.0,7.0,17.0,2.0,24.0,4.0,7.0,7.0,10.0,13.0,7.0,16.0,9.0,1.0,10.0,8.0,8.0,5.0,3.0,5.0,1.0,5.0,4.0,3.0,8.0,5.0,8.0,...,4.0,7.0,1.0,6.0,11.0,2.0,5.0,10.0,9.0,13.0,2.0,7.0,16.0,2.0,5.0,18.0,5.0,10.0,10.0,2.0,9.0,1.0,5.0,6.0,3.0,8.0,6.0,10.0,16.0,10.0,18.0,3.0,7.0,4.0,6.0,1.0,11.0,2.0,59.0,12.0


In [17]:
def CF_knn_bias_sig(user_id, movie_id, neighbor_size=0):
  if movie_id in rating_bias: 
    sim_scores = user_similarity[user_id].copy()
    movie_ratings = rating_bias[movie_id].copy()

    #no_rating : null인걸 True로 표시하라 
    no_rating = movie_ratings.isnull() 

    common_counts = counts[user_id]
    low_significance = common_counts < SIG_LEVEL #SIG_LEVEL : 공통 평가 영화 개수 
    none_rating_idx = movie_ratings[no_rating | low_significance].index

    movie_ratings = movie_ratings.drop(none_rating_idx)
    sim_scores = sim_scores.drop(none_rating_idx)

    if neighbor_size == 0:
      prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
      prediction = prediction + rating_mean[user_id]
    else:
      if len(sim_scores) > MIN_RATINGS:
        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        movie_ratings = np.array(movie_ratings)
        user_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[user_idx][-neighbor_size:]
        movie_ratings = movie_ratings[user_idx][-neighbor_size:]
        prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
        prediction = prediction + rating_mean[user_id]
      else: 
        prediction = rating_mean[user_id] #사용자의 평가경향 고려 
  else: 
    prediction = rating_mean[user_id]
  return prediction 

In [18]:
SIG_LEVEL = 3
MIN_RATINGS = 3 
score(CF_knn_bias_sig, 30) #0.9421966831664823

0.9471402279374662

In [19]:
def CF_knn_bias_sig(user_id, movie_id, neighbor_size=0):
  if movie_id in rating_bias: 
    sim_scores = user_similarity[user_id].copy()
    movie_ratings = rating_bias[movie_id].copy()

    #no_rating : null인걸 True로 표시하라 
    no_rating = movie_ratings.isnull() 

    common_counts = counts[user_id]
    low_significance = common_counts < SIG_LEVEL #SIG_LEVEL : 공통 평가 영화 개수 
    none_rating_idx = movie_ratings[no_rating | low_significance].index

    movie_ratings = movie_ratings.drop(none_rating_idx)
    sim_scores = sim_scores.drop(none_rating_idx)

    if neighbor_size == 0:
      prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
      prediction = prediction + rating_mean[user_id]
    else:
      if len(sim_scores) > MIN_RATINGS:
        neighbor_size = min(neighbor_size, len(sim_scores))
        sim_scores = np.array(sim_scores)
        movie_ratings = np.array(movie_ratings)
        user_idx = np.argsort(sim_scores)
        sim_scores = sim_scores[user_idx][-neighbor_size:]
        movie_ratings = movie_ratings[user_idx][-neighbor_size:]
        prediction = np.dot(sim_scores, movie_ratings) / sim_scores.sum()
        prediction = prediction + rating_mean[user_id]
      else: 
        prediction = rating_mean[user_id] #사용자의 평가경향 고려 
  else: 
    prediction = rating_mean[user_id]
  #예측값이 1~5사이가 아닐 때 
  if prediction <= 1:
    precition = 1
  elif prediction >= 5:
    predction = 5
  
  return prediction 

In [20]:
SIG_LEVEL = 3
MIN_RATINGS = 3
score(CF_knn_bias_sig, 30) #0.9421966831664823

0.9471402279374662

#**3.8 사용자 기반 CF와 아이템 기반 CF**

In [27]:
 #score(RMSE) 계산 
def score(model):
  #테스트 데이터의 user_id와 movie_id 간 pair를 맞춰 투플형원소 리스트데이터를 만듦 
  id_pairs = zip(x_test['user_id'], x_test['movie_id'])
  #모든 사용자-영화 짝에 대해서 주어진 예측모델에 의해 예측값 계산 및 리스트형 데이터 생성 
  y_pred = np.array([model(user,movie) for (user,movie) in id_pairs])
  #실제 평점값 
  y_true = np.array(x_test['rating'])
  return RMSE(y_true, y_pred)

#데이터 셋 만들기 
x = ratings.copy() 
y = ratings['user_id']
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.25,stratify=y)

rating_matrix = x_train.pivot(index='user_id', columns ='movie_id', values = 'rating')

In [28]:
#아이템 기반 CF 
rating_matrix_t = np.transpose(rating_matrix)

matrix_dummy = rating_matrix_t.copy().fillna(0)

item_similarity = cosine_similarity(matrix_dummy,matrix_dummy)
item_similarity = pd.DataFrame(item_similarity, 
                               index = rating_matrix_t.index,
                               columns = rating_matrix_t.index)

In [29]:
item_similarity #movie_id 간의 similarity

movie_id,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,1637,1638,1639,1640,1641,1642,1643,1644,1645,1646,1647,1650,1651,1652,1653,1654,1655,1656,1657,1658,1659,1662,1663,1664,1665,1666,1667,1668,1669,1670,1671,1672,1673,1674,1675,1677,1678,1680,1681,1682
movie_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
1,1.000000,0.278975,0.235548,0.351751,0.173255,0.073354,0.477382,0.348390,0.353142,0.227550,0.358596,0.351291,0.301622,0.240165,0.411039,0.157932,0.210488,0.016328,0.125404,0.168252,0.263470,0.389383,0.252036,0.333659,0.426759,0.156414,0.160506,0.411225,0.280373,0.133061,0.300085,0.208125,0.223624,0.027998,0.117921,0.061668,0.043200,0.233222,0.246641,0.156293,...,0.027209,0.027209,0.048867,0.027209,0.027209,0.016997,0.027209,0.0,0.027209,0.016325,0.027209,0.027209,0.027209,0.0,0.0,0.0,0.054418,0.040813,0.040813,0.022639,0.068022,0.0,0.0,0.077740,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.040813,0.0,0.000000,0.040813,0.0,0.0,0.054418,0.054418
2,0.278975,1.000000,0.204305,0.385787,0.249841,0.086337,0.241768,0.215584,0.184438,0.120564,0.304546,0.320614,0.167966,0.099857,0.203677,0.047376,0.271152,0.060380,0.032903,0.047977,0.210659,0.362216,0.196420,0.308597,0.214505,0.107077,0.288278,0.348553,0.458537,0.074458,0.350993,0.087318,0.367965,0.073815,0.055517,0.240710,0.106500,0.446750,0.244310,0.149658,...,0.092231,0.092231,0.057616,0.092231,0.092231,0.057616,0.092231,0.0,0.092231,0.055339,0.092231,0.092231,0.092231,0.0,0.0,0.0,0.092231,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.087839,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.092231,0.092231
3,0.235548,0.204305,1.000000,0.280262,0.155435,0.024210,0.268037,0.137774,0.248329,0.124859,0.279044,0.286562,0.190119,0.129553,0.197897,0.094102,0.379909,0.070548,0.073168,0.082683,0.107592,0.240966,0.207196,0.305503,0.214630,0.169502,0.229294,0.175068,0.244125,0.100381,0.193296,0.140281,0.392887,0.123208,0.034595,0.054276,0.015554,0.164773,0.258266,0.148632,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.107763,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.020526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.035921,0.0,0.0,0.000000,0.107763
4,0.351751,0.385787,0.280262,1.000000,0.236357,0.079122,0.349582,0.360626,0.328108,0.240807,0.454196,0.474361,0.302889,0.194269,0.236531,0.131363,0.336127,0.089858,0.109241,0.180944,0.187630,0.469241,0.327282,0.320422,0.303658,0.281642,0.289088,0.375682,0.316077,0.174626,0.368256,0.325259,0.397558,0.063186,0.065227,0.166710,0.165793,0.306226,0.377254,0.269027,...,0.043345,0.043345,0.027078,0.043345,0.043345,0.077848,0.043345,0.0,0.043345,0.026007,0.043345,0.043345,0.043345,0.0,0.0,0.0,0.065018,0.086691,0.000000,0.000000,0.086691,0.0,0.0,0.049538,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.108363,0.043345,0.0,0.0,0.065018,0.086691
5,0.173255,0.249841,0.155435,0.236357,1.000000,0.015807,0.256219,0.186722,0.235605,0.019858,0.270600,0.264173,0.109830,0.059359,0.174800,0.075897,0.302389,0.023031,0.068014,0.024705,0.129910,0.246844,0.179669,0.200916,0.202219,0.147560,0.157968,0.255967,0.241672,0.062263,0.210626,0.095376,0.202284,0.096533,0.033881,0.188461,0.035544,0.327459,0.187585,0.195228,...,0.070360,0.070360,0.043953,0.070360,0.070360,0.043953,0.070360,0.0,0.070360,0.042216,0.070360,0.070360,0.070360,0.0,0.0,0.0,0.105540,0.000000,0.105540,0.000000,0.000000,0.0,0.0,0.060308,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1677,0.040813,0.000000,0.035921,0.043345,0.000000,0.000000,0.059576,0.096855,0.083091,0.000000,0.095433,0.078058,0.073171,0.084365,0.052303,0.000000,0.000000,0.000000,0.103572,0.000000,0.000000,0.030842,0.080112,0.098894,0.056725,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.129099,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,1.000000,0.0,0.0,0.000000,0.000000
1678,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,1.0,1.0,0.000000,0.000000
1680,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,1.0,1.0,0.000000,0.000000
1681,0.054418,0.092231,0.000000,0.065018,0.000000,0.000000,0.059576,0.000000,0.066473,0.000000,0.038173,0.046835,0.000000,0.000000,0.052303,0.000000,0.000000,0.000000,0.069048,0.039014,0.000000,0.077106,0.040056,0.098894,0.000000,0.000000,0.000000,0.034565,0.073472,0.000000,0.072718,0.000000,0.062500,0.000000,0.000000,0.000000,0.000000,0.000000,0.068802,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.0,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.0,0.0,1.000000,0.000000


In [31]:
def CF_IBCF(user_id, movie_id):
  if movie_id in item_similarity.columns: 
    sim_scores = item_similarity[movie_id]
    user_rating = rating_matrix_t[user_id]
    none_rating_idx = user_rating[user_rating.isnull()].index
    user_rating = user_rating.dropna()
    sim_scores = sim_scores.drop(none_rating_idx)
    mean_rating = np.dot(sim_scores, user_rating) / sim_scores.sum()
  else: 
    mean_rating = 3.0

  return mean_rating 

score(CF_IBCF) #1.0151004695180692

1.0151004695180692