# Item-to-Item Recommendation Example (Collaborative Filtering 사용 예제)
- Kaggle의 MovieLens data 사용

## 1. Dataset
- ratings.csv : user가 movie에 매긴 평점 data
- movies.csv : 영화 정보 data

In [16]:
import pandas as pd
import numpy as np

In [17]:
# Load the ratings table and the movie-metadata table from the working directory.
rating_data = pd.read_csv(filepath_or_buffer='./ratings.csv')
movie_data = pd.read_csv(filepath_or_buffer='./movies.csv')

In [18]:
# Preview the first two rows of the ratings table.
rating_data.head(n=2)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,31,2.5,1260759144
1,1,1029,3.0,1260759179


In [19]:
# Preview the first two rows of the movie-metadata table.
movie_data.head(n=2)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy


## 2. EDA
### 2-1 ) 불필요한 column 제거

In [20]:
# The timestamp column is not needed for the recommender, so remove it in place,
# then preview the result.
rating_data.drop(columns=['timestamp'], inplace=True)
rating_data.head(n=2)

Unnamed: 0,userId,movieId,rating
0,1,31,2.5
1,1,1029,3.0


### 2-2 ) movieId를 기준으로 merge

In [21]:
# Attach title/genres to every rating row by joining on the shared movieId key
# (pandas' default inner join).
user_movie_rating = rating_data.merge(movie_data, on='movieId')

In [22]:
# Preview the first two rows of the merged ratings/movies table.
user_movie_rating.head(n=2)

Unnamed: 0,userId,movieId,rating,title,genres
0,1,31,2.5,Dangerous Minds (1995),Drama
1,7,31,3.0,Dangerous Minds (1995),Drama


## 3. Pivot Table 


In [23]:
# Reshape to a (title x userId) matrix of ratings. Movies a user has not rated
# come out as NaN.
movie_user_rating = user_movie_rating.pivot_table(
    values='rating',
    index='title',
    columns='userId',
)

In [24]:
# Preview the first five rows of the title x userId rating matrix.
movie_user_rating.head(n=5)

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",,,,,,,,,,,...,,,,,,,,,,
$9.99 (2008),,,,,,,,,,,...,,,,,,,,,,
'Hellboy': The Seeds of Creation (2004),,,,,,,,,,,...,,,,,,,,,,
'Neath the Arizona Skies (1934),,,,,,,,,,,...,,,,,,,,,,
'Round Midnight (1986),,,,,,,,,,,...,,,,,,,,,,


## 4. Null값 채우기
- NaN값을 0으로 채우기

In [25]:
# Replace missing ratings with 0 in place so every movie row is a fully
# numeric vector, then preview the first three rows.
movie_user_rating.fillna(value=0, inplace=True)
movie_user_rating.head(n=3)

userId,1,2,3,4,5,6,7,8,9,10,...,662,663,664,665,666,667,668,669,670,671
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$9.99 (2008),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 5. Cosine Similarity 구하기
- 평점이 비슷한 아이템을 유사한 아이템이라고 생각
- 자기 자신의 경우 유사도가 1
- 두 아이템이 유사할수록 유사도가 1에 가까운 값을 나타냄

In [27]:
from sklearn.metrics.pairwise import cosine_similarity

In [28]:
# Pairwise cosine similarity between movie rows (each row is one movie's
# rating vector across users); the result is a square movie x movie array.
item_based_collabor = cosine_similarity(X=movie_user_rating)
item_based_collabor

array([[1.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 1.        , 0.        , ..., 0.05821787, 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.05821787, 0.        , ..., 1.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 1.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        1.        ]])

In [29]:
# Label both axes of the similarity array with movie titles so that
# similarities can be looked up by title.
reco_vector = pd.DataFrame(
    data=item_based_collabor,
    index=movie_user_rating.index,
    columns=movie_user_rating.index,
)

In [30]:
# Preview the first five rows of the title x title similarity table.
reco_vector.head(n=5)

title,"""Great Performances"" Cats (1998)",$9.99 (2008),'Hellboy': The Seeds of Creation (2004),'Neath the Arizona Skies (1934),'Round Midnight (1986),'Salem's Lot (2004),'Til There Was You (1997),"'burbs, The (1989)",'night Mother (1986),(500) Days of Summer (2009),...,Zulu (1964),Zulu (2013),[REC] (2007),eXistenZ (1999),loudQUIETloud: A Film About the Pixies (2006),xXx (2002),xXx: State of the Union (2005),¡Three Amigos! (1986),À nous la liberté (Freedom for Us) (1931),İtirazım Var (2014)
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"""Great Performances"" Cats (1998)",1.0,0.0,0.0,0.164399,0.020391,0.0,0.014046,0.0,0.0,0.003166,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
$9.99 (2008),0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.079474,0.0,0.15633,...,0.0,0.0,0.0,0.0,0.0,0.013899,0.0,0.058218,0.0,0.0
'Hellboy': The Seeds of Creation (2004),0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.217357,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Neath the Arizona Skies (1934),0.164399,0.0,0.0,1.0,0.124035,0.0,0.085436,0.0,0.0,0.019259,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
'Round Midnight (1986),0.020391,0.0,0.0,0.124035,1.0,0.0,0.010597,0.143786,0.0,0.136163,...,0.0,0.0,0.0,0.121567,0.0,0.0,0.0,0.0,0.0,0.0


## 6. 영화 추천

In [42]:
def get_item(title, top_n=5):
    """Return the movies most similar to *title*, best match first.

    Looks up the *title* column of the module-level ``reco_vector``
    similarity table, removes the queried movie itself, and returns the
    ``top_n`` remaining entries with the highest similarity.

    Args:
        title: Movie title exactly as it appears in ``reco_vector``.
        top_n: Number of recommendations to return (default 5).

    Returns:
        A pandas Series indexed by movie title whose values are similarity
        scores, sorted in descending order.

    Raises:
        KeyError: If *title* is not a column of ``reco_vector``.
    """
    similarities = reco_vector[title]
    # Remove the queried movie by label rather than by slicing off position 0:
    # another movie can also score 1.0, and tie order after sorting is not
    # guaranteed, so a positional slice may return the queried movie itself.
    candidates = similarities.drop(labels=title)
    return candidates.sort_values(ascending=False).head(top_n)

In [43]:
# Example query: recommendations for 'Godfather, The (1972)'.
get_item(title='Godfather, The (1972)')

title
Godfather: Part II, The (1974)               0.773685
Goodfellas (1990)                            0.620349
One Flew Over the Cuckoo's Nest (1975)       0.568244
American Beauty (1999)                       0.557997
Star Wars: Episode IV - A New Hope (1977)    0.546750
Name: Godfather, The (1972), dtype: float64