# Sample Code

## 載入資料

In [1]:
import pandas as pd
import numpy as np



In [2]:
## Sample Data

# Toy product catalogue: one (asin, title) row per product.
metadata = pd.DataFrame(
    [
        ('A1', 'Hello World'),
        ('A2', 'Hello Python'),
        ('AB3', 'Python Tutorial'),
        ('B4', 'Java Tutorial'),
        ('B5', 'C++ Tutorial'),
    ],
    columns=['asin', 'title'],
)
metadata

Unnamed: 0,asin,title
0,A1,Hello World
1,A2,Hello Python
2,AB3,Python Tutorial
3,B4,Java Tutorial
4,B5,C++ Tutorial


In [3]:
# Toy review log, written column-wise: the first four rows are the
# 2015-02-19 reviews, the last four are the 2018-09-19 reviews.
ratings = pd.DataFrame({
    'asin': ['A1', 'AB3', 'B4', 'AB3', 'A1', 'A2', 'B5', 'AB3'],
    'reviewerID': ['USER1', 'USER1', 'USER2', 'USER2', 'USER1', 'USER1', 'USER2', 'USER2'],
    'overall': [5] * 8,
    'unixReviewTime': [1424304000] * 4 + [1537315200] * 4,
    'DATE': ['2015-02-19'] * 4 + ['2018-09-19'] * 4,
})
ratings

Unnamed: 0,asin,reviewerID,overall,unixReviewTime,DATE
0,A1,USER1,5,1424304000,2015-02-19
1,AB3,USER1,5,1424304000,2015-02-19
2,B4,USER2,5,1424304000,2015-02-19
3,AB3,USER2,5,1424304000,2015-02-19
4,A1,USER1,5,1537315200,2018-09-19
5,A2,USER1,5,1537315200,2018-09-19
6,B5,USER2,5,1537315200,2018-09-19
7,AB3,USER2,5,1537315200,2018-09-19


In [4]:
metadata.head()

Unnamed: 0,asin,title
0,A1,Hello World
1,A2,Hello Python
2,AB3,Python Tutorial
3,B4,Java Tutorial
4,B5,C++ Tutorial


In [5]:
ratings.head()

Unnamed: 0,asin,reviewerID,overall,unixReviewTime,DATE
0,A1,USER1,5,1424304000,2015-02-19
1,AB3,USER1,5,1424304000,2015-02-19
2,B4,USER2,5,1424304000,2015-02-19
3,AB3,USER2,5,1424304000,2015-02-19
4,A1,USER1,5,1537315200,2018-09-19


## 資料整理

In [6]:
# Replace the DATE column (plain strings in the sample data) with datetimes
# parsed from the epoch-second `unixReviewTime` column. The result depends
# only on `unixReviewTime`, so re-running this cell is harmless.
ratings['DATE'] = pd.to_datetime(ratings['unixReviewTime'], unit='s')

## 資料切分

In [7]:
# Hold-out window: September 2018, expressed as the half-open interval
# [TEST_START, TEST_END). An inclusive `<= '2018-09-30'` bound compares against
# midnight and would drop any review stamped later on September 30.
TEST_START = '2018-09-01'
TEST_END = '2018-10-01'

# Everything strictly before the window is training data.
ratings_trainings = ratings[ratings['DATE'] < TEST_START]
ratings_testings = ratings[
    (ratings['DATE'] >= TEST_START) &
    (ratings['DATE'] < TEST_END)
]

# {reviewerID: [asin, ...]} for the items each user reviewed inside the window.
# Only the 'asin' column is aggregated, and the name is bound once, as a dict.
ratings_testings_by_user = (
    ratings_testings
    .groupby('reviewerID')['asin']
    .agg(list)
    .to_dict()
)

# Users that appear in the hold-out window are the ones we recommend for.
users = list(ratings_testings_by_user)

In [8]:
ratings_trainings

Unnamed: 0,asin,reviewerID,overall,unixReviewTime,DATE
0,A1,USER1,5,1424304000,2015-02-19
1,AB3,USER1,5,1424304000,2015-02-19
2,B4,USER2,5,1424304000,2015-02-19
3,AB3,USER2,5,1424304000,2015-02-19


In [9]:
ratings_testings

Unnamed: 0,asin,reviewerID,overall,unixReviewTime,DATE
4,A1,USER1,5,1537315200,2018-09-19
5,A2,USER1,5,1537315200,2018-09-19
6,B5,USER2,5,1537315200,2018-09-19
7,AB3,USER2,5,1537315200,2018-09-19


In [10]:
users

['USER1', 'USER2']

In [11]:
ratings_testings_by_user

{'USER1': ['A1', 'A2'], 'USER2': ['B5', 'AB3']}

## 產生推薦

In [12]:
def recommender(training_data, users=None, k=2):
    '''
    Produce recommended items for each requested user (template stub).

    * training_data: dataframe of training ratings (data before 2018-09-01)
    * users: list of users to recommend for; None (the default) means no users
    * k: int, number of items to recommend per user
    * returns recommendations: dict
      {
          user_1: [recommended_item_1, recommended_item_2, ...],
          user_2: [...], ...
      }

    The recommendation logic is intentionally left unimplemented, so the
    function currently returns an empty dict for every input.
    '''
    # A None sentinel replaces the former mutable `[]` default, which would be
    # shared between calls as soon as an implementation mutated it.
    if users is None:
        users = []
    recommendations = {}
    # TODO: your code here -- fill `recommendations` with up to `k` items
    # for every user in `users`, learned from `training_data`.
    return recommendations

ratings_by_user = recommender(ratings_trainings, users)
ratings_by_user

{}

## 結果評估

In [13]:
def evaluate(ratings_testings_by_user=None, ratings_by_user=None, method=None):
    '''
    Score recommendations by the share of held-out purchases they hit.

    * ratings_testings_by_user: dict {user: [items actually purchased]}
      (data from 2018-09-01 onwards)
    * ratings_by_user: dict {user: [recommended items]} learned from the
      training data
    * method: str, currently unused; kept so existing callers keep working
    * returns score: float, the number of distinct recommended items that were
      really purchased (summed over users) divided by the total number of
      held-out purchases; 0.0 when there are no held-out purchases
    '''
    ratings_testings_by_user = ratings_testings_by_user or {}
    ratings_by_user = ratings_by_user or {}

    hits = 0
    n_purchases = 0
    for user, purchased in ratings_testings_by_user.items():
        # The denominator comes from the argument itself rather than from a
        # notebook-level DataFrame, so the function no longer depends on
        # whatever `ratings_testings` happens to be in the kernel.
        n_purchases += len(purchased)
        # Users without recommendations contribute zero hits.
        hits += len(set(ratings_by_user.get(user, ())) & set(purchased))

    if n_purchases == 0:
        # Nothing to hit: report 0.0 instead of raising ZeroDivisionError.
        return 0.0
    return hits / n_purchases

evaluate(ratings_testings_by_user, ratings_by_user)

0.0

In [14]:
ratings_testings_by_user

{'USER1': ['A1', 'A2'], 'USER2': ['B5', 'AB3']}

In [15]:
ratings_by_user

{}