In [1]:
from rule_recommender.utils import DatasetLoader, Dataset

In [2]:
import datetime
import pandas as pd

In [3]:
# Instantiate the project's DatasetLoader (imported from rule_recommender.utils).
dataset_loader = DatasetLoader()

In [4]:
# Run the loader's two setup steps in order: download, then prepare.
# NOTE(review): presumably this fetches the raw data and preprocesses it;
# the implementation lives in rule_recommender.utils -- confirm there.
dataset_loader.download_dataset()
dataset_loader.prepare_data()

In [5]:
# Build a Dataset from the loader; later cells read
# `dataset.ratings_testings_by_user` when scoring recommendations.
dataset = Dataset(dataset_loader)

In [6]:
# Split into train/test; `train` is the frame passed to the recommenders below.
train, test = dataset.get_train_test()

In [7]:
def popularity_recommender(training_data, top_k=5, week_delta=1, rate_low_bound=1, base_date=datetime.date(2018,9,1)):
    '''
    Recommend the most frequently occurring items in a recent time window.

    * training_data: DataFrame with the columns DATE, overall and asin
    * top_k: int, maximum number of items to return
    * week_delta: number of weeks before base_date at which the window starts
    * rate_low_bound: rows are kept only when overall is strictly greater than this
    * base_date: datetime.date the window is measured back from
    * returns: list of asin values, in the order produced by value_counts()

    Only a lower date bound is applied; rows dated after base_date are not
    removed here. Items that occur a single time after filtering are dropped.
    '''
    # date filter: keep rows strictly newer than the start of the window
    window_start = pd.Timestamp(base_date - datetime.timedelta(days=7*week_delta))
    recent_rows = training_data[training_data["DATE"] > window_start]

    # rating filter
    rated_rows = recent_rows[recent_rows["overall"] > rate_low_bound]

    # occurrence count per item (value_counts sorts by frequency, descending)
    occurrences = rated_rows["asin"].value_counts()

    # discard items seen only once, then cut to the requested length
    repeated_items = occurrences[occurrences > 1].index
    return repeated_items[:top_k].values.tolist()

In [8]:
def recommender(training_data, users=[]):
    '''
    Run popularity_recommender over a fixed grid of settings.

    * training_data: passed unchanged to popularity_recommender
    * users: not used by this function
    * returns: list of recommendation lists, one per grid point, ordered with
      the week window varying slowest, then the rating bound, then top_k
    '''
    list_sizes = [1,3,5,10,15,20]
    rating_floors = [0,1,2,3,4]
    week_spans = list(range(1,14))

    result = []
    for weeks_back in week_spans:
        for min_rating in rating_floors:
            for size in list_sizes:
                recommended = popularity_recommender(training_data,
                                                     top_k=size,
                                                     week_delta=weeks_back,
                                                     rate_low_bound=min_rating)
                result.append(recommended)

    return result

In [9]:
def evaluate(ratings_testings_by_user=None, recommend_list=None, method=None):
    '''
    Score a recommendation list as the average number of hits per test entry.

    * ratings_testings_by_user: dict whose values are the items that were
      actually purchased (data after 2018-09-01); keys are presumably user
      ids. None or an empty dict gives a score of 0.0.
    * recommend_list: list of recommended items learned from the training
      data. None is treated as an empty list.
    * method: str, currently unused.
    * returns score: float, the total number of recommended items found in
      each entry's purchases, divided by the number of entries.
    '''
    if ratings_testings_by_user is None or len(ratings_testings_by_user) == 0:
        # Nothing to score against; return 0.0 rather than dividing by zero.
        return 0.0

    # Build the recommendation set once instead of once per entry.
    recommended = set() if recommend_list is None else set(recommend_list)

    total = 0
    for user in ratings_testings_by_user:
        total += len(recommended & set(ratings_testings_by_user[user]))

    score = total / len(ratings_testings_by_user)
    return score



In [10]:
def recommend_and_evaluation(training_data,
                             top_k = [1,3,5,10,15,20],
                             rate_low_bound = [0,1,2,3,4],
                             week_delta = list(range(1,14)),
                             base_date=datetime.date(2018,9,1)
                            ):
    '''
    Grid-search the popularity recommender and print one score line per setting.

    * training_data: passed unchanged to popularity_recommender
    * top_k: list of recommendation-list lengths to try
    * rate_low_bound: list of rating thresholds to try
    * week_delta: list of window lengths, in weeks before base_date
    * base_date: datetime.date marking the end of each window

    Each combination is scored with evaluate() against the module-level
    `dataset.ratings_testings_by_user`, rounded to three decimals and printed.
    Nothing is returned.
    '''
    report_line = "date_range:{start_date}~{end_date}, rating_minimun_thresold:{rate},top_k:{top_k} socre:{score}"

    for weeks_back in week_delta:
        for min_rating in rate_low_bound:
            for list_size in top_k:
                recommended = popularity_recommender(training_data,
                                                     top_k=list_size,
                                                     week_delta=weeks_back,
                                                     rate_low_bound=min_rating,
                                                     base_date=base_date)
                raw_score = evaluate(dataset.ratings_testings_by_user, recommended)
                score = round(raw_score, 3)
                window_start = base_date - datetime.timedelta(days=7*weeks_back)
                print(report_line.format(start_date=window_start,
                                         end_date=base_date,
                                         rate=min_rating,
                                         top_k=list_size,
                                         score=score))

In [11]:
# Full sweep with the default grids: 13 week windows x 5 rating thresholds x 6 list sizes.
recommend_and_evaluation(train)

date_range:2018-08-25~2018-09-01, rating_minimun_thresold:0,top_k:1 socre:0.084
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:0,top_k:3 socre:0.098
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:0,top_k:5 socre:0.134
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:0,top_k:10 socre:0.158
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:0,top_k:15 socre:0.17
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:0,top_k:20 socre:0.182
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:1 socre:0.084
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:3 socre:0.098
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:5 socre:0.115
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:10 socre:0.156
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:15 socre:0.17
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:20 socre:0.183
date_range:2018-08-25~2018-09-01, ra

In [12]:
# Narrower sweep: rating threshold fixed at 1 and only short lists (1, 3, 5),
# still across all 13 week windows.
recommend_and_evaluation(training_data=train,
                         top_k = [1,3,5],
                         rate_low_bound = [1],
                         week_delta = list(range(1,14)),
                         base_date=datetime.date(2018,9,1))

date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:1 socre:0.084
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:3 socre:0.098
date_range:2018-08-25~2018-09-01, rating_minimun_thresold:1,top_k:5 socre:0.115
date_range:2018-08-18~2018-09-01, rating_minimun_thresold:1,top_k:1 socre:0.003
date_range:2018-08-18~2018-09-01, rating_minimun_thresold:1,top_k:3 socre:0.101
date_range:2018-08-18~2018-09-01, rating_minimun_thresold:1,top_k:5 socre:0.113
date_range:2018-08-11~2018-09-01, rating_minimun_thresold:1,top_k:1 socre:0.084
date_range:2018-08-11~2018-09-01, rating_minimun_thresold:1,top_k:3 socre:0.087
date_range:2018-08-11~2018-09-01, rating_minimun_thresold:1,top_k:5 socre:0.103
date_range:2018-08-04~2018-09-01, rating_minimun_thresold:1,top_k:1 socre:0.084
date_range:2018-08-04~2018-09-01, rating_minimun_thresold:1,top_k:3 socre:0.087
date_range:2018-08-04~2018-09-01, rating_minimun_thresold:1,top_k:5 socre:0.103
date_range:2018-07-28~2018-09-01, rating