In [12]:
using DataFrames, TableOperations, Tables, Random
# Explicit import list from Recommenders. The list must not end with a comma:
# a dangling comma leaves the `using` statement incomplete and the cell fails to parse.
using Recommenders: Movielens100k, load_dataset, ratio_split, Randomwalk,
    evaluate_u2i, MeanPrecision, MeanRecall, MeanNDCG, fit!, predict_u2i

In [2]:
# Build the Movielens100k dataset object, call `download` on it, and unpack the
# three values returned by `load_dataset`. The trailing `;` suppresses cell output.
ml100k = Movielens100k()
download(ml100k)
rating, user, movie = load_dataset(ml100k);

In [3]:
# Wrap the rating table so that every value of the :rating column reads as 1.0,
# i.e. the explicit scores are discarded and only the interaction is kept.
rating = TableOperations.transform(rating, Dict(:rating => (_ -> 1.0)))

TableOperations.Transforms{true, CSV.File, Dict{Symbol, var"#1#2"}}(CSV.File("/Users/keisuke.yanagi/workspace/Recommender.jl/src/dataset/../../dataset/movielens100k/u.data"):
Size: 100000 x 4
Tables.Schema:
 :userid     Int64
 :movieid    Int64
 :rating     Int64
 :timestamp  Int64, Dict(:rating => var"#1#2"()))

In [4]:
# Seed the global RNG before splitting.
Random.seed!(1234);

# First split: 0.8 of the ratings for train+validation, the rest for test.
train_valid_table, test_table = ratio_split(rating, 0.8)
# Second split: 0.8 of train+validation for training, the rest for validation.
train_table, valid_table = ratio_split(train_valid_table, 0.8)

# Row counts of the three final partitions, shown as the cell output.
map(t -> length(Tables.rows(t)), (train_table, valid_table, test_table))


(64000, 16000, 20000)

In [5]:
# Three mean ranking metrics, each constructed with 10 (presumably the top-k cutoff),
# collected into the vector handed to `evaluate_u2i`.
prec10, recall10, ndcg10 = MeanPrecision(10), MeanRecall(10), MeanNDCG(10)
metrics = [prec10, recall10, ndcg10]

3-element Vector{Recommenders.MeanMetric}:
 Recommenders.MeanMetric{Recommenders.Precision}(Recommenders.Precision(10, "precision"))
 Recommenders.MeanMetric{Recommenders.Recall}(Recommenders.Recall(10, "recall"))
 Recommenders.MeanMetric{Recommenders.NDCG}(Recommenders.NDCG(10, "ndcg"))

In [6]:
using TreeParzen

In [7]:
# Hyperparameter search space. Each key is repeated as the label of its own
# TreeParzen expression.
space = Dict(
    # Quantised uniform: arguments are 0.1, 0.9 and a step of 0.1.
    :terminate_prob => HP.QuantUniform(:terminate_prob, 0.1, 0.9, 0.1),
    # Log-uniform with bounds given in log space (log(100) .. log(1e5)); the
    # sampled value is a float and is rounded to Int by the objective.
    :total_walk_length => HP.LogUniform(:total_walk_length, log(100), log(1e5)),
    :min_high_visited_candidates => HP.Choice(
        :min_high_visited_candidates, [10, 50, 100, 250, 500, 1000]
    ),
    # Mixing Inf with integers makes this a Float64 vector, so finite choices
    # arrive as floats (e.g. 64.0) and are converted back to Int by the objective.
    :high_visited_count_threshold => HP.Choice(
        :high_visited_count_threshold, [2, 4, 8, 16, 32, 64, Inf]
    ),
    :pixie_walk_length_scaling => HP.Choice(:pixie_walk_length_scaling, [true, false]),
    :pixie_multi_hit_boosting => HP.Choice(:pixie_multi_hit_boosting, [true, false]),
)

Dict{Symbol, TreeParzen.Types.AbstractDelayed} with 6 entries:
  :pixie_walk_length_scaling    => Choice(Param(:pixie_walk_length_scaling, Ran…
  :pixie_multi_hit_boosting     => Choice(Param(:pixie_multi_hit_boosting, Rand…
  :terminate_prob               => QuantUniform(:terminate_prob, QuantUniform(0…
  :total_walk_length            => LogUniform(:total_walk_length, LogUniform(4.…
  :min_high_visited_candidates  => Choice(Param(:min_high_visited_candidates, R…
  :high_visited_count_threshold => Choice(Param(:high_visited_count_threshold, …

In [24]:
"""
    invert_output(params)

Objective function for the hyperparameter search.

Reads the six Randomwalk settings from `params`, coerces the integer-valued ones
to `Int`, and calls `evaluate_u2i` on a fresh `Randomwalk()` with the global
`train_table`, `valid_table` and `metrics`. Logs `params` together with the
evaluation result and returns the negated `ndcg10` entry of that result
(negated presumably because the optimizer minimizes its objective).
"""
function invert_output(params)
    # Finite thresholds are converted to Int; Inf is passed through unchanged.
    as_threshold(x) = x < Inf ? convert(Int, x) : x

    hyperparams = (
        terminate_prob=params[:terminate_prob],
        total_walk_length=round(Int, params[:total_walk_length]),
        min_high_visited_candidates=convert(Int, params[:min_high_visited_candidates]),
        high_visited_count_threshold=as_threshold(params[:high_visited_count_threshold]),
        pixie_walk_length_scaling=params[:pixie_walk_length_scaling],
        pixie_multi_hit_boosting=params[:pixie_multi_hit_boosting],
    )

    result = evaluate_u2i(
        Randomwalk(),
        train_table,
        valid_table,
        metrics,
        10;
        col_user=:userid,
        col_item=:movieid,
        col_rating=:rating,
        drop_history=true,
        hyperparams...,
    )
    @info params, result
    return -result[:ndcg10]
end

invert_output (generic function with 1 method)

In [25]:
# Search `space` with `invert_output` as the objective; 100 is the third positional
# argument to `fmin` (presumably the number of trials). `best` holds the returned
# parameter dictionary.
best = fmin(invert_output, space, 100; logging_interval=-1)

┌ Info: (Dict{Symbol, Any}(:pixie_walk_length_scaling => false, :pixie_multi_hit_boosting => false, :terminate_prob => 0.4, :total_walk_length => 8418.626505476654, :min_high_visited_candidates => 1000, :high_visited_count_threshold => 64.0), (ndcg10 = 0.19117675883864965, precision10 = 0.15329787234042494, recall10 = 0.14270662871994222))
└ @ Main In[24]:29
┌ Info: (Dict{Symbol, Any}(:pixie_walk_length_scaling => true, :pixie_multi_hit_boosting => true, :terminate_prob => 0.8, :total_walk_length => 29101.080159576297, :min_high_visited_candidates => 50, :high_visited_count_threshold => Inf), (ndcg10 = 0.21381181471898275, precision10 = 0.1667021276595735, recall10 = 0.1588470890926771))
└ @ Main In[24]:29
┌ Info: (Dict{Symbol, Any}(:pixie_walk_length_scaling => false, :pixie_multi_hit_boosting => true, :terminate_prob => 0.5, :total_walk_length => 19465.099600589405, :min_high_visited_candidates => 10, :high_visited_count_threshold => 4.0), (ndcg10 = 0.20112198501695466, precision10 =

Dict{Symbol, Any} with 6 entries:
  :pixie_walk_length_scaling    => false
  :pixie_multi_hit_boosting     => false
  :terminate_prob               => 0.9
  :total_walk_length            => 51307.2
  :min_high_visited_candidates  => 500
  :high_visited_count_threshold => 64.0

In [26]:
# Final evaluation with the hyperparameters stored in `best`: the model is given
# train_valid_table and scored against test_table.
terminate_prob = best[:terminate_prob]
total_walk_length = round(Int, best[:total_walk_length])
min_high_visited_candidates = convert(Int, best[:min_high_visited_candidates])
# Finite thresholds are converted to Int; Inf is passed through unchanged.
high_visited_count_threshold = let threshold = best[:high_visited_count_threshold]
    threshold < Inf ? convert(Int, threshold) : threshold
end
pixie_walk_length_scaling = best[:pixie_walk_length_scaling]
pixie_multi_hit_boosting = best[:pixie_multi_hit_boosting]

model = Randomwalk()
result = evaluate_u2i(
    model,
    train_valid_table,
    test_table,
    metrics,
    10;
    col_user=:userid,
    col_item=:movieid,
    col_rating=:rating,
    drop_history=true,
    terminate_prob,
    total_walk_length,
    min_high_visited_candidates,
    high_visited_count_threshold,
    pixie_walk_length_scaling,
    pixie_multi_hit_boosting,
)

(ndcg10 = 0.3254783289314545,
 precision10 = 0.26341463414634075,
 recall10 = 0.18283663305964448,)