In [1]:
using DataFrames, TableOperations, Tables, Random
using Recommenders: Movielens1M, load_dataset, ratio_split, ItemkNN, evaluate_u2i, MeanPrecision, MeanRecall, MeanNDCG

In [2]:
# Create the MovieLens-1M dataset descriptor.
ml1M = Movielens1M()
# `download` is not in the explicit `using Recommenders: ...` list, so this name
# resolves to `Base.download`; presumably Recommenders adds a method for the
# dataset type — TODO confirm.
download(ml1M)
# Unpack the three tables returned by the loader; only `rating` is used by the
# cells below. The trailing `;` suppresses the notebook display.
rating, user, movie = load_dataset(ml1M);

In [3]:
# Lazily overwrite every value of the :rating column with the constant 1.0
# (presumably to treat each interaction as implicit feedback).
rating = TableOperations.transform(rating, Dict(:rating => _ -> 1.0))

TableOperations.Transforms{true, CSV.File{false}, Dict{Symbol, var"#1#2"}}(CSV.File("/Users/keisuke.yanagi/workspace/Recommenders.jl/src/dataset/../../dataset/movielens1m/ratings.dat"):
Size: 1000209 x 4
Tables.Schema:
 :userid     Int64
 :movieid    Int64
 :rating     Int64
 :timestamp  Int64, Dict(:rating => var"#1#2"()))

In [4]:
# Seed the global RNG before splitting (presumably `ratio_split` draws from it,
# which would make the splits reproducible — confirm against the library).
Random.seed!(1234);

# First split: train+valid vs. test, with ratio 0.8.
train_valid_table, test_table = ratio_split(rating, 0.8)
# Second split: train vs. valid, again with ratio 0.8.
train_table, valid_table = ratio_split(train_valid_table, 0.8)

# Row counts of the three partitions, in train/valid/test order.
map(tbl -> length(Tables.rows(tbl)), (train_table, valid_table, test_table))


(640134, 160033, 200042)

In [5]:
# Ranking metrics, each constructed with the argument 10.
prec10, recall10, ndcg10 = MeanPrecision(10), MeanRecall(10), MeanNDCG(10)
# Collected in one vector so they can be passed together to `evaluate_u2i`.
metrics = [prec10, recall10, ndcg10]

3-element Vector{Recommenders.MeanMetric}:
 Recommenders.MeanMetric{Recommenders.Precision}(Recommenders.Precision(10, "precision"))
 Recommenders.MeanMetric{Recommenders.Recall}(Recommenders.Recall(10, "recall"))
 Recommenders.MeanMetric{Recommenders.NDCG}(Recommenders.NDCG(10, "ndcg"))

In [6]:
using TreeParzen

In [7]:
# Hyperparameter search space handed to `fmin`; each key is read back by name
# when an ItemkNN model is built from a sampled configuration.
space = Dict(
    # Quantized uniform over [10, 500] with step 1 (sampled as a Float64).
    :topk => HP.QuantUniform(:topk, 10.0, 500.0, 1.0),
    # Log-uniform; the bounds are given in log space (log(1e-3) .. log(1e3)).
    :shrink => HP.LogUniform(:shrink, log(1e-3), log(1e3)),
    # The weighting scheme is bundled with its `weighting_at_inference` flag so
    # that the flag is only searched over when the scheme is :bm25.
    :weighting => HP.Choice(
        :weighting,
        [
            Dict(:weighting => :dummy, :weighting_at_inference => false),
            Dict(:weighting => :tfidf, :weighting_at_inference => false),
            Dict(
                :weighting => :bm25,
                :weighting_at_inference =>
                    HP.Choice(:weighting_at_inference, [true, false]),
            ),
        ],
    ),
    :normalize => HP.Choice(:normalize, [true, false]),
    :normalize_similarity => HP.Choice(:normalize_similarity, [true, false]),
)

Dict{Symbol, TreeParzen.Types.AbstractDelayed} with 5 entries:
  :weighting            => Choice(Param(:weighting, RandIndex(3)), Dict{Symbol,…
  :topk                 => QuantUniform(:topk, QuantUniform(10.0, 500.0, 1.0))
  :normalize            => Choice(Param(:normalize, RandIndex(2)), Bool[1, 0])
  :normalize_similarity => Choice(Param(:normalize_similarity, RandIndex(2)), B…
  :shrink               => LogUniform(:shrink, LogUniform(-6.90776, 6.90776))

In [8]:
"""
    invert_output(params)

Objective function for the hyperparameter search.

Build an `ItemkNN` model from the sampled configuration `params`, evaluate it
with `evaluate_u2i` using the notebook globals `train_table`, `valid_table` and
`metrics`, log the configuration together with the scores, and return the
negated `ndcg10` score (negated so that a minimizer favours a higher score).

`params` is indexed with the keys `:topk`, `:shrink`, `:weighting` (itself a
collection holding `:weighting` and `:weighting_at_inference`), `:normalize`
and `:normalize_similarity`.
"""
function invert_output(params)
    # `:topk` is sampled as a Float64 (e.g. 486.0), so it is converted to Int.
    k = convert(Int, params[:topk])
    shrink = params[:shrink]
    weighting_cfg = params[:weighting]
    model = ItemkNN(
        k,
        shrink,
        weighting_cfg[:weighting],
        weighting_cfg[:weighting_at_inference],
        params[:normalize],
        params[:normalize_similarity],
    )
    result = evaluate_u2i(
        model,
        train_table,
        valid_table,
        metrics,
        10;
        col_user=:userid,
        col_item=:movieid,
        col_rating=:rating,
        drop_history=true,
    )
    @info params, result
    return -result[:ndcg10]
end

invert_output (generic function with 1 method)

In [9]:
# Run 100 evaluations of `invert_output` over `space` and keep the best
# configuration found. NOTE(review): `logging_interval=-1` presumably turns off
# fmin's own progress logging — confirm against the TreeParzen docs.
best = fmin(invert_output, space, 100; logging_interval=-1)

┌ Info: (Dict{Symbol, Any}(:weighting => Dict{Symbol, Any}(:weighting => :bm25, :weighting_at_inference => false), :topk => 486.0, :normalize => true, :normalize_similarity => false, :shrink => 0.015910234017397357), (ndcg10 = 0.21896030345055192, precision10 = 0.1900198938992067, recall10 = 0.11545900221409211))
└ @ Main In[8]:5
┌ Info: (Dict{Symbol, Any}(:weighting => Dict{Symbol, Any}(:weighting => :dummy, :weighting_at_inference => false), :topk => 417.0, :normalize => false, :normalize_similarity => false, :shrink => 0.5673740885280141), (ndcg10 = 0.1619520908466506, precision10 = 0.14142904509284215, recall10 = 0.0754406617408898))
└ @ Main In[8]:5
┌ Info: (Dict{Symbol, Any}(:weighting => Dict{Symbol, Any}(:weighting => :tfidf, :weighting_at_inference => false), :topk => 390.0, :normalize => true, :normalize_similarity => false, :shrink => 9.556256657892986), (ndcg10 = 0.2158780204181549, precision10 = 0.18746684350132897, recall10 = 0.11287998740571903))
└ @ Main In[8]:5
┌ Info:

Dict{Symbol, Any} with 5 entries:
  :weighting            => Dict{Symbol, Any}(:weighting=>:tfidf, :weighting_at_…
  :topk                 => 43.0
  :normalize            => true
  :normalize_similarity => true
  :shrink               => 0.453735

In [10]:
# Rebuild the model from the best configuration found by the search.
best_weighting = best[:weighting]
best_model = ItemkNN(
    convert(Int, best[:topk]),  # stored as a Float64 (e.g. 43.0)
    best[:shrink],
    best_weighting[:weighting],
    best_weighting[:weighting_at_inference],
    best[:normalize],
    best[:normalize_similarity],
)
# Final evaluation: fit on train+valid and score on the held-out test split.
evaluate_u2i(
    best_model,
    train_valid_table,
    test_table,
    metrics,
    10;
    col_user=:userid,
    col_item=:movieid,
    col_rating=:rating,
    drop_history=true,
)

(ndcg10 = 0.37343299247542416,
 precision10 = 0.3311309819506524,
 recall10 = 0.14892059741684313,)