In [1]:
using Pkg; Pkg.activate(dirname(pwd()))

[32m[1m  Activating[22m[39m project at `/media/yuehhua/Workbench/workspace/machine-learning.jl`


# Model selection

In [2]:
using RDatasets
using MLJ

## Load data

In [3]:
iris = dataset("datasets", "iris")
first(iris, 6)

Unnamed: 0_level_0,SepalLength,SepalWidth,PetalLength,PetalWidth,Species
Unnamed: 0_level_1,Float64,Float64,Float64,Float64,Cat…
1,5.1,3.5,1.4,0.2,setosa
2,4.9,3.0,1.4,0.2,setosa
3,4.7,3.2,1.3,0.2,setosa
4,4.6,3.1,1.5,0.2,setosa
5,5.0,3.6,1.4,0.2,setosa
6,5.4,3.9,1.7,0.4,setosa


## Preprocessing

In [4]:
y, X = unpack(iris, ==(:Species), colname -> true);
first(X, 6)

Unnamed: 0_level_0,SepalLength,SepalWidth,PetalLength,PetalWidth
Unnamed: 0_level_1,Float64,Float64,Float64,Float64
1,5.1,3.5,1.4,0.2
2,4.9,3.0,1.4,0.2
3,4.7,3.2,1.3,0.2
4,4.6,3.1,1.5,0.2
5,5.0,3.6,1.4,0.2
6,5.4,3.9,1.7,0.4


## Model

In [5]:
LogisticClassifier = @load LogisticClassifier pkg=MLJLinearModels

import MLJLinearModels ✔


┌ Info: For silent loading, specify `verbosity=0`. 
└ @ Main /home/yuehhua/.julia/packages/MLJModels/lDzCR/src/loading.jl:168


MLJLinearModels.LogisticClassifier

In [6]:
info(LogisticClassifier)

(name = "LogisticClassifier",
 package_name = "MLJLinearModels",
 is_supervised = true,
 abstract_type = Probabilistic,
 deep_properties = (),
 docstring = "Classifier corresponding to the loss function ``L(...",
 fit_data_scitype =
     Tuple{Table{<:AbstractVector{<:Continuous}}, AbstractVector{<:Finite}},
 human_name = "logistic classifier",
 hyperparameter_ranges =
     (nothing, nothing, nothing, nothing, nothing, nothing, nothing),
 hyperparameter_types = ("Real",
                         "Real",
                         "Union{String, Symbol}",
                         "Bool",
                         "Bool",
                         "Bool",
                         "Union{Nothing, MLJLinearModels.Solver}"),
 hyperparameters = (:lambda,
                    :gamma,
                    :penalty,
                    :fit_intercept,
                    :penalize_intercept,
                    :scale_penalty_with_samples,
                    :solver),
 implemented_methods = [:Logisti

In [7]:
clsfier = LogisticClassifier()

LogisticClassifier(
  lambda = 1.0, 
  gamma = 0.0, 
  penalty = :l2, 
  fit_intercept = true, 
  penalize_intercept = false, 
  scale_penalty_with_samples = true, 
  solver = nothing)

In [8]:
r = range(clsfier, :lambda, lower=1e-3, upper=1.0, scale=:log);

In [9]:
tuning_model = TunedModel(model=clsfier,
                          resampling = CV(nfolds=5),
                          tuning = Grid(),
                          range = r,
                          measure = accuracy)

ProbabilisticTunedModel(
  model = LogisticClassifier(
        lambda = 1.0, 
        gamma = 0.0, 
        penalty = :l2, 
        fit_intercept = true, 
        penalize_intercept = false, 
        scale_penalty_with_samples = true, 
        solver = nothing), 
  tuning = Grid(
        goal = nothing, 
        resolution = 10, 
        shuffle = true, 
        rng = Random._GLOBAL_RNG()), 
  resampling = CV(
        nfolds = 5, 
        shuffle = false, 
        rng = Random._GLOBAL_RNG()), 
  measure = Accuracy(), 
  weights = nothing, 
  class_weights = nothing, 
  operation = nothing, 
  range = NumericRange(0.001 ≤ lambda ≤ 1.0; origin=0.5005, unit=0.4995) on log scale, 
  selection_heuristic = MLJTuning.NaiveSelection(nothing), 
  train_best = true, 
  repeats = 1, 
  n = nothing, 
  acceleration = CPU1{Nothing}(nothing), 
  acceleration_resampling = CPU1{Nothing}(nothing), 
  check_measure = true, 
  cache = true)

## Training

In [10]:
best_clsfier = machine(clsfier, X, y) |> fit!

┌ Info: Training machine(LogisticClassifier(lambda = 1.0, …), …).
└ @ MLJBase /home/yuehhua/.julia/packages/MLJBase/rQDaq/src/machines.jl:487
┌ Info: Solver: MLJLinearModels.LBFGS()
└ @ MLJLinearModels /home/yuehhua/.julia/packages/MLJLinearModels/2qDvV/src/mlj/interface.jl:76


Machine trained 1 time; caches data
  model: LogisticClassifier(lambda = 1.0, …)
  args: 
    1:	Source @921 ⏎ `Table{AbstractVector{Continuous}}`
    2:	Source @983 ⏎ `AbstractVector{Multiclass{3}}`


## Predict

In [11]:
ŷ = predict_mode(best_clsfier, rows=1:100)

100-element CategoricalArrays.CategoricalArray{String,1,UInt8}:
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 "setosa"
 ⋮
 "versicolor"
 "versicolor"
 "versicolor"
 "virginica"
 "versicolor"
 "versicolor"
 "versicolor"
 "versicolor"
 "versicolor"
 "versicolor"
 "versicolor"
 "versicolor"

## Evaluation

In [12]:
accuracy(ŷ, y[1:100])

0.83