This repository has been archived by the owner on Apr 26, 2021. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 6
/
README.jl
100 lines (55 loc) · 2 KB
/
README.jl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
using JLBoost, RDatasets

# Load the iris table from RDatasets and add a Boolean column that is true
# exactly where the Species column equals "setosa".
iris = dataset("datasets", "iris")
iris.is_setosa = iris.Species .== "setosa"
target = :is_setosa

# Column names with the species label and the derived target left out.
features = filter(name -> name ∉ ("Species", "is_setosa"), names(iris))

# Fit one tree; see `?jlboost` for more details.
xgtreemodel = jlboost(iris, target)

# Look at the types of the pieces held by the fitted model.
typeof(trees(xgtreemodel))
typeof(xgtreemodel.loss)
typeof(xgtreemodel.target)

# Two more fits with explicit keyword options.
xgtreemodel2 = jlboost(iris, target; nrounds = 2, max_depth = 2)
xgtreemodel3 = jlboost(iris, target; nrounds = 2, max_leaves = 8, max_depth = 0)

# Store the predictions of each model, and of the two models concatenated
# with `vcat`, as new columns of `iris`.
iris[!, :pred1] = JLBoost.predict(xgtreemodel, iris)
iris[!, :pred2] = JLBoost.predict(xgtreemodel2, iris)
iris[!, :pred1_plus_2] = JLBoost.predict(vcat(xgtreemodel, xgtreemodel2), iris)

# Score the negated first-model predictions against the Boolean target.
JLBoost.AUC(-iris[!, :pred1], iris[!, :is_setosa])
JLBoost.gini(-iris[!, :pred1], iris[!, :is_setosa])

feature_importance(xgtreemodel2, iris)
using DataFrames
using JLBoost
using LossFunctions: L2DistLoss

# Synthetic regression data: x is 100 random values scaled by 100, and
# y is 2x plus a second random draw.
df = DataFrame(x = rand(100) * 100)
df[!, :y] = 2 * df.x .+ rand(100)

# Table operations shown on `df`. They are placed after `df` is built (and after
# DataFrames is loaded) because calling them earlier, with `rows` and `cols`
# never bound, stops the script with an UndefVarError.
nrow(df)              # returns the number of rows
ncol(df)              # returns the number of columns
view(df, 1:10, [:x])  # a view of the chosen rows and columns

target = :y
features = [:x]

# One starting value (0.0) per row of `df`, passed to `jlboost` below.
warm_start = fill(0.0, nrow(df))
loss = L2DistLoss()

jlboost(df, target, features, warm_start, loss; max_depth = 2) # default max_depth = 6
# Write the fitted model, and separately the result of `trees(xgtreemodel)`,
# each to its own file.
JLBoost.save(xgtreemodel, "model.jlb")
JLBoost.save(trees(xgtreemodel), "model_tree.jlb")

# Read both files back in the order they were written.
foreach(JLBoost.load, ("model.jlb", "model_tree.jlb"))
using JLBoost, RDatasets, JDF

# Load the iris table and add a Boolean target that is true where the Species
# column equals "setosa".
iris = dataset("datasets", "iris")
iris[!, :is_setosa] = iris[!, :Species] .== "setosa"
target = :is_setosa

# Convert the column names to Symbols before removing the excluded columns.
# Comparing String names against Symbols would remove nothing, which would leave
# both Species and the target itself in the feature list.
features = setdiff(Symbol.(names(iris)), [:Species, :is_setosa])

# Write the table to disk in JDF format and open a handle to the saved copy.
savejdf("iris.jdf", iris)
irisdisk = JDFFile("iris.jdf")

# fit using on disk JDF format
xgtree1 = jlboost(irisdisk, target, features)
xgtree2 = jlboost(iris, target, features; nrounds = 2, max_depth = 2)

# predict using on disk JDF format
# (calls are module-qualified, matching the JLBoost.predict / JLBoost.AUC /
# JLBoost.gini usage elsewhere in this file)
iris.pred1 = JLBoost.predict(xgtree1, irisdisk)
iris.pred2 = JLBoost.predict(xgtree2, irisdisk)

# AUC
JLBoost.AUC(-JLBoost.predict(xgtree1, irisdisk), irisdisk[!, :is_setosa])

# gini
JLBoost.gini(-JLBoost.predict(xgtree1, irisdisk), irisdisk[!, :is_setosa])

# clean up
rm("iris.jdf", force=true, recursive=true)