-
Notifications
You must be signed in to change notification settings - Fork 28
/
ml_xgb.clj
119 lines (117 loc) · 4.46 KB
/
ml_xgb.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
(ns zero-one.geni.ml-xgb
(:require
[zero-one.geni.interop :as interop]
[zero-one.geni.utils :refer [coalesce]])
(:import
(ml.dmlc.xgboost4j.scala.spark XGBoostClassifier
XGBoostRegressor)))
(defn xgboost-classifier
  "Builds an XGBoostClassifier by handing a property map to
  `interop/instantiate`.

  The property map is the default map below with `params` merged on top, so
  any key supplied by the caller overrides the default of the same name.

  `:max-bins` is always written into the final map. Its value is chosen with
  `coalesce` from, in order: `(:max-bin params)`, `(:max-bins params)` and the
  default `:max-bin`. NOTE(review): this assumes `coalesce` yields its first
  non-nil argument - confirm against zero-one.geni.utils."
  [params]
  (let [default-props {:allow-non-zero-for-missing  false
                       :alpha                       0.0
                       :base-score                  0.5
                       :cache-training-set          false
                       :checkpoint-interval         -1
                       :checkpoint-path             ""
                       :colsample-bylevel           1.0
                       :colsample-bytree            1.0
                       :custom-eval                 nil
                       :custom-obj                  nil
                       :dmlc-worker-connect-retry   5
                       :eta                         0.3
                       :features-col                "features"
                       :gamma                       0.0
                       :grow-policy                 "depthwise"
                       :label-col                   "label"
                       :lambda                      1.0
                       :lambda-bias                 0.0
                       :max-bin                     16
                       :max-delta-step              0.0
                       :max-depth                   6
                       :min-child-weight            1.0
                       :missing                     0.0
                       :normalize-type              "tree"
                       :nthread                     1
                       :num-early-stopping-rounds   0
                       :num-round                   1
                       :num-workers                 1
                       :objective                   "reg:squarederror"
                       :prediction-col              "prediction"
                       :probability-col             "probability"
                       :rabit-ring-reduce-threshold 32768
                       :rabit-timeout               -1
                       :rate-drop                   0.0
                       :raw-prediction-col          "rawPrediction"
                       :sample-type                 "uniform"
                       :scale-pos-weight            1.0
                       :seed                        0
                       :silent                      0
                       :sketch-eps                  0.03
                       :skip-drop                   0.0
                       :subsample                   1.0
                       :timeout-request-workers     1800000
                       :train-test-ratio            1.0
                       :tree-limit                  0
                       :tree-method                 "auto"
                       :use-external-memory         false
                       :verbosity                   1}
        ;; Accept either spelling from the caller; :max-bin is consulted first.
        bin-count     (coalesce (get params :max-bin)
                                (get params :max-bins)
                                (get default-props :max-bin))
        ;; Caller-supplied keys win over defaults; :max-bins is set last.
        settings      (assoc (merge default-props params) :max-bins bin-count)]
    (interop/instantiate XGBoostClassifier settings)))
(defn xgboost-regressor
  "Builds an XGBoostRegressor by handing a property map to
  `interop/instantiate`.

  The property map is the default map below with `params` merged on top, so
  any key supplied by the caller overrides the default of the same name.

  `:max-bins` is always written into the final map. Its value is chosen with
  `coalesce` from, in order: `(:max-bin params)`, `(:max-bins params)` and the
  default `:max-bin`. NOTE(review): this assumes `coalesce` yields its first
  non-nil argument - confirm against zero-one.geni.utils."
  [params]
  (let [default-props {:allow-non-zero-for-missing  false
                       :alpha                       0.0
                       :base-score                  0.5
                       :cache-training-set          false
                       :checkpoint-interval         -1
                       :checkpoint-path             ""
                       :colsample-bylevel           1.0
                       :colsample-bytree            1.0
                       :custom-eval                 nil
                       :custom-obj                  nil
                       :dmlc-worker-connect-retry   5
                       :eta                         0.3
                       :features-col                "features"
                       :gamma                       0.0
                       :grow-policy                 "depthwise"
                       :label-col                   "label"
                       :lambda                      1.0
                       :lambda-bias                 0.0
                       :max-bin                     16
                       :max-delta-step              0.0
                       :max-depth                   6
                       :min-child-weight            1.0
                       :missing                     0.0
                       :normalize-type              "tree"
                       :nthread                     1
                       :num-early-stopping-rounds   0
                       :num-round                   1
                       :num-workers                 1
                       :objective                   "reg:squarederror"
                       :prediction-col              "prediction"
                       :rabit-ring-reduce-threshold 32768
                       :rabit-timeout               -1
                       :rate-drop                   0.0
                       :sample-type                 "uniform"
                       :scale-pos-weight            1.0
                       :seed                        0
                       :silent                      0
                       :sketch-eps                  0.03
                       :skip-drop                   0.0
                       :subsample                   1.0
                       :timeout-request-workers     1800000
                       :train-test-ratio            1.0
                       :tree-limit                  0
                       :tree-method                 "auto"
                       :use-external-memory         false
                       :verbosity                   1}
        ;; Accept either spelling from the caller; :max-bin is consulted first.
        bin-count     (coalesce (get params :max-bin)
                                (get params :max-bins)
                                (get default-props :max-bin))
        ;; Caller-supplied keys win over defaults; :max-bins is set last.
        settings      (assoc (merge default-props params) :max-bins bin-count)]
    (interop/instantiate XGBoostRegressor settings)))