-
Notifications
You must be signed in to change notification settings - Fork 28
/
ml_regression.clj
129 lines (122 loc) · 5.65 KB
/
ml_regression.clj
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
(ns zero-one.geni.ml-regression
(:require
[zero-one.geni.interop :as interop]
[zero-one.geni.utils :refer [coalesce]])
(:import
(org.apache.spark.ml.regression AFTSurvivalRegression
DecisionTreeRegressor
GBTRegressor
GeneralizedLinearRegression
IsotonicRegression
LinearRegression
RandomForestRegressor)))
(defn linear-regression
  "Builds a `LinearRegression` via `interop/instantiate`.

  `params` is a map of option keywords; it is merged over the defaults
  below, so caller-supplied keys win. The British spelling
  `:standardisation` is accepted as an alias for `:standardization`: the
  effective value is `(coalesce british american default)`, and only the
  American key is forwarded.

  Returns the value produced by `interop/instantiate`."
  [params]
  (let [defaults {:max-iter 100
                  :tol 1.0E-6
                  :elastic-net-param 0.0
                  :reg-param 0.0
                  :aggregation-depth 2
                  :fit-intercept true
                  :label-col "label"
                  :standardization true
                  :epsilon 1.35
                  :loss "squaredError"
                  :prediction-col "prediction"
                  :features-col "features"
                  :solver "auto"}
        ;; NOTE(review): assumes `coalesce` picks its first non-nil argument,
        ;; so an explicit `false` is honoured -- confirm in zero-one.geni.utils.
        std      (coalesce (:standardisation params)
                           (:standardization params)
                           (:standardization defaults))
        props    (-> defaults
                     (merge params)
                     ;; The alias has been folded into `std`; drop it so the
                     ;; non-canonical key is not passed on to instantiate.
                     (dissoc :standardisation)
                     (assoc :standardization std))]
    (interop/instantiate LinearRegression props)))
(defn generalised-linear-regression
  "Builds a `GeneralizedLinearRegression` via `interop/instantiate`.

  `params` is a map of option keywords layered on top of the defaults
  listed in the body; any key present in `params` overrides its default.

  Returns the value produced by `interop/instantiate`."
  [params]
  (->> params
       ;; Defaults come first in the merge so that `params` takes precedence.
       (merge {:max-iter 25
               :variance-power 0.0
               :family "gaussian"
               :tol 1.0E-6
               :reg-param 0.0
               :fit-intercept true
               :label-col "label"
               :prediction-col "prediction"
               :features-col "features"
               :solver "irls"})
       (interop/instantiate GeneralizedLinearRegression)))
;; American-spelling alias bound to the same function value as
;; `generalised-linear-regression`.
(def generalized-linear-regression generalised-linear-regression)
;; Short alias bound to the same function value as
;; `generalised-linear-regression`.
(def glm generalised-linear-regression)
(defn decision-tree-regressor
  "Builds a `DecisionTreeRegressor` via `interop/instantiate`.

  `params` is a map of option keywords layered on top of the defaults
  listed in the body; any key present in `params` overrides its default.
  Note that the default `:seed` is a fixed constant.

  Returns the value produced by `interop/instantiate`."
  [params]
  (->> params
       ;; Defaults come first in the merge so that `params` takes precedence.
       (merge {:max-bins 32
               :min-info-gain 0.0
               :impurity "variance"
               :cache-node-ids false
               :seed 926680331
               :label-col "label"
               :checkpoint-interval 10
               :max-depth 5
               :max-memory-in-mb 256
               :prediction-col "prediction"
               :features-col "features"
               :min-instances-per-node 1})
       (interop/instantiate DecisionTreeRegressor)))
(defn random-forest-regressor
  "Builds a `RandomForestRegressor` via `interop/instantiate`.

  `params` is a map of option keywords layered on top of the defaults
  listed in the body; any key present in `params` overrides its default.
  Note that the default `:seed` is a fixed constant.

  Returns the value produced by `interop/instantiate`."
  [params]
  (->> params
       ;; Defaults come first in the merge so that `params` takes precedence.
       (merge {:max-bins 32
               :subsampling-rate 1.0
               :min-info-gain 0.0
               :impurity "variance"
               :cache-node-ids false
               :seed 235498149
               :label-col "label"
               :feature-subset-strategy "auto"
               :checkpoint-interval 10
               :max-depth 5
               :max-memory-in-mb 256
               :prediction-col "prediction"
               :features-col "features"
               :min-instances-per-node 1
               :num-trees 20})
       (interop/instantiate RandomForestRegressor)))
(defn gbt-regressor
  "Builds a `GBTRegressor` via `interop/instantiate`.

  `params` is a map of option keywords layered on top of the defaults
  listed in the body; any key present in `params` overrides its default.
  Note that the default `:seed` is a fixed constant.

  Returns the value produced by `interop/instantiate`."
  [params]
  (->> params
       ;; Defaults come first in the merge so that `params` takes precedence.
       (merge {:max-bins 32
               :subsampling-rate 1.0
               :max-iter 20
               :step-size 0.1
               :min-info-gain 0.0
               :impurity "variance"
               :cache-node-ids false
               :seed -131597770
               :label-col "label"
               :feature-subset-strategy "all"
               :checkpoint-interval 10
               :loss-type "squared"
               :max-depth 5
               :max-memory-in-mb 256
               :prediction-col "prediction"
               :features-col "features"
               :min-instances-per-node 1})
       (interop/instantiate GBTRegressor)))
(defn aft-survival-regression
  "Builds an `AFTSurvivalRegression` via `interop/instantiate`.

  `params` is a map of option keywords; it is merged over the defaults
  below, so caller-supplied keys win. The default
  `:quantile-probabilities` is the nine-element vector bound to `q-probs`.

  Returns the value produced by `interop/instantiate`."
  [params]
  (let [q-probs  [0.01 0.05 0.1 0.25 0.5 0.75 0.9 0.95 0.99]
        defaults {:max-iter 100
                  :tol 1.0E-6
                  :quantile-probabilities q-probs
                  :aggregation-depth 2
                  :fit-intercept true
                  :label-col "label"
                  :censor-col "censor"
                  :prediction-col "prediction"
                  :features-col "features"}
        ;; Plain merge: the former single-form `->` wrapper was a no-op.
        props    (merge defaults params)]
    (interop/instantiate AFTSurvivalRegression props)))
(defn isotonic-regression
  "Builds an `IsotonicRegression` via `interop/instantiate`.

  `params` is a map of option keywords; it is merged over the defaults
  below, so caller-supplied keys win.

  Returns the value produced by `interop/instantiate`."
  [params]
  (let [defaults {:prediction-col "prediction"
                  :features-col "features"
                  :isotonic true
                  :label-col "label"
                  :feature-index 0}
        ;; Plain merge: the former single-form `->` wrapper was a no-op.
        props    (merge defaults params)]
    (interop/instantiate IsotonicRegression props)))