-
Notifications
You must be signed in to change notification settings - Fork 2
/
kann.h
271 lines (231 loc) · 10.2 KB
/
kann.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
/*
The MIT License
Copyright (c) 2016, 2017 Broad Institute
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#ifndef KANN_H
#define KANN_H
#define KANN_VERSION "r536"
/* external flags attached to computational-graph nodes (bit masks) */
#define KANN_F_IN 0x1 /* input */
#define KANN_F_OUT 0x2 /* output */
#define KANN_F_TRUTH 0x4 /* truth output */
#define KANN_F_COST 0x8 /* final cost */
/* cost (loss) function types, used by kann_layer_cost() */
#define KANN_C_CEB 1 /* binary cross-entropy cost, used with sigmoid */
#define KANN_C_CEM 2 /* multi-class cross-entropy cost, used with softmax */
#define KANN_C_CEB_NEG 3 /* binary cross-entropy-like cost, used with tanh */
#define KANN_C_MSE 4 /* mean square error */
/* flags for the RNN/LSTM/GRU layer constructors */
#define KANN_RNN_VAR_H0 0x1 /* take the initial hidden values as variables */
#define KANN_RNN_NORM 0x2 /* apply layer normalization */
/* MPI reduction strategies -- NOTE(review): names suggest alternating vs.
 * all-to-all gradient reduction variants; confirm against the *_mpi
 * implementations before relying on any specific semantics */
#define NOREDUCE 0
#define ALTREDUCE 1
#define ALLREDUCE 2
#define ALTREDUCE_ONEWAY 3
#define ALLREDUCE_ONEWAY 4
#define ALLREDUCE_IN 5
#define CNN_REDUCE 6
#include "kautodiff.h"
/* A network: a compiled computational graph plus collated value/gradient
 * buffers shared by all nodes. Created by kann_new(); freed by kann_delete()
 * or kann_delete_unrolled(). */
typedef struct {
int n; /* number of nodes in the computational graph */
kad_node_t **v; /* list of nodes */
float *x, *g, *c; /* collated variable values, gradients and constant values */
void *mt; /* auxiliary data for multi-threading; NULL if multi-threading disabled */
} kann_t;
/* State of the deterministic random-number helper (see kann_drand_init(),
 * kann_drand_fetch(), kann_drand_count()).
 *
 * Fixes two defects in the original:
 *  - the `typedef` keyword was missing, so the header tentatively DEFINED a
 *    variable named `drand_t` (of anonymous struct type) in every translation
 *    unit that includes it, instead of declaring a type like the sibling
 *    `kann_dok_t`;
 *  - the field comment was a copy-paste of kann_t.n's comment and described
 *    graph nodes, which is unrelated to this struct. */
typedef struct {
int cnt; /* count of values drawn so far -- TODO confirm against kann_drand_fetch() */
} drand_t;
/* Sparse index table in a DOK/adjacency-list-like layout -- presumably the
 * per-user positive-item lists consumed by kann_train_fnn1_mpi_onehot();
 * verify against the implementation before reuse. */
typedef struct {
int n_row; /* number of rows */
int *sze; /* sze[i]: number of entries stored for row i -- TODO confirm */
int **idx; /* idx[i]: the entry indices of row i -- TODO confirm */
} kann_dok_t;
extern int kann_verbose; /* library-wide verbosity switch */
/* convenience wrappers over the kautodiff (kad_*) primitives */
#define kann_size_var(a) kad_size_var((a)->n, (a)->v) /* total number of variable elements */
#define kann_size_const(a) kad_size_const((a)->n, (a)->v) /* total number of constant elements */
#define kann_dim_in(a) kann_feed_dim((a), KANN_F_IN, 0) /* size of the default input node */
#define kann_dim_out(a) kann_feed_dim((a), KANN_F_TRUTH, 0) /* size of the default truth node */
#define kann_srand(seed) kad_srand(0, (seed)) /* seed the global kad RNG */
#define kann_drand() kad_drand(0) /* draw from the global kad RNG */
#define kann_set_batch_size(ann, B) kad_sync_dim((ann)->n, (ann)->v, (B)) /* resize the mini-batch dimension */
#ifdef __cplusplus
extern "C" {
#endif
/**
 * Generate a network from a computational graph
 *
 * A network must have at least one scalar cost node (i.e. whose n_d==0). It
 * may optionally contain other cost nodes or output nodes not leading to the
 * primary cost node.
 *
 * @param cost cost node (must be a scalar, i.e. cost->n_d==0)
 * @param n_rest number of other nodes without predecessors
 * @param ... other nodes (of type kad_node_t*) without predecessors
 *
 * @return network on success, or NULL otherwise
 */
kann_t *kann_new(kad_node_t *cost, int n_rest, ...);
/**
 * Unroll an RNN
 *
 * @param a network
 * @param ... unroll length(s) of type int; the declaration is variadic (the
 *            earlier doc said "len", which no longer matches the prototype) --
 *            see kann_unroll_array() for the explicit-array form
 *
 * @return an unrolled network, or NULL if the network is not an RNN
 */
kann_t *kann_unroll(kann_t *a, ...);
kann_t *kann_unroll_array(kann_t *a, int *len); /* array form of kann_unroll() */
kann_t *kann_clone(kann_t *a, int batch_size); /* duplicate a network with a new batch size */
void kann_delete(kann_t *a); /* delete a network generated by kann_new() or kann_layer_final() */
void kann_delete_unrolled(kann_t *a); /* delete a network generated by kann_unroll() */
/**
 * Enable/disable multi-threading (requiring pthread)
 *
 * KANN splits a mini-batch to $n_threads mini-mini-batches and puts each of
 * them on one thread. So far, only kann_cost() takes the advantage of
 * multi-threading.
 *
 * @param ann network
 * @param n_threads number of threads; <=1 to completely disable multi-threading
 * @param max_batch_size max mini-batch size; must be no smaller than n_threads
 */
void kann_mt(kann_t *ann, int n_threads, int max_batch_size);
/**
 * Bind float arrays to feed nodes
 *
 * @param a network
 * @param ext_flag required external flags
 * @param ext_label required external label
 * @param x pointers (size equal to the number of matching feed nodes)
 *
 * @return number of matching feed nodes
 */
int kann_feed_bind(kann_t *a, uint32_t ext_flag, int32_t ext_label, float **x);
/**
 * Compute the cost and optionally gradients
 *
 * @param a network
 * @param cost_label required external label
 * @param cal_grad whether to compute gradients
 *
 * @return cost
 */
float kann_cost(kann_t *a, int cost_label, int cal_grad);
/* Forward-evaluate nodes matching (ext_flag, ext_label);
 * return value presumably counts the matching nodes -- TODO confirm */
int kann_eval(kann_t *a, uint32_t ext_flag, int ext_label);
/* Count classification errors on the current batch; *base presumably receives
 * the number of samples considered -- TODO confirm against the implementation */
int kann_class_error(const kann_t *ann, int *base);
/**
 * Find a node
 *
 * @param a network
 * @param ext_flag required external flags; set to 0 to match all flags
 * @param ext_label required external label
 *
 * @return >=0 if found; -1 if not found; -2 if found multiple
 */
int kann_find(const kann_t *a, uint32_t ext_flag, int32_t ext_label);
/**
 * Get the size of a feed node, assuming mini-batch size 1
 *
 * @param a network
 * @param ext_flag required external flags
 * @param ext_label required external label
 *
 * @return size>=0; -1 if not found; -2 if found multiple
 */
int kann_feed_dim(const kann_t *a, uint32_t ext_flag, int32_t ext_label);
/**
 * Get an RNN ready for continuous feeding
 *
 * @param a network
 */
void kann_rnn_start(kann_t *a);
void kann_rnn_end(kann_t *a); /* undo kann_rnn_start() -- presumably frees its buffers; TODO confirm */
/**
 * Switch between training and prediction networks (effective only when there are switch nodes)
 *
 * @param a network
 * @param is_train 0 for prediction network and non-zero for training net
 */
void kann_switch(kann_t *a, int is_train);
/**
 * RMSprop update
 *
 * @param n number of variables
 * @param h0 learning rate
 * @param h per-variable learning rate; NULL if not applicable
 * @param decay RMSprop decay; use 0.9 if unsure
 * @param g gradient, of size n
 * @param t variables to change
 * @param r memory, of size n
 */
void kann_RMSprop(int n, float h0, const float *h, float decay, const float *g, float *t, float *r);
void kann_shuffle(int n, int *s); /* randomly shuffle the int array s of size n */
float kann_grad_clip(float thres, int n, float *g); /* clip gradient g of size n; return value semantics: TODO confirm */
/* common layers: each returns the output node of the layer appended to `in` */
kad_node_t *kann_layer_input(int n1); /* input feed node of size n1 */
kad_node_t *kann_layer_dense(kad_node_t *in, int n1); /* fully connected, n1 outputs */
kad_node_t *kann_layer_dense_nobias(kad_node_t *in, int n1);
kad_node_t *kann_layer_dense_mpi(kad_node_t *in, int n1, int split_mode); /* split_mode: one of the *REDUCE constants -- TODO confirm */
kad_node_t *kann_layer_dropout(kad_node_t *t, float r); /* dropout with rate r */
kad_node_t *kann_layer_layernorm(kad_node_t *in);
/* recurrent layers; rnn_flag: KANN_RNN_VAR_H0 and/or KANN_RNN_NORM */
kad_node_t *kann_layer_rnn(kad_node_t *in, int n1, int rnn_flag);
kad_node_t *kann_layer_lstm(kad_node_t *in, int n1, int rnn_flag);
kad_node_t *kann_layer_gru(kad_node_t *in, int n1, int rnn_flag);
/* convolution layers */
kad_node_t *kann_layer_conv2d(kad_node_t *in, int n_flt, int k_rows, int k_cols, int stride_r, int stride_c, int pad_r, int pad_c);
kad_node_t *kann_layer_conv2d_mpi(kad_node_t *in, int n_flt, int k_rows, int k_cols, int stride_r, int stride_c, int pad_r, int pad_c);
kad_node_t *kann_layer_conv1d(kad_node_t *in, int n_flt, int k_size, int stride, int pad);
/* final cost layer; cost_type: one of the KANN_C_* constants */
kad_node_t *kann_layer_cost(kad_node_t *t, int n_out, int cost_type);
kad_node_t *kann_layer_cost_mpi(kad_node_t *t, int n_out, int cost_type, int split_mode);
/* leaf (weight/bias/constant) constructors */
kad_node_t *kann_new_leaf(uint8_t flag, float x0_01, int n_d, ...); /* flag can be KAD_CONST or KAD_VAR */
kad_node_t *kann_new_scalar(uint8_t flag, float x);
kad_node_t *kann_new_weight(int n_row, int n_col);
kad_node_t *kann_new_bias(int n);
kad_node_t *kann_new_weight_conv2d(int n_out, int n_in, int k_row, int k_col);
kad_node_t *kann_new_weight_conv1d(int n_out, int n_in, int kernel_len);
/* "2" variants that place parameters at *offset within an existing array `par`
 * -- presumably for weight sharing/unrolling; TODO confirm */
kad_node_t *kann_new_leaf2(int *offset, kad_node_p *par, uint8_t flag, float x0_01, int n_d, ...);
kad_node_t *kann_layer_dense2(int *offset, kad_node_p *par, kad_node_t *in, int n1);
kad_node_t *kann_layer_dense2_nobias(int *offset, kad_node_p *par, kad_node_t *in, int n1);
kad_node_t *kann_layer_dense2_mpi(int *offset, kad_node_p *par, kad_node_t *in, int n1, int split_mode);
kad_node_t *kann_layer_dropout2(int *offset, kad_node_p *par, kad_node_t *t, float r);
kad_node_t *kann_layer_layernorm2(int *offset, kad_node_t **par, kad_node_t *in);
kad_node_t *kann_layer_rnn2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
kad_node_t *kann_layer_gru2(int *offset, kad_node_t **par, kad_node_t *in, kad_node_t *h0, int rnn_flag);
/* operations on network with a single input node and a single output node */
/* Train on n samples (_x: inputs, _y: truths); frac_val of the data is held
 * out for validation, and training stops after max_drop_streak epochs without
 * validation improvement or after max_epoch epochs. */
int kann_train_fnn1(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak, float frac_val, int n, float **_x, float **_y);
/* MPI variant: rank identifies this process; batches presumably sets the per-rank batch count -- TODO confirm */
int kann_train_fnn1_mpi(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak, float frac_val, int n, float **_x, float **_y, int rank, int batches);
float kann_cost_fnn1(kann_t *a, int n, float **x, float **y); /* cost over n samples without training */
const float *kann_apply1(kann_t *a, float *x); /* run one sample; returns a pointer to the output values */
/* model I/O */
void kann_save_fp(FILE *fp, kann_t *ann); /* write a model to an open stream */
void kann_save(const char *fn, kann_t *ann); /* write a model to file `fn` */
kann_t *kann_load_fp(FILE *fp); /* read a model from an open stream */
kann_t *kann_load(const char *fn); /* read a model from file `fn` */
/* deterministic random-number helper (state: drand_t) */
void kann_drand_init(int seed);
int kann_drand_fetch(double *val); /* fetch the next value into *val; return semantics: TODO confirm */
/* `(void)` added: in C, `()` declares an UNSPECIFIED parameter list, not an
 * empty one, so the original prototype disabled argument checking (and is
 * invalid in C23). `(void)` is backward-compatible with all existing calls. */
int kann_drand_count(void);
/* MPI data-parallel helpers: insert/validate/free allreduce nodes in a graph
 * -- NOTE(review): exact semantics not visible from this header; confirm
 * against the implementation */
void kann_allreduce_insert(kann_t *ann, int bs);
void kann_allreduce_check(kann_t *ann);
void kann_allreduce_free(kann_t *ann);
/* build complete MLP networks with MPI layer splitting (alternating vs.
 * all-to-all reduction, per the *REDUCE constants -- TODO confirm) */
kann_t* kann_insert_alt_mlp_mpi(kad_node_t *t, int n_out, int loss_type, int n_h_layers, int tot_neurons, float h_dropout, int size, int bs, int rank);
kann_t* kann_insert_all_mlp_mpi(kad_node_t *t, int n_out, int loss_type, int n_h_layers, int tot_neurons, float h_dropout, int size, int bs, int rank);
/* one-hot/recommender variant of kann_train_fnn1_mpi(); `dok` presumably holds
 * per-user positive-item lists (see kann_dok_t), with num_neg negative samples
 * per positive and top_K used for ranking evaluation -- TODO confirm */
int kann_train_fnn1_mpi_onehot(kann_t *ann, float lr, int mini_size, int max_epoch, int max_drop_streak,
	float frac_val, int n, float **_x, int n_val_row, int n_val_col, float **_val, int rank, int batches,
	int max_user_id, int max_item_id, kann_dok_t *dok, int num_neg, int top_K);
#ifdef __cplusplus
}
#endif
#endif