/
dvc.lock
47 lines (47 loc) · 1.26 KB
/
dvc.lock
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
split:
cmd: python src/split.py data/data.xml
deps:
- path: data/data.xml
md5: a304afb96060aad90176268345e10355
- path: src/split.py
md5: ffa32f4104c363040f27d2bd22db127d
outs:
- path: data/splitted
md5: 1ce9051bf386e57c03fe779d476d93e7.dir
featurize:
cmd: python src/featurization.py data/splitted data/features
deps:
- path: data/splitted
md5: 1ce9051bf386e57c03fe779d476d93e7.dir
- path: src/featurization.py
md5: a56570e715e39134adb4fdc779296373
params:
params.yaml:
max_features: 6000
ngram_range.hi: 2
ngram_range.lo: 1
outs:
- path: data/features
md5: 703b744b08c7d5c7ad3fdef4a69c6def.dir
train:
cmd: python src/train.py data/features model.pkl
deps:
- path: data/features
md5: 703b744b08c7d5c7ad3fdef4a69c6def.dir
- path: src/train.py
md5: 999db74263dccc2798804478d5dea37e
outs:
- path: model.pkl
md5: ebd24ca55feb3707b0f44d0269739b1e
evaluate:
cmd: python src/evaluate.py model.pkl data/features auc.json
deps:
- path: data/features
md5: 703b744b08c7d5c7ad3fdef4a69c6def.dir
- path: model.pkl
md5: ebd24ca55feb3707b0f44d0269739b1e
- path: src/evaluate.py
md5: fea9c7bb1242b5e43d682c5d43e7c1d1
outs:
- path: auc.json
md5: 2531e18bb924968943718aebfa193a2e