# run.sh: experiment log (repo forked from Qwicen/node)
# Just run ~1000 epochs to test the functionality of random search (rs)
CUDA_VISIBLE_DEVICES=0 python main.py --name 0225 --dataset a9a --random_search 10 --early_stopping_rounds 10000
CUDA_VISIBLE_DEVICES=0 python main.py --name 0225 --dataset a9a --random_search 50 --early_stopping_rounds 10000
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name testing --dataset a9a --early_stopping_rounds 10000
# Run in vws
python main.py --name test2 --dataset a9a --early_stopping_rounds 500 --fp16 0 --random_search 5
# Search in v or q to do the random search
python main.py --name test3 --dataset a9a --early_stopping_rounds 500 --random_search 5
python main.py --name test4 --dataset epsilon --early_stopping_rounds 200 --random_search 1
## Search in a9a
python main.py --name 0225_a9a --dataset a9a --random_search 100 --cpu 4 --gpus 1 --mem 8
python main.py --name 0226_epsilon --dataset epsilon --random_search 2 --cpu 4 --gpus 1 --mem 10
# Rerun a specific job
./my_sbatch --cpu 4 --gpus 1 --mem 10 python main.py --name 0226_epsilon_s5_nt8192_bs512_mt0.0003_as2000_cfSMTemp
# Run these 5 datasets once to download the datasets
for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click'; do
python main.py --name 0226_${dset} --dataset ${dset} --random_search 1 --cpu 4 --gpus 1 --mem 10
done
## TORUN: to see if they perform reasonably!
for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click' 'epsilon'; do
python main.py --name 0226_${dset} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8
done
## After fixing bugs, run more random search!
for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click' 'epsilon'; do
python main.py --name 0302_${dset} --dataset ${dset} --random_search 15 --cpu 4 --gpus 1 --mem 12
done
for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click' 'epsilon'; do
mv "results/${dset}_prev.csv" "results/${dset}.csv"
done
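# (Assumption: the _prev.csv files being restored above came from a manual
# backup step like the commented sketch below; it is not in the original log.)
# for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click' 'epsilon'; do
#   cp "results/${dset}.csv" "results/${dset}_prev.csv"
# done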
## Run python scripts
python main.py --name 0305 --dataset mimic2
# Run mimic2
dset='mimic2'
python main.py --name 0306_${dset} --dataset ${dset} --random_search 50 --cpu 4 --gpus 1 --mem 8
# Run baselines in all datasets with ebm-o100-i100
dset='mimic2'
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 8 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click' 'epsilon'; do
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 16 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
# They die in weird situations...
for dset in 'yahoo' 'microsoft' 'epsilon'; do
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 32 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
dset='yahoo'
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 50 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
for dset in 'epsilon'; do
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 32 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
# Run more for only these 3 datasets
for dset in 'adult' 'mimic3' 'compas'; do
python main.py --name 0310_${dset} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 16 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
for dset in 'mimic3' 'compas'; do
model_name='ebm-o100-i100'
./my_sbatch --cpu 20 --gpus 0 --mem 16 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
# Think: how to make NODE GAM deep? How can we penalize (either hard or soft) so that later layers only choose trees with similar features?
# - Maybe an inner product (like attention)? The more similarly a node picks its features, the easier to focus on which one to take. But I cannot make it input-dependent!
# - One way to achieve this: each subsequent tree focuses on one set of features. But which set of features? One option is a soft attention (with one vector and an attention value on each previous leaf).
# - Idea: follow the attention/encoder architecture on tabular features?
# Finish the attention! Search multi-layer!
dset='mimic2'
python main.py --name 0312_${dset} --dataset ${dset} --random_search 50 --cpu 4 --gpus 1 --mem 8
# Do a test: does fp16 speed things up or not? No!
dset='mimic2'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0312_${dset} --dataset ${dset} --num_trees 1024 --num_layers 6 --tree_dim 1 --fp16 0
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0312_${dset}_fp16 --dataset ${dset} --num_trees 1024 --num_layers 6 --tree_dim 1 --fp16 1
# Do a grid search
# (1) List the best hyperparameters found so far on MIMIC2
dset='mimic2'
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 1
# (2) Search which number of trees works best. Change bs to 1024 here
#dset='mimic2'
for dset in 'compas' 'adult'; do
for num_trees in '1024' '2048' '4096' '8192' '16400'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_nt${num_trees} --dataset ${dset} --num_trees ${num_trees} --batch_size 1024 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 1
done
# (3) Search if batch size makes a difference?
for bs in '128' '256' '512' '1024' '2048' '4096'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_bs${bs} --dataset ${dset} --num_trees 1024 --batch_size ${bs} --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 1
done
# (4) Search if num_layers makes a difference?
for nl in '1' '2' '3' '4' '5' '6' '7' '8'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_nl${nl} --dataset ${dset} --num_trees 1024 --batch_size 1024 --num_layers ${nl} --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 1
done
# (5) Search if att_normalize makes a difference?
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_nn --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 0
# (6) colsample_bytree
for cs in '1' '0.8' '0.5' '0.3' '0.1' '0.0001'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_cs${cs} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree ${cs}
done
# (7) output_dropout
for od in '0.' '0.1' '0.3' '0.5' '0.8'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_od${od} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout ${od} --colsample_bytree 0.3
done
# (8) tree_dim
for tree_dim in '0' '1' '2' '3'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_td${tree_dim} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim ${tree_dim} --output_dropout 0.5 --colsample_bytree 0.3
done
# (9) l2 penalty of response?
#for l2_lambda in '0' '1e-3' '1e-4' '1e-5' '1e-6' '1e-7'; do
#for l2_lambda in '0.01' '0.1'; do
for l2_lambda in '1e-6' '1e-7' '1e-8' '1e-9' '0'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_lda${l2_lambda} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --l2_lambda ${l2_lambda}
done
done
for dset in 'mimic2' 'adult' 'compas'; do
for l2_lambda in '1e-10' '1e-11' '1e-12'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0316_${dset}_lda${l2_lambda} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --l2_lambda ${l2_lambda}
done
done
#### Run more hyperparameter search
for dset in 'mimic2' 'compas' 'adult'; do
fold='1'
python main.py --name 0317_${dset}_f${fold} --dataset ${dset} --random_search 40 --cpu 4 --gpus 1 --mem 8 --fold ${fold}
model_name='ebm-o100-i100'
for fold in '1' '2' '3' '4'; do
./my_sbatch --cpu 20 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
# Rerun those failed models
for f in \
'0321_mimic2_ODST_s54_nl4_nt1024_d6_td0_lr0.001'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name ${f}
done
for f in \
'0321_mimic2_GAMAtt_s2_d0.3_cs0.5_nl4_nt4000_td0_an1_at32'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name ${f}
done
## Memory exploded for:
# '0317_mimic2_f1_s84_bs1024_as20000_d0.5_cs0.5_nl4_nt12000_td0_an1_lda0.0'
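# One hedged way to revive the OOM run: resubmit it by name with a smaller
# batch size (the same --name + --batch_size override pattern is used for the
# 0327 OOM reruns further below); the memory value here is a guess.
./my_sbatch --cpu 4 --gpus 1 --mem 16 python -u main.py --name '0317_mimic2_f1_s84_bs1024_as20000_d0.5_cs0.5_nl4_nt12000_td0_an1_lda0.0' --batch_size 512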
# Run a few models using Attn
dset='mimic2'
for arch in 'GAM' 'GAMAtt'; do
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0318_${dset}_${arch} --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 0 --arch ${arch}
done
arch='GAM'
dset='mimic2'
./my_sbatch -p p100 --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0318_${dset}_${arch}_fp16 --dataset ${dset} --num_trees 1024 --batch_size 2048 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 0.3 --att_normalize 0 --arch ${arch} --fp16 1
# Run all folds with the best mimic2 (l2) parameter
arch='GAM'
dset='mimic2'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0319_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 1 --num_trees 4000 --batch_size 1024 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 1e-5 --arch ${arch} --fp16 0 --fold ${fold} --early_stopping_rounds 15000
done
# Run a version with fp16=1. See whether performance / run time is better or worse
arch='GAM'
dset='mimic2'
for fold in '0' '1' '2'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0319_${dset}_${arch}_best_f${fold}_fp16 --dataset ${dset} --seed 1 --num_trees 4000 --batch_size 1024 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 1e-5 --arch ${arch} --fp16 1 --fold ${fold} --early_stopping_rounds 15000
done
# TODO:
# (1) Wait for the server to empty. See if the attention architecture works better. Run fp16 to save memory...
for dset in 'mimic2'; do
python main.py --name 0319_${dset} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1
done
for dset in 'compas' 'adult'; do
python main.py --name 0319_${dset} --dataset ${dset} --random_search 50 --cpu 4 --gpus 1 --mem 8 --fp16 1
done
model_name='xgb-o50'
for dset in 'mimic2' 'compas' 'adult'; do
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 30 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
# Add lr! Do random search to make sure this is still ok compared to previous methods
for dset in 'mimic2'; do
python main.py --name 0320_${dset} --dataset ${dset} --random_search 5 --cpu 4 --gpus 1 --mem 8 --fp16 1
done
# Change lr schedule! Rerun search!
for arch in 'GAM' 'GAMAtt'; do
for dset in 'compas' 'adult'; do
python main.py --name 0321_${dset}_${arch} --dataset ${dset} --random_search 10 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
for arch in 'GAM' 'GAMAtt'; do
for dset in 'support2' 'churn'; do
python main.py --name 0321_${dset}_${arch} --dataset ${dset} --random_search 2 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
for arch in 'GAM' 'GAMAtt'; do
for dset in 'support2' 'churn' ; do
python main.py --name 0321_${dset}_${arch} --dataset ${dset} --random_search 2 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
for arch in 'GAM' 'GAMAtt'; do
for dset in 'support2' 'churn' ; do
python main.py --name 0321_${dset}_${arch} --dataset ${dset} --random_search 2 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
# Run everything once to download the datasets!
for arch in 'GAM'; do
for dset in 'credit' 'mimic3' 'click' 'yahoo' 'microsoft' 'higgs' 'epsilon' 'year'; do
python main.py --name 0321_${dset}_${arch} --dataset ${dset} --random_search 1 --cpu 4 --gpus 1 --mem 8 --fp16 0 --arch ${arch}
done
done
# Run the best parameter again!
arch='GAM'
dset='mimic2'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0322_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 1 --num_trees 4000 --batch_size 1024 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 1e-5 --arch ${arch} --fp16 0 --fold ${fold} --early_stopping_rounds 15000
done
# Run random search for every dataset. The current combinations total around 320
# Run 100 for each dataset. Enable fp16 (since it goes deeper, it could be faster?).
for dset in 'adult' 'mimic3'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0321_${dset}_${arch} --dataset ${dset} --random_search 50 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --early_stopping_rounds 11000 --lr_decay_steps 5000
done
done
for dset in 'mimic3' 'credit' 'churn' 'support2'; do
for fold in '0'; do
for model_name in 'xgb-o50' 'ebm-o100-i100'; do
./my_sbatch --cpu 20 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
done
model_name='xgb-o50'
for dset in 'mimic2' 'compas' 'adult'; do
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 30 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
# Run a different depth!!
arch='GAM'
dset='mimic2'
fold='0'
for depth in '1' '2' '4' '6' '8'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0324_${dset}_${arch}_best_f${fold}_d${depth} --dataset ${dset} --seed 1 --num_trees 4000 --num_layers 2 --tree_dim 0 --output_dropout 0.5 --colsample_bytree 1e-5 --arch ${arch} --fp16 1 --fold ${fold} --early_stopping_rounds 11000 --lr_decay_steps 5000 --depth ${depth}
done
# (Wait) Wait for the MIMIC result and select the range of depth
for dset in 'mimic2' 'adult'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0322_${dset}_${arch} --dataset ${dset} --random_search 50 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
# After seeing the graphs, I feel my preprocessing has a huge impact on them:
# (1) Before, I only did the target transform without the quantile transform on categorical features. That keeps the feature values quite close together in general (e.g. 0.008, 0.012). With the quantile transform the values become much larger: -2, 0.
# (2) Before, I had quantile noise around 1e-3. This inductive bias makes things smooth. But my datasets are usually large (except compas?), so that much noise induces an unnecessary smoothing prior that might make things a bit too flat. So I set it to 1e-4. Making it lower would remove this inductive bias completely, which may not be good?
# (3) (which I have not touched) The quantile transform makes outliers less influential. This makes the graphs look robust in low-sample regions, probably similar to the binning effect in EBM.
### Alright: so the date prefix has to be completely different
### Make it 0325
### The results are stored in _new
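# A minimal sketch (only flags this script already uses; the run names are
# hypothetical) to test point (2) directly: sweep quantile_noise on one
# dataset and compare the resulting shape graphs.
dset='mimic2'
for qn in '1e-3' '1e-4' '1e-5' '0'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0325_${dset}_qn${qn} --dataset ${dset} --quantile_noise ${qn}
done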
for dset in 'mimic2' 'adult'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0325_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
### To see the graph, see how preprocessing changes the graph!
dset='mimic2'
arch='GAMAtt'
fold='0'
quantile_noise='0'
for quantile_dist in 'normal'; do
for n_quantiles in '3000'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0326_${dset}_${arch}_best_f${fold}_${quantile_dist}_qn${quantile_noise}_nq${n_quantiles} --dataset ${dset} --seed 1 --num_trees 500 --num_layers 4 --batch_size 2048 --addi_tree_dim 1 --depth 5 --output_dropout 0.2 --colsample_bytree 1 --lr 0.02 --last_as_output 0 --dim_att 128 --arch ${arch} --fp16 1 --fold ${fold} --quantile_dist ${quantile_dist} --quantile_noise ${quantile_noise} --n_quantiles ${n_quantiles}
done
done
# Add l2 for output penalty. Change to _new2
### Run more for search! Especially for last_as_output=1
#for dset in 'mimic2' 'adult'; do
for dset in 'compas' 'mimic3'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0327_${dset}_${arch} --dataset ${dset} --random_search 15 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
## Rerun these: they died cuz there was not enough memory. Reduce the batch size to 512
for name in \
'0327_adult_GAM_s2_nl1_nt8000_td2_d4_od0.2_cs0.5_lr0.01_lo0_la0.0001' \
'0327_mimic2_GAMAtt_s83_nl1_nt8000_td1_d4_od0.0_cs1.0_lr0.01_lo0_la1e-06_da128' \
; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name ${name} --batch_size 512
done
###### Run all methods and baselines on the other 3 datasets I gathered.
for dset in 'support2' 'churn' 'credit'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0327_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
for model_name in 'ebm-o100-i100' 'xgb-o50'; do
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 20 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
done
##### Run the best among 7 datasets!
# mimic2
# 0326_mimic2_GAMAtt_best_f0_normal_qn0_nq3000
dset='mimic2'
arch='GAMAtt'
quantile_noise='0'
n_quantiles='3000'
quantile_dist='normal'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold}_${quantile_dist}_qn${quantile_noise}_nq${n_quantiles} --dataset ${dset} --seed 1 --num_trees 500 --num_layers 4 --batch_size 2048 --addi_tree_dim 1 --depth 5 --output_dropout 0.2 --colsample_bytree 1 --lr 0.02 --last_as_output 0 --dim_att 128 --arch ${arch} --fp16 1 --fold ${fold} --quantile_dist ${quantile_dist} --quantile_noise ${quantile_noise} --n_quantiles ${n_quantiles}
done
# Adult best
# 0327_adult_GAM_s17_nl3_nt333_td2_d4_od0.1_cs1.0_lr0.005_lo1_la0.0
# num_quantiles=1000
n_quantiles='1000'
dset='adult'
arch='GAM'
batch_size='2048'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 17 --num_trees 333 --num_layers 3 --batch_size ${batch_size} --addi_tree_dim 2 --depth 4 --output_dropout 0.1 --colsample_bytree 1 --lr 0.005 --last_as_output 1 --l2_lambda 0 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
done
# MIMIC3 best
# 0327_mimic3_GAMAtt_s6_nl4_nt2000_td0_d3_od0.1_cs0.1_lr0.01_lo0_la1e-07_da128
n_quantiles='2000'
dset='mimic3'
arch='GAMAtt'
batch_size='2048'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 6 --num_trees 2000 --num_layers 4 --batch_size ${batch_size} --addi_tree_dim 0 --depth 3 --output_dropout 0.1 --colsample_bytree 0.1 --lr 0.01 --last_as_output 0 --l2_lambda 1e-7 --dim_att 128 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
done
# COMPAS best
# 0327_compas_GAMAtt_s82_nl2_nt2000_td1_d2_od0.1_cs1.0_lr0.005_lo0_la0.0_da64 -0.73683 -0.74334
n_quantiles='2000'
dset='compas'
arch='GAMAtt'
batch_size='2048'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 82 --num_trees 2000 --num_layers 2 --batch_size ${batch_size} --addi_tree_dim 1 --depth 2 --output_dropout 0.1 --colsample_bytree 1 --lr 0.005 --last_as_output 0 --l2_lambda 0 --dim_att 64 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
done
# CHURN best
# 0327_churn_GAM_s6_nl2_nt1000_td1_d4_od0.0_cs0.5_lr0.01_lo1_la0.0
n_quantiles='2000'
dset='churn'
arch='GAM'
batch_size='2048'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 6 --num_trees 1000 --num_layers 2 --batch_size ${batch_size} --addi_tree_dim 1 --depth 4 --output_dropout 0 --colsample_bytree 0.5 --lr 0.01 --last_as_output 1 --l2_lambda 0 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
done
# Credit best
# 0327_credit_GAMAtt_s61_nl3_nt666_td2_d4_od0.1_cs0.5_lr0.005_lo0_la1e-07_da16
n_quantiles='2000'
dset='credit'
arch='GAMAtt'
batch_size='2048'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 61 --num_trees 666 --num_layers 3 --batch_size ${batch_size} --addi_tree_dim 2 --depth 4 --output_dropout 0.1 --colsample_bytree 0.5 --lr 0.005 --last_as_output 0 --l2_lambda 1e-7 --dim_att 16 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
done
# Support2 best
# 0327_support2_GAMAtt_s20_nl3_nt333_td2_d2_od0.2_cs1.0_lr0.01_lo0_la1e-05_da16 -0.82073 -0.81433
n_quantiles='2000'
dset='support2'
arch='GAMAtt'
batch_size='2048'
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 20 --num_trees 333 --num_layers 3 --batch_size ${batch_size} --addi_tree_dim 2 --depth 2 --output_dropout 0.2 --colsample_bytree 1 --lr 0.01 --last_as_output 0 --l2_lambda 1e-5 --dim_att 16 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
done
for dset in 'compas'; do
for model_name in 'ebm-o100-i100' 'xgb-o50'; do
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 20 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
done
dset='credit'
model_name='xgb-o50'
fold='0'
./my_sbatch --cpu 20 --gpus 0 --mem 10 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
### ODST random search
for dset in 'compas' 'adult' 'mimic3' 'mimic2' 'support2' 'churn' 'credit'; do
for arch in 'ODST'; do
python main.py --name 0331_${dset}_${arch} --dataset ${dset} --random_search 1 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
# Instead, let's run the GAM for MIMIC2, MIMIC3, credit, support2, compas
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_${arch}_best_f${fold} --dataset ${dset} --seed 20 --num_trees 333 --num_layers 3 --batch_size ${batch_size} --addi_tree_dim 2 --depth 2 --output_dropout 0.2 --colsample_bytree 1 --lr 0.01 --last_as_output 0 --l2_lambda 1e-5 --dim_att 16 --arch ${arch} --fp16 1 --fold ${fold} --n_quantiles ${n_quantiles}
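# (Assumption, inferred from the hparams copy/check later in this log:
# --load_from_hparams reloads the saved config from logs/hparams/<name>, so
# only the flags passed alongside it, e.g. --fold, are overridden.)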
for fold in '0' '1' '2' '3' '4'; do
dset='mimic2'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_mimic2_ODST_s82_nl2_nt1024_d6_td1_lr0.001 --fold ${fold}
dset='adult'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_adult_ODST_s84_nl2_nt512_d8_td2_lr0.001 --fold ${fold}
dset='mimic3'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_mimic3_ODST_s68_nl4_nt512_d6_td1_lr0.001 --fold ${fold}
dset='support2'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_support2_ODST_s69_nl4_nt512_d8_td2_lr0.001 --fold ${fold}
dset='compas'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_compas_ODST_s26_nl4_nt512_d8_td1_lr0.001 --fold ${fold}
dset='churn'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_churn_ODST_s43_nl2_nt512_d6_td2_lr0.001 --fold ${fold}
dset='credit'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0401_${dset}_ODST_best_f${fold} --load_from_hparams 0331_credit_ODST_s28_nl4_nt512_d8_td1_lr0.001 --fold ${fold}
dset='mimic2'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0402_${dset}_bestGAM_f${fold} --load_from_hparams 0327_mimic2_GAM_s60_nl4_nt250_td2_d2_od0.1_cs1e-05_lr0.005_lo1_la0.0 --fold ${fold}
dset='mimic3'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0402_${dset}_bestGAM_f${fold} --load_from_hparams 0327_mimic3_GAM_s19_nl1_nt2000_td2_d2_od0.2_cs0.1_lr0.01_lo0_la0.0 --fold ${fold}
dset='compas'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0402_${dset}_bestGAM_f${fold} --load_from_hparams 0327_compas_GAM_s81_nl2_nt4000_td0_d2_od0.2_cs1.0_lr0.01_lo0_la1e-07 --fold ${fold}
dset='credit'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0402_${dset}_bestGAM_f${fold} --load_from_hparams 0327_credit_GAM_s64_nl1_nt4000_td0_d3_od0.0_cs1.0_lr0.01_lo0_la1e-06 --fold ${fold}
dset='support2'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0402_${dset}_bestGAM_f${fold} --load_from_hparams 0327_support2_GAM_s44_nl4_nt1000_td0_d3_od0.0_cs1.0_lr0.005_lo1_la0.0 --fold ${fold}
done
# Search more for the annoyingly underperforming datasets!
for dset in 'mimic2' 'mimic3' 'credit' 'adult'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0327_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
# After searching more, only adult got one slightly better run. Use that one!
for fold in '0' '1' '2' '3' '4'; do
dset='adult'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_best_f${fold} --load_from_hparams 0327_adult_GAM_s56_nl3_nt1333_td1_d4_od0.3_cs0.5_lr0.01_lo1_la0.0 --fold ${fold}
done
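# A small helper sketch (hypothetical, not part of the original script) that
# captures the "rerun one best config across all 5 folds" pattern repeated above:
run_best_all_folds() { # usage: run_best_all_folds <new_name_prefix> <hparams_name>
local prefix=$1 hp=$2
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name ${prefix}_f${fold} --load_from_hparams ${hp} --fold ${fold}
done
}
# e.g.: run_best_all_folds 0330_adult_best 0327_adult_GAM_s56_nl3_nt1333_td1_d4_od0.3_cs0.5_lr0.01_lo1_la0.0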
for dset in 'year' 'higgs' 'microsoft' 'yahoo' 'click' 'epsilon'; do
model_name='xgb-o50'
./my_sbatch --cpu 20 --gpus 0 --mem 16 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
dset='epsilon'
model_name='xgb-o50'
./my_sbatch --cpu 20 --gpus 0 --mem 32 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
for dset in 'yahoo' 'epsilon'; do
model_name='ebm-o20-i20'
./my_sbatch --cpu 20 --gpus 0 --mem 16 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
# regression dataset
for dset in 'year' 'microsoft' 'yahoo'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0328_${dset}_${arch} --dataset ${dset} --random_search 10 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
#for dset in 'adult' 'mimic2' 'mimic3' 'credit' 'click' 'epsilon' 'higgs' 'rossmann'; do
for dset in 'wine'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0329_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
dset='wine'
arch='GAM'
python main.py --name 0329_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
## Fill in the missing values in those tables:
# (1) baselines
model_name='ebm-o5-i5'
for dset in 'epsilon'; do
./my_sbatch --cpu 5 --gpus 0 --mem 100 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
model_name='xgb-o10-nj1'
for dset in 'epsilon'; do
./my_sbatch --cpu 10 --gpus 0 --mem 80 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
for model_name in 'xgb-o50'; do
for dset in 'higgs' 'year'; do
./my_sbatch --cpu 15 --gpus 0 --mem 30 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
done
for model_name in 'ebm-o100-i100' 'xgb-o50'; do
for dset in 'rossmann'; do
./my_sbatch --cpu 30 --gpus 0 --mem 50 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
done
for model_name in 'xgb-o50' 'ebm-o100-i100'; do
for dset in 'year'; do
./my_sbatch --cpu 25 --gpus 0 --mem 100 -p cpu python -u baselines.py --name 0308_${dset}_${model_name} --dataset ${dset} --model_name ${model_name}
done
done
for model_name in 'ebm-o100-i100' 'xgb-o50'; do
for dset in 'wine'; do
for fold in '1' '2' '3' '4'; do
./my_sbatch --cpu 10 --gpus 0 --mem 5 -p cpu python -u baselines.py --name 0308_${dset}_${model_name}_f${fold} --dataset ${dset} --model_name ${model_name} --fold ${fold}
done
done
done
# Run ODST wine!
dset='wine'
arch='ODST'
python main.py --name 0329_${dset}_${arch} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
# wine best
wine_best='0329_wine_GAM_s46_nl3_nt666_td2_d2_od0.3_cs0.1_lr0.005_lo0_la0.0001'
for fold in '0' '1' '2' '3' '4'; do
dset='wine'
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0330_${dset}_best_f${fold} --load_from_hparams ${wine_best} --fold ${fold}
done
## TODO: compare 0329 and 0328 on adult/mimic2/mimic3/credit to see if my newly added bias works better!
# Rerun things after I do custom data noise for each dataset!
# Start with 0403!!!!
for dset in 'mimic2' 'mimic3' 'credit' 'adult'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0403_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
### Annoyingly, the new qn does not have a dip in PFratio and performs worse
# Make a direct comparison!
mimic2_best='0329_mimic2_GAM_s57_nl2_nt4000_td0_d2_od0.0_cs0.5_lr0.01_lo0_la1e-05_ib1'
for qn in '1e-6' '0'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0409_mimic2_best_${qn} --load_from_hparams ${mimic2_best} --quantile_noise ${qn}
done
wine_best='0329_wine_GAM_s46_nl3_nt666_td2_d2_od0.3_cs0.1_lr0.005_lo0_la0.0001'
for qn in '1e-8' '0'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0409_wine_best_${qn} --load_from_hparams ${wine_best} --quantile_noise ${qn}
done
adult_best='0330_adult_best_f0'
for qn in '1e-3' '0'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0409_adult_best_qn${qn} --load_from_hparams ${adult_best} --quantile_noise ${qn}
done
mimic3_best='0403_mimic3_GAM_s55_nl2_nt250_td0_d2_od0.0_cs1e-05_lr0.01_lo0_la1e-06'
for qn in '1e-7' '0'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0409_mimic3_best_qn${qn} --load_from_hparams ${mimic3_best} --quantile_noise ${qn}
done
## Two changes: (1) min_temp, (2) custom noise
## Start with 0404
for dset in 'mimic2' 'mimic3' 'credit' 'adult' 'wine'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0404_${dset}_${arch} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
## Rerun specifically, as 0404, the previous hparams that worked best for these 5 dsets!
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0404 --load_from_hparams ${adult_best} --quantile_noise ${qn}
for d in \
'0403_mimic2_GAMAtt_s18_nl4_nt125_td2_d4_od0.0_cs0.5_lr0.005_lo0_la0.0001_da8' \
'0327_credit_GAMAtt_s61_nl3_nt666_td2_d4_od0.1_cs0.5_lr0.005_lo0_la1e-07_da16' \
'0329_wine_GAM_s46_nl3_nt666_td2_d2_od0.3_cs0.1_lr0.005_lo0_la0.0001' \
'0327_year_GAMAtt_s33_nl4_nt250_td2_d2_od0.0_cs0.5_lr0.01_lo0_la0.0_da16' \
'0329_epsilon_GAMAtt_s39_nl3_nt333_td2_d6_od0.0_cs0.1_lr0.01_lo0_la0.0_da8' \
'0328_yahoo_GAMAtt_s58_nl4_nt250_td0_d6_od0.1_cs0.5_lr0.005_lo0_la0.0_da16' \
'0328_microsoft_GAMAtt_s68_nl3_nt333_td0_d4_od0.2_cs0.5_lr0.01_lo0_la0.0_da64' \
'0329_higgs_GAMAtt_s72_nl3_nt333_td1_d6_od0.3_cs0.1_lr0.005_lo0_la1e-06_da16' \
'0329_click_GAMAtt_s23_nl4_nt500_td0_d2_od0.3_cs1e-05_lr0.005_lo0_la0.0001_da32' \
'0403_adult_GAM_s82_nl2_nt4000_td1_d4_od0.2_cs0.1_lr0.01_lo0_la1e-05' \
'0403_credit_GAM_s78_nl4_nt500_td0_d4_od0.2_cs0.5_lr0.005_lo0_la0.0' \
'0327_year_GAMAtt_s44_nl4_nt500_td2_d4_od0.2_cs0.1_lr0.005_lo0_la0.0_da16' \
'0329_epsilon_GAMAtt_s52_nl2_nt2000_td1_d2_od0.3_cs0.5_lr0.005_lo0_la1e-05_da16' \
'0327_yahoo_GAMAtt_s40_nl3_nt2666_td0_d4_od0.2_cs0.1_lr0.005_lo0_la0.0_da16' \
'0328_microsoft_GAM_s13_nl1_nt4000_td0_d4_od0.0_cs0.1_lr0.005_lo0_la0.0' \
'0329_higgs_GAMAtt_s78_nl3_nt1333_td2_d2_od0.3_cs0.5_lr0.005_lo0_la0.0001_da8' \
'0329_click_GAMAtt_s45_nl4_nt500_td2_d4_od0.0_cs0.1_lr0.01_lo0_la0.0_da16' \
'0329_rossmann_GAMAtt_s41_nl2_nt500_td1_d6_od0.2_cs0.5_lr0.01_lo0_la0.0_da8' \
'0329_rossmann_GAM_s50_nl1_nt2000_td1_d4_od0.3_cs0.1_lr0.01_lo0_la1e-06' \
'0330_compas_GAMAtt_best_f0' \
'0327_churn_GAM_s6_nl2_nt1000_td1_d4_od0.0_cs0.5_lr0.01_lo1_la0.0' \
'0327_support2_GAMAtt_s20_nl3_nt333_td2_d2_od0.2_cs1.0_lr0.01_lo0_la1e-05_da16' \
'0327_compas_GAM_s81_nl2_nt4000_td0_d2_od0.2_cs1.0_lr0.01_lo0_la1e-07' \
'0327_churn_GAMAtt_s46_nl4_nt2000_td0_d2_od0.2_cs0.1_lr0.01_lo1_la1e-05_da64' \
'0327_support2_GAM_s44_nl4_nt1000_td0_d3_od0.0_cs1.0_lr0.005_lo1_la0.0' \
; do
postfix=${d:4}
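# ${d:4} drops the 4-char date prefix (e.g. '0327'), so the rerun below is renamed 0404_<rest>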
if [ -a logs/hparams/${d} ]; then
# echo ${d}
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0404${postfix} --load_from_hparams ${d}
fi
done
## (R) Run 40 more times for compas/churn/support2
for dset in 'compas' 'churn' 'support2'; do
arch='GAM'
python main.py --name 0404_${dset}_${arch} --dataset ${dset} --random_search 15 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
arch='GAMAtt'
python main.py --name 0404_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
## (R) Try uniform for MIMIC2/Adult and see if the best run can recover mean imputation and the best perf
for dset in 'mimic2' 'adult'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0405_${dset}_${arch}_uniform --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --quantile_dist uniform
done
done
## (R) Test GAMAtt2 and last linear w/ GAM and GAMAtt
for dset in 'mimic2' 'adult'; do
for arch in 'GAMAtt2'; do
python main.py --name 0406_${dset}_${arch} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
arch='GAMAtt'
python main.py --name 0407_${dset}_${arch}_lastl --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --add_last_linear 1
done
## (R) see if multi-task learning helps!!
for dset in 'sarcos' 'sarcos0' 'sarcos1'; do
for arch in 'GAM' 'GAMAtt'; do
python main.py --name 0404_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
## (R) sarcos got the normalization wrong :(. Change to normalizing per task. Rerun
## (R) run in the next block with testing last_l and GAM!
for dset in 'sarcos'; do
for arch in 'GAM' 'GAMAtt2'; do
python main.py --name 0405_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
# (R) Random search for GAM and GAMAtt2 for lastl!
for dset in 'mimic2' 'credit' 'adult' 'wine'; do
for arch in 'GAM' 'GAMAtt2'; do
python main.py --name 0410_${dset}_${arch}_lastl --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --add_last_linear 1
done
done
## Conclusions
# (1) For GAM, last_l significantly improves the performance!
# (2) For GAMAtt, last_l mostly improves the average (except wine, since only 9 runs succeeded; it works much better in adult), but for the best run the original GAMAtt/GAMAtt2 sometimes performs better (MIMIC2, Wine; not Adult, Credit).
# (3) GAM vs. GAMAtt2: unclear. In Adult/Credit GAM_lastl is better, but in MIMIC2/Wine GAMAtt2 is the best. And in MIMIC2 the best GAMAtt has the PFratio-drop story but GAM does not!
## Strategy: (1) make last_l better for GAMAtt2! (2) Search for last_l=0 as well
for dset in 'mimic2' 'credit' 'adult' 'wine'; do
for arch in 'GAM' 'GAMAtt2'; do
python main.py --name 0410_${dset}_${arch}_lastl --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --add_last_linear 1
done
done
#### Make output as new6: search last_layer in GAMAtt2. Run MORE!
for dset in 'mimic2' 'mimic3' 'credit' 'adult' 'wine' 'compas' 'churn' 'support2' 'wine'; do
arch='GAM'
python main.py --name 0411_${dset}_${arch} --dataset ${dset} --random_search 16 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
arch='GAMAtt2' # Search more
python main.py --name 0411_${dset}_${arch} --dataset ${dset} --random_search 24 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
# (Wait) Rerun sarcos!
for dset in 'sarcos' 'sarcos0' 'sarcos1' 'sarcos2'; do
arch='GAM'
python main.py --name 0411_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
arch='GAMAtt2' # Search more
python main.py --name 0411_${dset}_${arch} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
for dset in 'year' 'epsilon' 'microsoft' 'click' 'higgs' 'rossmann' 'yahoo'; do
arch='GAM'
python main.py --name 0411_${dset}_${arch} --dataset ${dset} --random_search 16 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
arch='GAMAtt2' # Search more
python main.py --name 0411_${dset}_${arch} --dataset ${dset} --random_search 24 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
## Annoying! I forgot to cut the val set properly :(
# So it affects all classification datasets...
# Starting with new7...
# Too tired of doing hyperparameter search. Will instead just choose the best hparams from the previous search and run them as 0413!
for d in \
'0406_mimic2_GAMAtt2_s13_nl2_nt250_td1_d4_od0.1_cs0.5_lr0.01_lo0_la1e-05_da8' \
'0410_adult_GAM_lastl_s46_nl3_nt666_td1_d4_od0.1_cs0.5_lr0.01_lo0_la0.0' \
'0411_mimic3_GAM_s97_nl3_nt1333_td0_d6_od0.2_cs1e-05_lr0.005_lo0_la1e-07' \
'0411_compas_GAM_s32_nl4_nt1000_td0_d2_od0.1_cs0.5_lr0.01_lo0_la0.0' \
'0404_churn_GAMAtt_s93_nl3_nt166_td0_d2_od0.2_cs1e-05_lr0.01_lo0_la1e-07_da8' \
'0411_credit_GAMAtt2_s87_nl5_nt400_td2_d2_od0.2_cs0.1_lr0.01_lo0_la0.0_da8_ll1' \
'0404_support2_GAMAtt_s71_nl5_nt100_td0_d2_od0.2_cs0.5_lr0.005_lo0_la1e-06_da8' \
'0411_wine_GAM_s16_nl3_nt1333_td2_d4_od0.1_cs0.5_lr0.01_lo0_la1e-07' \
; do
postfix=${d:4}
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0413_f${fold}_best${postfix} --load_from_hparams ${d} --fold ${fold}
done
done
for d in \
'0415_support2_GAMAtt2_s62_nl2_nt1000_td0_d6_od0.3_cs1e-05_lr0.01_lo0_la1e-07_pt0_pr0_mn0_da32_ll1' \
; do
postfix=${d:4}
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0416_f${fold}_best${postfix} --load_from_hparams ${d} --fold ${fold}
done
done
# Annoying; try to beat GAMAtt in churn and support2
# Since I now update the val set in new7/8, I cannot compare them directly.
# What I can do is rerun things that are great on the new val set, then compare them on the test set!!
for d in \
'0404_support2_GAMAtt_s71_nl5_nt100_td0_d2_od0.2_cs0.5_lr0.005_lo0_la1e-06_da8' \
'0404_support2_GAMAtt_s29_nl4_nt1000_td2_d6_od0.3_cs0.5_lr0.01_lo0_la1e-07_da8' \
'0404_churn_GAMAtt_s93_nl3_nt166_td0_d2_od0.2_cs1e-05_lr0.01_lo0_la1e-07_da8' \
'0404_churn_GAMAtt_s23_nl5_nt100_td0_d6_od0.1_cs0.1_lr0.01_lo0_la0.0_da8' \
; do
postfix=${d:4}
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0415${postfix} --load_from_hparams ${d} --arch GAMAtt2
done
for dset in 'churn' 'support2'; do
arch='GAMAtt2' # Search more
python main.py --name 0415_${dset}_${arch} --dataset ${dset} --random_search 40 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
## (R) see which architecture is best for higgs! And do pre-training
## Add new8: random search for pretraining as well
dset='higgs'
for data_subsample in '1e3'; do
arch='GAMAtt2' # Search more
python main.py --name 0414_${dset}_${arch}_ds${data_subsample} --dataset ${dset} --random_search 60 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --pretrain 1 --data_subsample ${data_subsample}
arch='GAMAtt2' # Search more
python main.py --name 0414_${dset}_${arch}_ds${data_subsample} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --pretrain 0 --data_subsample ${data_subsample}
done
dset='higgs'
arch='GAMAtt2' # Search more
data_subsample='1e3'
python main.py --name 0416_${dset}_${arch}_ds${data_subsample} --dataset ${dset} --random_search 60 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --pretrain 1 --data_subsample ${data_subsample}
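# (The pt1 pretraining runs above get fine-tuned later via --load_from_pretrain; see the logs/0416_higgs*_pt1_* loop below.)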
# Rerun stuff here
for d in \
'0413_f1_best_churn_GAMAtt_s93_nl3_nt166_td0_d2_od0.2_cs1e-05_lr0.01_lo0_la1e-07_da8' \
'0413_f0_best_mimic2_GAMAtt2_s13_nl2_nt250_td1_d4_od0.1_cs0.5_lr0.01_lo0_la1e-05_da8' \
; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name $d
done
# - I could do a soft self-attention to make the graph sparse/jumpy?
## Compare sarcos:
## - Still, the multi-task is not as good as single-task lol
## - Idea: improve MTL with per-task early stopping by looking at multiple validation losses?
# Idea1: I can do MTL not just with a weighted linear layer at the end; I can have a task-specific GAMTree per task.
# Idea3: I can add a soft weight penalty across per-task weights to encourage similar graphs.
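# (Assumption about the cp below: it restores each run's hparams.json, which
# downstream tooling presumably reads from logs/<name>/hparams.json, from the
# copy kept in logs/hparams/.)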
for d in \
'0308_wine_ebm-o100-i100' \
'0308_wine_ebm-o100-i100_f1' \
'0308_wine_ebm-o100-i100_f2' \
'0308_wine_ebm-o100-i100_f3' \
'0308_wine_ebm-o100-i100_f4' \
; do
cp logs/hparams/$d ./logs/$d/hparams.json
done
for d in logs/0416_higgs*_pt1_*; do
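# ${d:5} strips the leading 'logs/' so only the run name remains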
name=${d:5}
echo ${name}
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name ${name}_ft --load_from_pretrain ${name} --lr 0.0005
# postfix=${d:4}
# if [ -a logs/hparams/${d} ]; then
# ./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0404${postfix} --load_from_hparams ${d}
# fi
done
# Rerun tons of things in new val set to see if GAMAtt2 can outperform old GAMAtt
# Also, run more in rossmann to see if I can outperform xgb
for dset in 'year' 'microsoft' 'higgs' 'rossmann'; do
arch='GAMAtt2' # Search more
python main.py --name 0417_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
# Even less
dset='higgs'
arch='GAMAtt2' # Search more
for data_subsample in '500'; do
python main.py --name 0414_${dset}_${arch}_ds${data_subsample} --dataset ${dset} --random_search 50 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --pretrain 1 --data_subsample ${data_subsample}
python main.py --name 0414_${dset}_${arch}_ds${data_subsample} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --pretrain 0 --data_subsample ${data_subsample}
done
for dset in 'year' 'microsoft' 'higgs' 'rossmann'; do
arch='GAMAtt2' # Search more
python main.py --name 0417_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
# Run sarcos for much less data
data_subsample='1e3'
for dset in 'sarcos' 'sarcos0'; do
arch='GAM'
python main.py --name 0418_${dset}_${data_subsample}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --data_subsample ${data_subsample}
arch='GAMAtt2' # Search more
python main.py --name 0418_${dset}_${data_subsample}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --data_subsample ${data_subsample}
done
# (TORUN) These 3 datasets have inferior performance. Run more here!
for dset in 'wine' 'compas' 'support2' 'churn'; do
arch='GAMAtt2'
python main.py --name 0418_${dset}_${arch} --dataset ${dset} --random_search 40 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
arch='GAM'
python main.py --name 0418_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
# Note p100! Just to see if last_dropout helps!
for dset in 'mimic2' 'adult'; do
for arch in 'GAM' 'GAMAtt' 'GAMAtt2'; do
python main.py --name 0420_${dset}_${arch} --dataset ${dset} --random_search 20 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --partition p100
done
done
# Try to win on these last 2 datasets! I still cannot decide which one is the best
for dset in 'support2' 'wine'; do
for arch in 'GAM' 'GAMAtt' 'GAMAtt2'; do
python main.py --name 0420_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
for dset in 'rossmann'; do
for arch in 'GAM' 'GAMAtt' 'GAMAtt2'; do
python main.py --name 0420_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
for dset in 'support2' 'wine'; do
for arch in 'GAM'; do
python main.py --name 0420_${dset}_${arch} --dataset ${dset} --random_search 25 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch}
done
done
# (7) Run more on rossmann (0424)
# - Not much improvement
# Run the final model for support2/wine
dset='support2'
support2_cur_best='0420_support2_GAM_s43_nl4_nt125_td1_d2_od0.1_ld0.0_cs1e-05_lr0.01_lo0_la1e-06_pt0_pr0_mn0_ol0_ll1'
postfix=${support2_cur_best:4}
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0420_${dset}_f${fold}_best${postfix} --load_from_hparams ${support2_cur_best} --fold ${fold}
done
dset='wine'
wine_cur_best='0420_wine_GAM_s31_nl5_nt800_td1_d2_od0.0_ld0.1_cs0.5_lr0.005_lo0_la1e-05_pt0_pr0_mn0_ol0_ll1'
postfix=${wine_cur_best:4}
for fold in '0' '1' '2' '3' '4'; do
./my_sbatch --cpu 4 --gpus 1 --mem 8 python -u main.py --name 0420_${dset}_f${fold}_best${postfix} --load_from_hparams ${wine_cur_best} --fold ${fold}
done
# (5) Run another pretraining loss w/ only 500 samples (0421). Hope it improves!
# * The problem is likely that there is almost no possibility for mask pretraining. Change to MSE.
dset='higgs'
arch='GAMAtt2' # Search more
for data_subsample in '500'; do
for pretrain in '2'; do
python main.py --name 0421_${dset}_${arch}_ds${data_subsample} --dataset ${dset} --random_search 30 --cpu 4 --gpus 1 --mem 8 --fp16 1 --arch ${arch} --pretrain ${pretrain} --data_subsample ${data_subsample}
done
done
pretrain='0'
for d in is_running/0421*; do
name=${d:11} # Remove directory name