forked from PasaLab/forestlayer
/
uci_adult.py
61 lines (50 loc) · 1.8 KB
/
uci_adult.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# -*- coding:utf-8 -*-
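"""
UCI_ADULT Example.

Loads the UCI Adult data through ``forestlayer.datasets.uci_adult``, fits an
``AutoGrowingCascadeLayer`` built from four ``ExtraRandomForestConfig`` and
four ``RandomForestConfig`` estimators, and prints the elapsed time.
"""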
# Copyright 2017 Authors NJU PASA BigData Laboratory.
# Authors: Qiu Hu <huqiu00#163.com>
# License: Apache-2.0
from __future__ import print_function
from forestlayer.datasets import uci_adult
from forestlayer.layers import Graph, AutoGrowingCascadeLayer
from forestlayer.utils.storage_utils import get_data_save_base
from forestlayer.estimators.estimator_configs import ExtraRandomForestConfig, RandomForestConfig
import forestlayer as fl
import time
import numpy as np
import os.path as osp
# Call forestlayer's init hook first (presumably sets up the runtime -- confirm).
fl.init()

# load_data() hands back four arrays: train features/labels, test features/labels.
x_train, y_train, x_test, y_test = uci_adult.load_data()

# The clock starts only after loading, so the reported cost excludes data loading.
start_time = time.time()

# Report dataset dimensions: axis 0 is printed as the sample count,
# axis 1 of the training array as the feature count.
train_shape = x_train.shape
print(train_shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
print(train_shape[1], 'features')
# Estimator line-up for the cascade: four default-constructed
# ExtraRandomForestConfig objects followed by four default-constructed
# RandomForestConfig objects. Each entry is a distinct instance.
est_configs = [ExtraRandomForestConfig() for _ in range(4)]
est_configs += [RandomForestConfig() for _ in range(4)]
# Directory passed to the layer as data_save_dir:
# <data save base>/uci_adult/auto_cascade
cascade_save_dir = osp.join(get_data_save_base(), 'uci_adult', 'auto_cascade')

# Auto-growing cascade layer over the estimator configs defined above.
# NOTE(review): max_layers=0 and data_save_rounds=0 presumably mean
# "no fixed layer cap" and "do not save intermediate data" -- confirm
# against the AutoGrowingCascadeLayer documentation.
agc = AutoGrowingCascadeLayer(
    est_configs=est_configs,
    early_stopping_rounds=4,
    max_layers=0,
    stop_by_test=True,
    n_classes=2,
    data_save_rounds=0,
    data_save_dir=cascade_save_dir,
    keep_in_mem=False,
    distribute=True,
    dis_level=2,
    verbose_dis=False,
    dtype=np.float32,
    seed=0,
)
# Put the cascade layer into a Graph and run it on the train and test arrays.
model = Graph()
model.add(agc)
model.fit_transform(x_train, y_train, x_test, y_test)

# Seconds elapsed since start_time was recorded (after data loading).
elapsed = time.time() - start_time
print('time cost: {}'.format(elapsed))