# run_turs_given_data.py
import os
import sys

from exp_utils import *
# global parameters
# Unless noted otherwise, these are forwarded to RunExp.run_cars_beam below.
max_num_bin_numeric = 100
flt_format = '%.3f'  # float format used when the result CSV is written
max_dp = 20
append_to_current_csv = False  # not referenced in the visible part of this script
max_num_subgroups = 500
max_iter = 500
random_st = 1
k_fold = 10
print("start:", max_num_bin_numeric, " bins")

# Command line: <data_name> [cv_fold]
# Without a dataset name there is nothing to run; stop with a usage message
# instead of failing with an IndexError on sys.argv[1].
if len(sys.argv) < 2:
    sys.exit("usage: python " + sys.argv[0] + " <data_name> [cv_fold]")

data_chosen = sys.argv[1]
# A fold index is only read when exactly two arguments are given; any other
# argument count leaves cv_fold as None (same behaviour as before).
cv_fold = int(sys.argv[2]) if len(sys.argv) == 3 else None

# Results go to ./res/<data_name>.csv, or ./res/<data_name>_fold<k>.csv when a
# fold index was given.
if cv_fold is None:
    file_name = "./res/" + data_chosen + ".csv"
else:
    file_name = "./res/" + data_chosen + "_fold" + str(cv_fold) + ".csv"
print("filename: ", file_name)
# Accumulator for the results of every dataset that gets processed.
res_pd_full = pd.DataFrame()

# Collect the candidate data files found in ./datasets.
all_files = os.listdir("./datasets")
print("all_files:", all_files)
datasets_path, datasets_names = [], []
for file in all_files:
    # Substring test (not a suffix test): any file name containing one of
    # these markers is treated as a dataset.
    if not any(marker in file for marker in ('.csv', '.txt', '.data')):
        continue
    datasets_path.append("./datasets/" + file)
    # The dataset name is everything before the first dot of the file name.
    datasets_names.append(file.split(".")[0])

# Dataset names grouped by whether pd.read_csv has to be called with
# header=None (first list) or with its default header handling (second list).
datasets_without_header_row = [
    "waveform", "backnote", "chess", "contracept", "iris", "ionosphere",
    "magic", "car", "tic-tac-toe", "wine", "breast", "transfusion", "poker",
]
datasets_with_header_row = [
    "avila", "anuran", "diabetes", "sepsis",
]
# Run the experiment on the dataset selected on the command line and write the
# collected results to `file_name`.
for data_name, data_path in zip(datasets_names, datasets_path):
    if data_name != data_chosen:
        continue
    print("start: ", data_name, "at ", data_path, "===================")

    if data_name in datasets_without_header_row:
        d = pd.read_csv(data_path, header=None)
    elif data_name in datasets_with_header_row:
        d = pd.read_csv(data_path)
    else:
        # The file was found in ./datasets, but it is unknown whether it has a
        # header row, so it cannot be loaded. Skip it: falling through would
        # call run_cars_beam with an unbound `d` and raise NameError.
        print(data_name, "is not listed in datasets_without_header_row or "
                         "datasets_with_header_row; skipping.")
        continue

    num_cores = 5
    res_pd_this_data = RunExp.run_cars_beam(d, random_st, max_dp, max_iter, max_num_bin_numeric, max_num_subgroups,
                                            k_fold=k_fold, data_name=data_name,
                                            num_cores=num_cores, nbeam=5, num_initial_rule_list=5,
                                            onlyonefold=False, cv_fold=cv_fold)
    print(res_pd_this_data)
    res_pd_full = pd.concat([res_pd_full, res_pd_this_data], ignore_index=True)

    # Make sure the output directory exists, so the finished experiment is
    # not lost to a failing to_csv call.
    out_dir = os.path.dirname(file_name)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    res_pd_full.to_csv(path_or_buf=file_name, float_format=flt_format, index=False)