-
Notifications
You must be signed in to change notification settings - Fork 0
/
config.500k.yaml
92 lines (71 loc) · 2.41 KB
/
config.500k.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
############ options ####################
#generate_null_rule : yes
#comma_as_dead_span : yes
#use_mmap : yes
#remove_alignments: yes
# Use a separate language model for lexical and normal fragments.
#double_lm : yes
############ Rule Extraction ############
# Inputs, output path and limits used when extracting translation rules.

# Source side: parsed dependency trees.
# Supported format: Stanford parser output (tag, base dependency).
file_source_tree: /poisson2/home2/raphael/ntcir10/data.500k/data.en.tree
file_source_dep: /poisson2/home2/raphael/ntcir10/data.500k/data.en.dep
# Target side: plain text.
file_target: /poisson2/home2/raphael/ntcir10/data.500k/data.ja
# Alignment file.
file_alignment: /poisson2/home2/raphael/ntcir10/data.500k/aligned.grow-diag-final-and
# Lexical probability tables (one per direction).
file_lex_e2f: /poisson2/home2/raphael/ntcir10/data.500k/lex.e2f
file_lex_f2e: /poisson2/home2/raphael/ntcir10/data.500k/lex.f2e
# Output file for the extracted rules.
rule_table_path: /poisson2/home2/raphael/ntcir10/gentile.500k.model
# Language model settings.
file_lm: /poisson2/home2/raphael/ntcir10/lm/ja.full.binary
# Presumably the n-gram order of the language model -- TODO confirm.
max_gram: 5
# NOTE(review): the keys below sit under the language model heading but look
# like rule-extraction options -- confirm against the consumer.
# Booleans are written as `false`, not `no`: `no` is only a boolean under
# YAML 1.1; a YAML 1.2 loader would read it as the (truthy) string "no".
x_as_tag: false
max_merge_nodes: 3
max_tokens: 6
min_deep_extract_terminals: 3
max_transaction_rate: 3
############ Probability Estimate ############
# The input of this step is the file of extracted rules.
# NOTE(review): no input path is set in this section; presumably the
# rule-extraction output is reused -- confirm.
dispersion_tables: 10
max_rules_for_each_source: 2000
############ Decoder ############
# Written as `false`, not `no`: `no` is only a boolean under YAML 1.1; a
# YAML 1.2 loader would read it as the (truthy) string "no".
debug: false
# The input rule table is the same as file_rules_final.
# NOTE(review): no `file_rules_final` key is defined in this file; presumably
# the decoder reads `rule_table_path` -- confirm.
# Input translation data (source-side tree and dependency files).
file_translation_input_tree: /poisson2/home2/raphael/ntcir10/data.test.100/data.en.tree
file_translation_input_dep: /poisson2/home2/raphael/ntcir10/data.test.100/data.en.dep
# Output translation data.
file_translation_output: /poisson2/home2/raphael/ntcir10/data.test.100/gentile.500k.ja
# Size of the n-best list in n-best output mode; must be greater than 0.
nbest: 50
size_cube_pruning: 1000
size_beam: 1000
# Maximum number of non-terminals in tree reconstruction.
reconstruction_max_nt: 2
# Weight settings: nine values, one per list entry.
# NOTE(review): presumably in the same feature order as the labelled,
# commented-out list kept below (seven rule-table features, then context
# match, then language model) -- confirm.
weights:
  - 0.00504458302615129
  - 0.162896643596612
  - 0.0364258778007254
  - 0.134336627483515
  - 0.100918404310669
  - -0.0338177273228751
  - -0.252108407937042
  - 0.074201366359231
  - 0.200250362163178
# Commented-out weight list with per-feature labels:
# #Statistical Features (in ruletable):
# # Balance CountFE Pf2e Pe2f Frequency frag_penalty strength
# - 0.188971952669076
# - 0.188971952669076
# - 0.188971952669076
# - 0.181349660986841
# - 0.13934387193861
# - 0.0172401560409634
# - -0.0314811258340791
# #CONTEXTMATCHED
# - 0.328480456831666
# # languagemodel
# - 0.313132775698764