-
Notifications
You must be signed in to change notification settings - Fork 2
/
config.yaml
167 lines (167 loc) · 2.85 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# Generator section: `name` identifies the model and `config` carries its
# settings.
# NOTE(review): this copy of the file had lost all indentation, which made
# `name`, `config` and `sample_rate` collide with same-named keys in other
# sections. The nesting below is reconstructed from key order — confirm it
# against the code that loads this file.
generator:
  name: MSCodecLM
  config:
    encoder_dim: 32
    sample_rate: 16000
    # The four rates multiply to 3 * 4 * 5 * 8 = 480.
    encoder_rates:
    - 3
    - 4
    - 5
    - 8
    latent_dim: 512
    decoder_dim: 1536
    # Same values as encoder_rates, listed in reverse order.
    decoder_rates:
    - 8
    - 5
    - 4
    - 3
    vq_strides:
    - 4
    - 2
    - 1
    noise: true
    depthwise: true
    use_cblinear: true
    attn_window_size: 8
    # NOTE(review): both paths are relative; presumably resolved against the
    # working directory of the training run — confirm.
    local_embedding_path: embed_llama2.pt
    global_embedding_path: layer1.pth
# Presumably the list of discriminator sections to instantiate. Only `mfd` is
# listed, although `mpd` and `msd` sections are also defined in this file.
# NOTE(review): confirm that unlisted sections are ignored by the trainer.
d_list:
- mfd
# Settings for the discriminator referenced as `mfd` in d_list.
# NOTE(review): nesting reconstructed after indentation was lost in this
# copy; `domain`, `mel_scale` and `sample_rate` are assumed to belong to
# `config` — confirm against the consumer.
mfd:
  name: MultiFrequencyDiscriminator
  config:
    # Six hop lengths, each double the previous one.
    hop_lengths:
    - 32
    - 64
    - 128
    - 256
    - 512
    - 1024
    # Six entries, the same count as hop_lengths (presumably index-aligned).
    hidden_channels:
    - 64
    - 128
    - 256
    - 512
    - 512
    - 512
    domain: double
    mel_scale: true
    # Same value as generator.config.sample_rate in this file.
    sample_rate: 16000
# Settings for the discriminator keyed `mpd`. It is not listed in d_list in
# this file.
# NOTE(review): nesting reconstructed after indentation was lost in this
# copy — confirm against the consumer.
mpd:
  name: MultiPeriodDiscriminator
  config:
    # The first five prime numbers.
    period_sizes:
    - 2
    - 3
    - 5
    - 7
    - 11
    period_kernel_size: 5
# Settings for the discriminator keyed `msd`. It is not listed in d_list in
# this file.
# NOTE(review): nesting reconstructed after indentation was lost in this
# copy — confirm against the consumer.
msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2
# Optimizer settings, one entry per key `g` and `d` (presumably generator and
# discriminator). Both entries carry identical values in this file.
# NOTE(review): nesting reconstructed after indentation was lost in this
# copy — confirm against the consumer.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
      - 0.8
      - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
      - 0.8
      - 0.99
      eps: 1.0e-06
# Learning-rate scheduler settings, keyed `g` and `d` like the optimizer
# section. Both use the same scheduler name and gamma.
# NOTE(review): nesting reconstructed after indentation was lost in this
# copy — confirm against the consumer.
lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999
# Loss configuration: `g_criterion` and `d_criterion` each name a loss by
# dotted path and pass it a `config` mapping.
# NOTE(review): nesting reconstructed after indentation was lost in this
# copy. The three *_loss sub-mappings are assumed to sit inside
# g_criterion.config — confirm against the loss class.
criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # NOTE(review): use_mel_loss is false while mel_loss_weight and
      # mel_scale_loss are still set; presumably ignored when disabled.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 16000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # Three entries per list (presumably one STFT resolution per index).
      full_multi_scale_stft_loss:
        fft_sizes:
        - 512
        - 1024
        - 2048
        win_sizes:
        - 480
        - 960
        - 1200
        hop_sizes:
        - 120
        - 240
        - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        # The repeated 256 is present in the original values and is kept.
        fft_sizes:
        - 128
        - 256
        - 256
        win_sizes:
        - 80
        - 120
        - 200
        hop_sizes:
        - 20
        - 40
        - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    # Explicit null: no settings are passed for this loss.
    config: null
# Run-level scalar settings: data lists, logging cadence, batch shape and
# output locations.
# NOTE(review): indentation was lost in this copy, so it is not visible here
# whether commit_loss_weight sits at top level or under `criterion` — confirm.
commit_loss_weight: 1.0
training_file: train.scp
validation_file: val.scp
seed: 2333
cudnn_deterministic: false
tensorboard: true
# checkpoint_interval and validation_interval share the same value (5000).
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000
# NOTE(review): the key is spelled "num_epoches"; renaming it would require
# changing whatever reads it, so it is left as-is.
num_epoches: 500
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
# 57600 / 16000 = 3.6, i.e. 3.6 s if this is a sample count at sample_rate
# — presumably it is; confirm.
segment_size: 57600
audio_norm_scale: 0.95
batch_size: 8
num_workers: 8
num_plots: 8
local_rank: -1
basic_model_config: config/msc16k.yaml
exp_model_config: null
log_dir: log_g_voc
# NOTE(review): 2000 differs from the product of the generator's
# encoder_rates (480); confirm what this value is used for.
hop_length: 2000
ngpus_per_node: 2
# Same value as the sample_rate entries in the generator and mfd sections.
sample_rate: 16000
# Located under log_dir (log_g_voc).
model_ckpt_dir: log_g_voc/model_ckpts