Commit a30997b

Adding local/chain/multi_style/run_tdnn_1a.sh to the branch
1 parent 5849164 commit a30997b

File tree

6 files changed (+803, -0 lines changed)
egs/swbd/s5c/local/chain/multi_style/run_tdnn_1a.sh

Lines changed: 285 additions & 0 deletions
#!/bin/bash

# 7q is as 7p but a modified topology with resnet-style skip connections, more layers,
# skinnier bottlenecks, removing the 3-way splicing and skip-layer splicing,
# and re-tuning the learning rate and l2 regularization. The configs are
# standardized and substantially simplified. There isn't any advantage in WER
# on this setup; the advantage of this style of config is that it also works
# well on smaller datasets, and we adopt this style here also for consistency.

# local/chain/compare_wer_general.sh --rt03 tdnn7p_sp tdnn7q_sp
# System                    tdnn7p_sp  tdnn7q_sp
# WER on train_dev(tg)          11.80      11.79
# WER on train_dev(fg)          10.77      10.84
# WER on eval2000(tg)            14.4       14.3
# WER on eval2000(fg)            13.0       12.9
# WER on rt03(tg)                17.5       17.6
# WER on rt03(fg)                15.3       15.2
# Final train prob             -0.057     -0.058
# Final valid prob             -0.069     -0.073
# Final train prob (xent)      -0.886     -0.894
# Final valid prob (xent)     -0.9005    -0.9106
# Num-parameters             22865188   18702628


# steps/info/chain_dir_info.pl exp/chain/tdnn7q_sp
# exp/chain/tdnn7q_sp: num-iters=394 nj=3..16 num-params=18.7M dim=40+100->6034 combine=-0.058->-0.057 (over 8) xent:train/valid[261,393,final]=(-1.20,-0.897,-0.894/-1.20,-0.919,-0.911) logprob:train/valid[261,393,final]=(-0.090,-0.059,-0.058/-0.098,-0.073,-0.073)

set -e

# configs for 'chain'
stage=0
train_stage=-10
get_egs_stage=-10
num_epochs=1

# Augmentation options
multi_style=true
augment_test_set=true
noise_list="reverb:babble:music:noise:clean"

affix=1a
if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi

decode_iter=
decode_nj=50

# training options
frames_per_eg=150,110,100
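# frames_per_eg lists the allowed chunk widths (in frames) for the training
# examples; offering several sizes lets the example-generation code pack
# variable-length utterances more efficiently (the first value is the principal one).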
remove_egs=false
common_egs_dir=
xent_regularize=0.1
dropout_schedule='0,0@0.20,0.5@0.50,0'
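# dropout_schedule is a comma-separated list of dropout-proportion@data-fraction
# points; the proportion is interpolated between these points over the course of training.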

test_online_decoding=false  # if true, it will run the last decoding stage.

# End configuration section.
echo "$0 $@" # Print the command line for logging

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh

suffix=
$multi_style && suffix=_ms
dir=exp/chain/tdnn${affix}${suffix}_ep_${num_epochs}
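# e.g. with the defaults above (affix=1a, multi_style=true, num_epochs=1) this
# becomes exp/chain/tdnn1a_ms_ep_1.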

if ! cuda-compiled; then
  cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

# The iVector-extraction and feature-dumping parts are the same as the standard
# nnet3 setup, and you can skip them by setting "--stage 8" if you have already
# run those things.
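# e.g. an illustrative re-run that skips those stages:
#   local/chain/multi_style/run_tdnn_1a.sh --stage 8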

clean_set=train_nodup
clean_ali=tri4_ali_nodup
train_set=$clean_set$suffix  # Will be prepared by the script local/nnet3/prepare_multistyle_data.sh
ali_dir=$clean_ali$suffix
treedir=exp/chain/tri5_7d_tree$suffix
lang=data/lang_chain_2y


# If we are using the multi-style (augmented) data we need to generate
# alignments for it.
local/nnet3/prepare_multistyle_data.sh --stage $stage \
  --multi-style $multi_style \
  --generate-alignments $multi_style \
  --augment-test-set $augment_test_set \
  --noise-list $noise_list \
  --train-set $clean_set --clean-ali $clean_ali || exit 1;

if [ $stage -le 9 ]; then
  # Get the alignments as lattices (gives the LF-MMI training more freedom).
  # use the same num-jobs as the alignments
  nj=$(cat exp/tri4_ali_nodup$suffix/num_jobs) || exit 1;
  steps/align_fmllr_lats.sh --nj $nj --cmd "$train_cmd" data/${clean_set} \
    data/lang exp/tri4 exp/tri4_lats_nodup${suffix}_clean
  rm exp/tri4_lats_nodup${suffix}_clean/fsts.*.gz # save space
  local/copy_lat_dir.sh --nj $nj --cmd "$train_cmd" \
    data/${train_set} exp/tri4_lats_nodup${suffix}_clean exp/tri4_lats_nodup${suffix} || exit 1;
fi

if [ $stage -le 10 ]; then
  # Create a version of the lang/ directory that has one state per phone in the
  # topo file. [note, it really has two states.. the first one is only repeated
  # once, the second one has zero or more repeats.]
  rm -rf $lang
  cp -r data/lang $lang
  silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
  nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
  # Use our special topology... note that later on may have to tune this
  # topology.
  steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 11 ]; then
  # Build a tree using our new topology. This is the critically different
  # step compared with other recipes.
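  # (--frame-subsampling-factor 3 means the tree and alignments are used at one
  # third of the input frame rate, as is usual for 'chain' models.)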
  steps/nnet3/chain/build_tree.sh --frame-subsampling-factor 3 \
    --context-opts "--context-width=2 --central-position=1" \
    --cmd "$train_cmd" 7000 data/$train_set $lang $ali_dir $treedir
fi

if [ $stage -le 12 ]; then
  echo "$0: creating neural net configs using the xconfig parser";

  num_targets=$(tree-info $treedir/tree |grep num-pdfs|awk '{print $2}')
  learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
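  # e.g. with xent_regularize=0.1 this evaluates to 5.0, so the cross-entropy
  # (xent) output layer trains with 5x the effective learning rate of the rest
  # of the network.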
  affine_opts="l2-regularize=0.01 dropout-proportion=0.0 dropout-per-dim=true dropout-per-dim-continuous=true"
  tdnnf_opts="l2-regularize=0.01 dropout-proportion=0.0 bypass-scale=0.66"
  linear_opts="l2-regularize=0.01 orthonormal-constraint=-1.0"
  prefinal_opts="l2-regularize=0.01"
  output_opts="l2-regularize=0.002"
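  # Note: bypass-scale=0.66 in tdnnf_opts provides the resnet-style skip
  # connections mentioned in the header comment; bottleneck-dim=160 in the
  # tdnnf layers below is the "skinnier bottleneck".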

  mkdir -p $dir/configs

  cat <<EOF > $dir/configs/network.xconfig
  input dim=100 name=ivector
  input dim=40 name=input

  # please note that it is important to have input layer with the name=input
  # as the layer immediately preceding the fixed-affine-layer to enable
  # the use of short notation for the descriptor
  fixed-affine-layer name=lda input=Append(-1,0,1,ReplaceIndex(ivector, t, 0)) affine-transform-file=$dir/configs/lda.mat

  # the first splicing is moved before the lda layer, so no splicing here
  relu-batchnorm-dropout-layer name=tdnn1 $affine_opts dim=1536
  tdnnf-layer name=tdnnf2 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
  tdnnf-layer name=tdnnf3 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
  tdnnf-layer name=tdnnf4 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=1
  tdnnf-layer name=tdnnf5 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=0
  tdnnf-layer name=tdnnf6 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf7 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf8 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf9 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf10 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf11 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf12 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf13 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf14 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  tdnnf-layer name=tdnnf15 $tdnnf_opts dim=1536 bottleneck-dim=160 time-stride=3
  linear-component name=prefinal-l dim=256 $linear_opts

  prefinal-layer name=prefinal-chain input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
  output-layer name=output include-log-softmax=false dim=$num_targets $output_opts

  prefinal-layer name=prefinal-xent input=prefinal-l $prefinal_opts big-dim=1536 small-dim=256
  output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor $output_opts
EOF
  steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi

if [ $stage -le 13 ]; then
  if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
    utils/create_split_dir.pl \
      /export/b0{5,6,7,8}/$USER/kaldi-data/egs/swbd-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
  fi

  # --cmd "queue.pl --config /home/dpovey/queue_conly.conf" \


  steps/nnet3/chain/train.py --stage $train_stage \
    --cmd "$train_cmd" \
    --feat.online-ivector-dir exp/nnet3/ivectors_${train_set} \
    --feat.cmvn-opts "--norm-means=false --norm-vars=false" \
    --chain.xent-regularize $xent_regularize \
    --chain.leaky-hmm-coefficient 0.1 \
    --chain.l2-regularize 0.0 \
    --chain.apply-deriv-weights false \
    --chain.lm-opts="--num-extra-lm-states=2000" \
    --trainer.dropout-schedule $dropout_schedule \
    --trainer.add-option="--optimization.memory-compression-level=2" \
    --egs.dir "$common_egs_dir" \
    --egs.stage $get_egs_stage \
    --egs.opts "--frames-overlap-per-eg 0 --constrained false" \
    --egs.chunk-width $frames_per_eg \
    --trainer.num-chunk-per-minibatch 64 \
    --trainer.frames-per-iter 1500000 \
    --trainer.num-epochs $num_epochs \
    --trainer.optimization.num-jobs-initial 3 \
    --trainer.optimization.num-jobs-final 16 \
    --trainer.optimization.initial-effective-lrate 0.00025 \
    --trainer.optimization.final-effective-lrate 0.000025 \
    --trainer.max-param-change 2.0 \
    --cleanup.remove-egs $remove_egs \
    --feat-dir data/${train_set}_hires \
    --tree-dir $treedir \
    --lat-dir exp/tri4_lats_nodup$suffix \
    --dir $dir || exit 1;

fi

if [ $stage -le 14 ]; then
  # Note: it might appear that this $lang directory is mismatched, and it is as
  # far as the 'topo' is concerned, but this script doesn't read the 'topo' from
  # the lang directory.
  utils/mkgraph.sh --self-loop-scale 1.0 data/lang_sw1_tg $dir $dir/graph_sw1_tg
fi


graph_dir=$dir/graph_sw1_tg
iter_opts=
if [ ! -z $decode_iter ]; then
  iter_opts=" --iter $decode_iter "
fi
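# e.g. passing --decode-iter 300 would decode with the model from training
# iteration 300 ($dir/300.mdl) instead of the final model.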
if [ $stage -le 15 ]; then
  rm $dir/.error 2>/dev/null || true
  for decode_set in train_dev eval2000 $maybe_rt03; do
    (
      steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
        --nj $decode_nj --cmd "$decode_cmd" $iter_opts \
        --online-ivector-dir exp/nnet3/ivectors_${decode_set} \
        $graph_dir data/${decode_set}_hires \
        $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1;
      if $has_fisher; then
        steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
          data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
          $dir/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1;
      fi
    ) || touch $dir/.error &
  done
  wait
  if [ -f $dir/.error ]; then
    echo "$0: something went wrong in decoding"
    exit 1
  fi
fi

if $test_online_decoding && [ $stage -le 16 ]; then
  # note: if the features change (e.g. you add pitch features), you will have to
  # change the options of the following command line.
  steps/online/nnet3/prepare_online_decoding.sh \
    --mfcc-config conf/mfcc_hires.conf \
    $lang exp/nnet3/extractor $dir ${dir}_online

  rm $dir/.error 2>/dev/null || true
  for decode_set in train_dev eval2000 $maybe_rt03; do
    (
      # note: we just give it "$decode_set" as it only uses the wav.scp, the
      # feature type does not matter.

      steps/online/nnet3/decode.sh --nj $decode_nj --cmd "$decode_cmd" \
        --acwt 1.0 --post-decode-acwt 10.0 \
        $graph_dir data/${decode_set}_hires \
        ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_tg || exit 1;
      if $has_fisher; then
        steps/lmrescore_const_arpa.sh --cmd "$decode_cmd" \
          data/lang_sw1_{tg,fsh_fg} data/${decode_set}_hires \
          ${dir}_online/decode_${decode_set}${decode_iter:+_$decode_iter}_sw1_{tg,fsh_fg} || exit 1;
      fi
    ) || touch $dir/.error &
  done
  wait
  if [ -f $dir/.error ]; then
    echo "$0: something went wrong in decoding"
    exit 1
  fi
fi


exit 0;

egs/swbd/s5c/local/copy_ali_dir.sh

Lines changed: 60 additions & 0 deletions
#!/bin/bash

noise_list="reverb1:babble:music:noise"
max_jobs_run=50
nj=100
cmd=queue.pl
write_binary=true

. ./path.sh
. utils/parse_options.sh

if [ $# -ne 3 ]; then
  echo "Usage: $0 <out-data> <src-ali-dir> <out-ali-dir>"
  exit 1
fi
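# e.g. an illustrative invocation (actual arguments depend on the calling script):
#   local/copy_ali_dir.sh --cmd "$train_cmd" --noise-list "reverb:babble:music:noise:clean" \
#     data/train_nodup_ms exp/tri4_ali_nodup exp/tri4_ali_nodup_ms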

data=$1
src_dir=$2
dir=$3

mkdir -p $dir

num_jobs=$(cat $src_dir/num_jobs)

rm -f $dir/ali_tmp.*.{ark,scp} 2>/dev/null

# Copy the alignments temporarily
echo "creating temporary alignments in $dir"
$cmd --max-jobs-run $max_jobs_run JOB=1:$num_jobs $dir/log/copy_ali_temp.JOB.log \
  copy-int-vector --binary=$write_binary \
  "ark:gunzip -c $src_dir/ali.JOB.gz |" \
  ark,scp:$dir/ali_tmp.JOB.ark,$dir/ali_tmp.JOB.scp || exit 1

# Make copies of utterances for perturbed data
utt_prefixes=`echo $noise_list | awk -F ":" '{for (i=1; i<=NF; i++) printf "%s- ", $i}'`
for p in $utt_prefixes; do
  cat $dir/ali_tmp.*.scp | awk -v p=$p '{print p$0}'
done | sort -k1,1 > $dir/ali_out.scp.noise
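# e.g. with the default noise_list above, utt_prefixes is "reverb1- babble- music- noise- ",
# so a (hypothetical) clean utterance "sw02001-A_000098_001156" gets alignment entries
# "reverb1-sw02001-A_000098_001156", "babble-sw02001-A_000098_001156", etc.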

cat $dir/ali_tmp.*.scp | awk '{print $0}' | sort -k1,1 > $dir/ali_out.scp.clean

cat $dir/ali_out.scp.clean $dir/ali_out.scp.noise | sort -k1,1 > $dir/ali_out.scp

utils/split_data.sh ${data} $nj

# Copy and dump the alignments for perturbed data
echo Creating alignments for augmented data by copying alignments from clean data
$cmd --max-jobs-run $max_jobs_run JOB=1:$nj $dir/log/copy_out_ali.JOB.log \
  copy-int-vector --binary=$write_binary \
  "scp:utils/filter_scp.pl ${data}/split$nj/JOB/utt2spk $dir/ali_out.scp |" \
  "ark:| gzip -c > $dir/ali.JOB.gz" || exit 1

#rm $dir/ali_out.scp.{noise,clean} $dir/ali_out.scp
rm $dir/ali_tmp.*

echo $nj > $dir/num_jobs

for f in cmvn_opts tree splice_opts phones.txt final.mdl frame_subsampling_factor; do
  if [ -f $src_dir/$f ]; then cp $src_dir/$f $dir/$f; fi
done
