Skip to content

Commit 60f2bcf

Browse files
authored
Merge pull request #2 from kaldi-asr/master
pull from master
2 parents 240f0e4 + 7906590 commit 60f2bcf

23 files changed

+93
-70
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Compiled extensionless executable files in /src/*/
22
# This stanza must precede wildcard patterns below!
33
/src/*/*
4+
!/src/lm/test_data/
45
!/src/*/?*.*
56
!/src/doc/*
67
!/src/*/Makefile

egs/fisher_swbd/s5/local/chain/compare_wer_general.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_blstm_6h.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_blstm_6j.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_tdnn_7b.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1a.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_tdnn_lstm_1b.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1a.sh

100644100755
File mode changed.

egs/fisher_swbd/s5/local/chain/run_tdnn_opgru_1b.sh

100644100755
File mode changed.

egs/librispeech/s5/local/chain/tuning/run_tdnn_1b.sh

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -173,8 +173,6 @@ if [ $stage -le 15 ]; then
173173
/export/b0{5,6,7,8}/$USER/kaldi-data/egs/librispeech-$(date +'%m_%d_%H_%M')/s5c/$dir/egs/storage $dir/egs/storage
174174
fi
175175

176-
touch $dir/egs/.nodelete # keep egs around when that run dies.
177-
178176
steps/nnet3/chain/train.py --stage $train_stage \
179177
--cmd "$decode_cmd" \
180178
--feat.online-ivector-dir $train_ivector_dir \

egs/swbd/s5c/local/nnet3/run_ivector_common.sh

Lines changed: 17 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -4,49 +4,38 @@
44
set -e
55
stage=1
66
train_stage=-10
7-
generate_alignments=true # false if doing ctc training
7+
generate_alignments=true
88
speed_perturb=true
99

1010
. ./path.sh
1111
. ./utils/parse_options.sh
1212

13-
mkdir -p nnet3
14-
# perturbed data preparation
13+
mkdir -p exp/nnet3
1514
train_set=train_nodup
1615

1716
if [ -e data/rt03 ]; then maybe_rt03=rt03; else maybe_rt03= ; fi
1817

19-
if [ "$speed_perturb" == "true" ]; then
18+
if $speed_perturb; then
2019
if [ $stage -le 1 ]; then
21-
#Although the nnet will be trained by high resolution data, we still have to perturbe the normal data to get the alignment
20+
# Although the nnet will be trained by high resolution data, we still have to perturb the normal data to get the alignments
2221
# _sp stands for speed-perturbed
23-
24-
for datadir in train_nodup; do
25-
utils/perturb_data_dir_speed.sh 0.9 data/${datadir} data/temp1
26-
utils/perturb_data_dir_speed.sh 1.1 data/${datadir} data/temp2
27-
utils/combine_data.sh data/${datadir}_tmp data/temp1 data/temp2
28-
utils/validate_data_dir.sh --no-feats data/${datadir}_tmp
29-
rm -r data/temp1 data/temp2
30-
31-
mfccdir=mfcc_perturbed
32-
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
33-
data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
34-
steps/compute_cmvn_stats.sh data/${datadir}_tmp exp/make_mfcc/${datadir}_tmp $mfccdir || exit 1;
35-
utils/fix_data_dir.sh data/${datadir}_tmp
36-
37-
utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- data/${datadir} data/temp0
38-
utils/combine_data.sh data/${datadir}_sp data/${datadir}_tmp data/temp0
39-
utils/fix_data_dir.sh data/${datadir}_sp
40-
rm -r data/temp0 data/${datadir}_tmp
41-
done
22+
echo "$0: preparing directory for speed-perturbed data"
23+
utils/data/perturb_data_dir_speed_3way.sh data/${train_set} data/${train_set}_sp
24+
25+
echo "$0: creating MFCC features for low-resolution speed-perturbed data"
26+
mfccdir=mfcc_perturbed
27+
steps/make_mfcc.sh --cmd "$train_cmd" --nj 50 \
28+
data/${train_set}_sp exp/make_mfcc/${train_set}_sp $mfccdir
29+
steps/compute_cmvn_stats.sh data/${train_set}_sp exp/make_mfcc/${train_set}_sp $mfccdir
30+
utils/fix_data_dir.sh data/${train_set}_sp
4231
fi
4332

44-
if [ $stage -le 2 ] && [ "$generate_alignments" == "true" ]; then
45-
#obtain the alignment of the perturbed data
33+
if [ $stage -le 2 ] && $generate_alignments; then
34+
# obtain the alignment of the perturbed data
4635
steps/align_fmllr.sh --nj 100 --cmd "$train_cmd" \
47-
data/train_nodup_sp data/lang exp/tri4 exp/tri4_ali_nodup_sp || exit 1
36+
data/${train_set}_sp data/lang exp/tri4 exp/tri4_ali_nodup_sp
4837
fi
49-
train_set=train_nodup_sp
38+
train_set=${train_set}_sp
5039
fi
5140

5241
if [ $stage -le 3 ]; then

egs/wsj/s5/steps/cleanup/lattice_oracle_align.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,9 @@ fi
7272
nj=$(cat $latdir/num_jobs)
7373
oov=$(cat $lang/oov.int)
7474

75-
utils/split_data.sh --per-utt $data $nj
75+
utils/split_data.sh $data $nj
7676

77-
sdata=$data/split${nj}utt
77+
sdata=$data/split$nj;
7878

7979
if [ $stage -le 1 ]; then
8080
$cmd JOB=1:$nj $dir/log/get_oracle.JOB.log \

egs/wsj/s5/utils/data/get_uniform_subsegments.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ def run(args):
8787
else:
8888
end = end_time
8989
new_utt = "{utt_id}-{s:08d}-{e:08d}".format(
90-
utt_id=utt_id, s=int(100 * (start - start_time)),
91-
e=int(100 * (end - start_time)))
90+
utt_id=utt_id, s=int(round(100 * (start - start_time))),
91+
e=int(round(100 * (end - start_time))))
9292
print ("{new_utt} {utt_id} {s} {e}".format(
9393
new_utt=new_utt, utt_id=utt_id, s=start - start_time,
9494
e=end - start_time))

egs/wsj/s5/utils/data/perturb_data_dir_speed_3way.sh

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,17 @@ utils/data/get_utt2dur.sh ${srcdir}
3838

3939
utils/data/perturb_data_dir_speed.sh 0.9 ${srcdir} ${destdir}_speed0.9 || exit 1
4040
utils/data/perturb_data_dir_speed.sh 1.1 ${srcdir} ${destdir}_speed1.1 || exit 1
41-
utils/data/combine_data.sh $destdir ${srcdir} ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
4241

43-
rm -r ${destdir}_speed0.9 ${destdir}_speed1.1
42+
utils/copy_data_dir.sh --spk-prefix sp1.0- --utt-prefix sp1.0- ${srcdir} ${destdir}_speed1.0
43+
if [ ! -f $srcdir/utt2uniq ]; then
44+
cat $srcdir/utt2spk | awk '{printf("sp1.0-%s %s\n", $1, $1);}' > ${destdir}_speed1.0/utt2uniq
45+
else
46+
cat $srcdir/utt2uniq | awk '{printf("sp1.0-%s %s\n", $1, $2);}' > ${destdir}_speed1.0/utt2uniq
47+
fi
48+
49+
utils/data/combine_data.sh $destdir ${destdir}_speed1.0 ${destdir}_speed0.9 ${destdir}_speed1.1 || exit 1
50+
51+
rm -r ${destdir}_speed0.9 ${destdir}_speed1.1 ${destdir}_speed1.0
4452

4553
echo "$0: generated 3-way speed-perturbed version of data in $srcdir, in $destdir"
4654
utils/validate_data_dir.sh --no-feats --no-text $destdir

src/base/io-funcs.cc

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,14 @@ int PeekToken(std::istream &is, bool binary) {
178178
}
179179
int ans = is.peek();
180180
if (read_bracket) {
181-
if (!is.unget())
181+
if (!is.unget()) {
182182
KALDI_WARN << "Error ungetting '<' in PeekToken";
183+
// Clear the bad bit. It seems to be possible for this code to be
184+
// reached, and the C++ standard is very vague on whether even a single
185+
// call to unget() should succeed; see
186+
// http://www.cplusplus.com/reference/istream/istream/unget/
187+
is.clear();
188+
}
183189
}
184190
return ans;
185191
}
@@ -197,7 +203,12 @@ void ExpectToken(std::istream &is, bool binary, const char *token) {
197203
KALDI_ERR << "Failed to read token [started at file position "
198204
<< pos_at_start << "], expected " << token;
199205
}
200-
if (strcmp(str.c_str(), token) != 0) {
206+
// The second half of the '&&' expression below is so that if we're expecting
207+
// "<Foo>", we will accept "Foo>" instead. This is so that the model-reading
208+
// code will tolerate errors in PeekToken where is.unget() failed; search for
209+
// is.clear() in PeekToken() for an explanation.
210+
if (strcmp(str.c_str(), token) != 0 &&
211+
!(token[0] == '<' && strcmp(str.c_str(), token + 1) == 0)) {
201212
KALDI_ERR << "Expected token \"" << token << "\", got instead \""
202213
<< str <<"\".";
203214
}

src/doc/chain.dox

Lines changed: 1 addition & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -389,34 +389,7 @@ on the paths.
389389
You might notice in the current example scripts that we use iVectors. We do so
390390
just because they generally help a bit, and because the baseline setup we were
391391
comparing with, uses them. There is no inherent connection with 'chain'
392-
models, and no fundamental requirement to use them. Actually we want to get rid
393-
of them (see below).
394-
395-
396-
\section chain_next_steps Next steps (TODOs) with 'chain' models
397-
398-
(Note: this list is valid as of Dec 13 2015, but may become out of date).
399-
Things we need to do (and that we'd like help with) are:
400-
- Supply example scripts (and tune them) on a wide range of corpora
401-
(It will be interesting to see whether there are scale-dependent effects
402-
affecting how well this model works).
403-
- Create and tune LSTM and BLSTM versions of the training script. (This
404-
may involve some playing around with learning rate schedules and
405-
configurations).
406-
- Figure out how to speed up the forward-backward part of the computation.
407-
(E.g. using state-level pruning, or just by optimizing the current kernels or
408-
data structures).
409-
410-
A longer-term TODO, which Dan should do, is to create an online decoding setup
411-
for these models. Actually this isn't really distinct from nnet3 online
412-
decoding in general, since the models are no different from regular nnet3
413-
acoustic models. But we do have to decide whether to continue to support
414-
iVectors-- getting rid of them would simplify the setup considerably, and
415-
would hopefully make it more robust. We are hoping that with LSTMs, since it
416-
already sees quite a wide acoustic context, iVector adaptation will no longer
417-
be as helpful and could be dropped. We also have other ideas how to
418-
incorporate adaptation as part of the neural network, without the use of
419-
iVectors. This will require some experimentation.
392+
models, and no fundamental requirement to use them.
420393

421394

422395
*/

src/lm/arpa-lm-compiler-test.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,17 @@ bool ScoringTest(bool seps, const string &infile, const string& sentence,
204204
return ok;
205205
}
206206

207+
bool ThrowsExceptionTest(bool seps, const string &infile) {
208+
try {
209+
// Make memory cleanup easy in both cases of try-catch block.
210+
std::unique_ptr<ArpaLmCompiler> compiler(Compile(seps, infile));
211+
return false;
212+
} catch (const std::runtime_error&) {
213+
// Kaldi throws only std::runtime_error in kaldi-error.cc
214+
return true;
215+
}
216+
}
217+
207218
} // namespace kaldi
208219

209220
bool RunAllTests(bool seps) {
@@ -214,6 +225,9 @@ bool RunAllTests(bool seps) {
214225

215226
ok &= kaldi::ScoringTest(seps, "test_data/input.arpa", "b b b a", 59.2649);
216227
ok &= kaldi::ScoringTest(seps, "test_data/input.arpa", "a b", 4.36082);
228+
229+
ok &= kaldi::ThrowsExceptionTest(seps, "test_data/missing_bos.arpa");
230+
217231
if (!ok) {
218232
KALDI_WARN << "Tests " << (seps ? "with" : "without")
219233
<< " epsilon substitution FAILED";

src/lm/arpa-lm-compiler.cc

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -360,10 +360,18 @@ void ArpaLmCompiler::RemoveRedundantStates() {
360360
<< fst_.NumStates();
361361
}
362362

363+
void ArpaLmCompiler::Check() const {
364+
if (fst_.Start() == fst::kNoStateId) {
365+
KALDI_ERR << "Arpa file did not contain the beginning-of-sentence symbol "
366+
<< Symbols()->Find(Options().bos_symbol) << ".";
367+
}
368+
}
369+
363370
void ArpaLmCompiler::ReadComplete() {
364371
fst_.SetInputSymbols(Symbols());
365372
fst_.SetOutputSymbols(Symbols());
366373
RemoveRedundantStates();
374+
Check();
367375
}
368376

369377
} // namespace kaldi

src/lm/arpa-lm-compiler.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class ArpaLmCompiler : public ArpaFileParser {
5252
// this function removes states that only have a backoff arc coming
5353
// out of them.
5454
void RemoveRedundantStates();
55+
void Check() const;
5556

5657
int sub_eps_;
5758
ArpaLmCompilerImplInterface* impl_; // Owned.

src/lm/test_data/missing_bos.arpa

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
2+
\data\
3+
ngram 1=3
4+
ngram 2=1
5+
ngram 3=1
6+
7+
\1-grams:
8+
-5.234679 a -3.3
9+
-3.456783 b -3.0
10+
-4.333333 </s>
11+
12+
\2-grams:
13+
-1.45678 a b -3.23
14+
15+
\3-grams:
16+
-0.23940 a b </s>
17+
18+
\end\

src/nnet3/nnet-chain-training.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ NnetChainTrainer::NnetChainTrainer(const NnetChainTrainingOptions &opts,
3737
if (opts.nnet_config.zero_component_stats)
3838
ZeroComponentStats(nnet);
3939
KALDI_ASSERT(opts.nnet_config.momentum >= 0.0 &&
40-
opts.nnet_config.max_param_change >= 0.0);
40+
opts.nnet_config.max_param_change >= 0.0 &&
41+
opts.nnet_config.backstitch_training_interval > 0);
4142
delta_nnet_ = nnet_->Copy();
4243
ScaleNnet(0.0, delta_nnet_);
4344
const int32 num_updatable = NumUpdatableComponents(*delta_nnet_);

src/nnet3/nnet-simple-component.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2854,8 +2854,8 @@ void NaturalGradientAffineComponent::Read(std::istream &is, bool binary) {
28542854
}
28552855
std::string token;
28562856
ReadToken(is, binary, &token);
2857-
if (token != "<NaturalGradientAffineComponent>" &&
2858-
token != "</NaturalGradientAffineComponent>")
2857+
// the following has to handle a couple of variants of the token: with or without the leading '<', i.e. "<NaturalGradientAffineComponent>" or "</NaturalGradientAffineComponent>"
2858+
if (token.find("NaturalGradientAffineComponent>") == std::string::npos)
28592859
KALDI_ERR << "Expected <NaturalGradientAffineComponent> or "
28602860
<< "</NaturalGradientAffineComponent>, got " << token;
28612861
SetNaturalGradientConfigs();

src/nnet3/nnet-training.cc

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,8 @@ NnetTrainer::NnetTrainer(const NnetTrainerOptions &config,
3434
if (config.zero_component_stats)
3535
ZeroComponentStats(nnet);
3636
KALDI_ASSERT(config.momentum >= 0.0 &&
37-
config.max_param_change >= 0.0);
37+
config.max_param_change >= 0.0 &&
38+
config.backstitch_training_interval > 0);
3839
delta_nnet_ = nnet_->Copy();
3940
ScaleNnet(0.0, delta_nnet_);
4041
const int32 num_updatable = NumUpdatableComponents(*delta_nnet_);

0 commit comments

Comments
 (0)