We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent b5385b4, commit 98aa1d8 (copy full SHA for 98aa1d8)
egs/gale_arabic/s5c/local/prepare_dict_subword.sh
@@ -48,7 +48,7 @@ glossaries="<UNK> <sil>"
48
if [ $stage -le 0 ]; then
49
echo "$0: making subword lexicon... $(date)."
50
# get pair_code file
51
- cut -d ' ' -f2- data/train/text | sed 's/<[^>]*>//g' | utils/lang/bpe/learn_bpe.py -s $num_merges > data/local/pair_code.txt
+ cut -d ' ' -f2- data/train/text | sed 's/<sil>//g;s/<UNK>//g' | utils/lang/bpe/learn_bpe.py -s $num_merges > data/local/pair_code.txt
52
mv $dir/lexicon.txt $dir/lexicon_word.txt
53
# get words
54
cut -d ' ' -f1 $dir/lexicon_word.txt > $dir/words.txt
0 commit comments