-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathppl.sh
62 lines (54 loc) · 2.42 KB
/
ppl.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env bash
# This script computes the perplexity (PPL) of a text given a language model (LM).
# It does so for a set of texts against a set of LMs.
#echo ${#}
# if [ $# -ne 3 ]; then
# echo "usage: ${0} test_dir lm_dir ppl_dir";
# exit -1;
# fi
################ generic function code ############################
#######################################
# Compute the perplexity (PPL) of every test text under every language model.
#
# For each test text T and each model M, runs `ngram -lm M -ppl T` and stores
# the raw output in <ppl_dir>/<M>_<T>.ppl. The .ppl files belonging to T are
# then summarised, one line per file ("<ppl> ppl", "<oov rate> %oov", path),
# into <ppl_dir>/<T>_results.txt and, sorted numerically by PPL, into
# <ppl_dir>/<T>_results_sorted.txt.
#
# Arguments:
#   $1 - glob pattern (or single path) selecting the test texts
#   $2 - glob pattern (or single path) selecting the LM files
#   $3 - output directory (created if it does not exist)
# Outputs:
#   Progress messages on stdout, warnings on stderr.
# Returns:
#   Non-zero if the output directory cannot be created or the final sort
#   fails.
#######################################
computer_ppl() {
  local test_dir=${1} # input pattern
  local lm_dir=${2}   # LMs pattern
  local ppl_dir=${3}  # output dir
  local test_text test_name lm lm_name ppl_file results_file stats

  mkdir -p -- "${ppl_dir}" || return

  # The two pattern arguments are expanded unquoted on purpose so that the
  # shell performs filename expansion on them.
  # shellcheck disable=SC2086
  for test_text in ${test_dir}; do
    # An unmatched glob stays as the literal pattern; skip it.
    [[ -e "${test_text}" ]] || continue
    test_name=$(basename -- "${test_text}")
    printf "test text: %s\n---------\n\n" "${test_name}"

    # shellcheck disable=SC2086
    for lm in ${lm_dir}; do
      [[ -e "${lm}" ]] || continue
      lm_name=$(basename -- "${lm}")
      printf "applying %s\n" "${lm_name}"
      ngram -lm "${lm}" -ppl "${test_text}" \
        > "${ppl_dir}/${lm_name}_${test_name}.ppl" \
        || printf 'warning: ngram failed for %s on %s\n' \
          "${lm_name}" "${test_name}" >&2
      printf "%s\n" "------------------"
    done
    printf "%s\n" "====================================="
    printf "%s\n" "====================================="

    # The summary is rewritten from scratch on every run (the redirection on
    # the loop truncates it) so repeated runs do not pile up duplicate rows.
    results_file="${ppl_dir}/${test_name}_results.txt"
    for ppl_file in "${ppl_dir}"/*"${test_name}".ppl; do
      [[ -e "${ppl_file}" ]] || continue
      # One awk pass extracts PPL, OOV count and word count; the OOV rate is
      # reported as 0 when the word count is 0 instead of dividing by zero.
      if ! stats=$(awk '
        /ppl=/  { ppl = $6; seen = 1 }
        /OOVs/  { oov = $7 }
        /words/ { words = $5 }
        END {
          if (!seen) exit 1
          printf "%.1f %.1f\n", ppl, (words > 0 ? oov / words * 100 : 0)
        }
      ' "${ppl_file}"); then
        printf 'warning: no ppl value found in %s\n' "${ppl_file}" >&2
        continue
      fi
      printf '%s ppl\t %s %%oov\t %s\n' \
        "${stats% *}" "${stats#* }" "${ppl_file}"
    done > "${results_file}"

    sort -n "${results_file}" \
      > "${ppl_dir}/${test_name}_results_sorted.txt"
  done
}
################ end of generic function code ############################
# You can change the values below according to your paths
test_dir="asr-test/test_txt/*" # glob selecting the input test texts
ppl_dir="asr-test/ppl_dir"     # output dir
lm_dir="lms/*gt100_pruned8*"   # glob selecting the LMs
final_result_dir="asr-test"
outfile="lm_ppl.result"

# computer_ppl reads its arguments as (test pattern, LM pattern, output dir),
# in that order. The patterns are quoted so they arrive unexpanded as single
# arguments; unquoted, each glob would expand here and shift the positions.
computer_ppl "${test_dir}" "${lm_dir}" "${ppl_dir}"

# Collect the sorted results of each corpus into the outfile, with a separator
# line between consecutive corpora.
corpora=(jsc kacst500 N7_02 N7_04)
{
  for corpus_idx in "${!corpora[@]}"; do
    if ((corpus_idx > 0)); then
      echo "--------------------------------"
    fi
    cat "${ppl_dir}"/*"${corpora[corpus_idx]}"*sorted.txt
  done
} > "${final_result_dir}/${outfile}"