fix ds2 bugs

mindspore-lab · Nov 15, 2023 · f210594
1 parent 53ee157
commit f210594
Show file tree

Hide file tree

Showing 3 changed files with 8 additions and 19 deletions.
diff --git a/examples/deepspeech2/README_CN.md b/examples/deepspeech2/README_CN.md
@@ -34,16 +34,15 @@ DeepSpeech2是一种采用CTC损失训练的语音识别模型。它用神经网
 如为未下载数据集，可使用提供的脚本进行一键下载以及数据准备，如下所示：
 
 ```shell
-cd mindaudio/data
 # Download and creat json
-python librispeech_prepare.py --root_path "your_data_path"
+python mindaudio/data/librispeech.py --root_path "your_data_path"
 ```
 
 如已下载好压缩文件，请按如下命令操作：
 
 ```shell
 # creat json
-python librispeech_prepare.py --root_path "your_data_path"  --data_ready True
+python mindaudio/data/librispeech.py --root_path "your_data_path"  --data_ready True
 ```
 
 LibriSpeech存储flac音频格式的文件。要在MindAudio中使用它们，须将所有flac文件转换为wav文件，用户可以使用[ffmpeg](https://gist.github.com/seungwonpark/4f273739beef2691cd53b5c39629d830)或[sox](https://sourceforge.net/projects/sox/)进行转换。
@@ -94,19 +93,13 @@ mpirun -n 8 python train.py -c "./deepspeech2.yaml"
 mpirun --allow-run-as-root -n 8 python train.py -c "./deepspeech2.yaml"
 ```
 
-#### 在GPU上进行多卡训练
-If you want to use the GPU for distributed training, see the following command：
-```shell
-# Distribute_training
-# assume you have 8 GPUs
-mpirun -n 8 python train.py -c "./deepspeech2.yaml" --device_target "GPU"
-```
 
 ### 3.评估模型
 
+将训好的权重地址更新在deepspeech2.yaml配置文件Pretrained_model中，执行以下命令
 ```shell
 # Validate a trained model
-python eval.py -c "./deepspeech2.yaml" --pre_trained_model_path "xx.ckpt"
+python eval.py -c "./deepspeech2.yaml"
 ```
 
 

diff --git a/examples/deepspeech2/deepspeech2.yaml b/examples/deepspeech2/deepspeech2.yaml
@@ -45,7 +45,7 @@ EvalConfig:
     save_output: 'librispeech_val_output'
 
 # use to finetune or eval model
-Pretrained_model: './ckpt'
+Pretrained_model: ''
 
 labels: ["'", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
          "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", " ", "_"]
diff --git a/examples/deepspeech2/eval.py b/examples/deepspeech2/eval.py
@@ -6,12 +6,12 @@
 import mindspore.ops as ops
 import numpy as np
 from dataset import create_dataset
-from hparams import parse_args
 from mindspore import context, nn
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 
 from mindaudio.models.decoders.greedydecoder import MSGreedyDecoder
 from mindaudio.models.deepspeech2 import DeepSpeechModel
+from mindaudio.utils.hparams import parse_args
 
 
 class PredictWithSoftmax(nn.Cell):
@@ -73,10 +73,7 @@ def construct(self, inputs, input_length):
     load_param_into_net(model, param_dict)
     print("Successfully loading the pre-trained model")
 
-    if args.Decoder_type == "greedy":
-        decoder = MSGreedyDecoder(labels=labels, blank_index=labels.index("_"))
-    else:
-        raise NotImplementedError("Only greedy decoder is supported now")
+    decoder = MSGreedyDecoder(labels=labels, blank_index=labels.index("_"))
     target_decoder = MSGreedyDecoder(labels, blank_index=labels.index("_"))
 
     model.set_train(False)
@@ -106,8 +103,7 @@ def construct(self, inputs, input_length):
         decoded_output, _ = decoder.decode(out, output_sizes)
         target_strings = target_decoder.convert_to_strings(split_targets)
 
-        if args.save_output is not None:
-            output_data.append((out.asnumpy(), output_sizes.asnumpy(), target_strings))
+        output_data.append((out.asnumpy(), output_sizes.asnumpy(), target_strings))
         for doutput, toutput in zip(decoded_output, target_strings):
             transcript, reference = doutput[0], toutput[0]
             wer_inst = decoder.wer(transcript, reference)