Skip to content

Commit

Permalink
feat: 切换默认声学模型到m251bn
Browse files Browse the repository at this point in the history
  • Loading branch information
nl8590687 committed Mar 27, 2022
1 parent f19b207 commit 087f51f
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 28 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ $ python3 client_http.py

请注意,开启API服务器之后,需要使用本ASRT项目对应的客户端软件来进行语音识别,详见Wiki文档[下载ASRT语音识别客户端SDK和Demo](https://wiki.ailemon.net/docs/asrt-doc/download)

如果要训练和使用非251版模型,请在代码中 `import speech_model_zoo` 的相应位置做修改。
如果要训练和使用非251bn版模型,请在代码中 `import speech_model_zoo` 的相应位置做修改。

使用docker直接部署ASRT:
```shell
Expand Down
2 changes: 1 addition & 1 deletion README_EN.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ To test whether it is successful or not that calls api service interface:
$ python3 client_http.py
```

If you want to train and use other model(not Model 251), make changes in the corresponding position of the `import speech_model_zoo` in the code files.
If you want to train and use a model other than Model 251bn, modify the corresponding `import speech_model_zoo` statements in the code files.

If you run into any problem while running or using the program, please report it in an issue, and I will reply as soon as possible.

Expand Down
8 changes: 4 additions & 4 deletions asrserver.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import http.server
import socket
from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage

Expand All @@ -35,13 +35,13 @@
CHANNELS = 1
# 默认输出的拼音的表示大小是1428,即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')

ml = ModelLanguage('model_language')
ml.LoadModel()
Expand Down
22 changes: 15 additions & 7 deletions asrserver_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,37 +23,43 @@
ASRT语音识别基于HTTP协议的API服务器程序
"""

import argparse
import base64
import json
from flask import Flask, Response, request

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage
from utils.ops import decode_wav_bytes

API_STATUS_CODE_OK = 200000 # OK
API_STATUS_CODE_CLIENT_ERROR = 400000
API_STATUS_CODE_CLIENT_ERROR_FORMAT = 400001 # 请求数据格式错误
API_STATUS_CODE_CLIENT_ERROR_FORMAT = 400002 # 请求数据配置不支持
API_STATUS_CODE_CLIENT_ERROR_CONFIG = 400002 # 请求数据配置不支持
API_STATUS_CODE_SERVER_ERROR = 500000
API_STATUS_CODE_SERVER_ERROR_RUNNING = 500001 # 服务器运行中出错

parser = argparse.ArgumentParser(description='ASRT HTTP+Json RESTful API Service')
parser.add_argument('--listen', default='0.0.0.0', type=str, help='the network to listen')
parser.add_argument('--port', default='20001', type=str, help='the port to listen')
args = parser.parse_args()

app = Flask("ASRT API Service")

AUDIO_LENGTH = 1600
AUDIO_FEATURE_LENGTH = 200
CHANNELS = 1
# 默认输出的拼音的表示大小是1428,即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms = ModelSpeech(sm251bn, feat, max_label_length=64)
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')

ml = ModelLanguage('model_language')
ml.LoadModel()
Expand Down Expand Up @@ -149,7 +155,7 @@ def recognition_post(level):
json_data = AsrtApiResponse(API_STATUS_CODE_OK, 'all level')
json_data.result = result
buffer = json_data.to_json()
print('output:', buffer)
print('ASRT Result:', result,'output:', buffer)
return Response(buffer, mimetype='application/json')
else:
request_data = request.get_json()
Expand All @@ -165,6 +171,8 @@ def recognition_post(level):
# request_data['samples'][-100:])
json_data = AsrtApiResponse(API_STATUS_CODE_SERVER_ERROR, str(except_general))
buffer = json_data.to_json()
#print("input:", request_data, "\n", "output:", buffer)
print("output:", buffer, "error:", except_general)
return Response(buffer, mimetype='application/json')


Expand All @@ -173,4 +181,4 @@ def recognition_post(level):
#app.run(host='0.0.0.0', port=20001)
# for production env
import waitress
waitress.serve(app, host='0.0.0.0', port=20001)
waitress.serve(app, host=args.listen, port=args.port)
8 changes: 4 additions & 4 deletions evaluate_speech_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import os

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from data_loader import DataLoader
from speech_features import Spectrogram

Expand All @@ -37,14 +37,14 @@
CHANNELS = 1
# 默认输出的拼音的表示大小是1428,即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
evalue_data = DataLoader('dev')
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)

ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ms.evaluate_model(data_loader=evalue_data, data_count=-1,
out_report=True, show_ratio=True, show_per_step=100)
8 changes: 4 additions & 4 deletions predict_speech_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
import os

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from speech_features import Spectrogram
from LanguageModel2 import ModelLanguage

Expand All @@ -37,14 +37,14 @@
CHANNELS = 1
# 默认输出的拼音的表示大小是1428,即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)

ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
res = ms.recognize_speech_from_file('filename.wav')
print('*[提示] 声学模型语音识别结果:\n', res)

Expand Down
14 changes: 7 additions & 7 deletions train_speech_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
from tensorflow.keras.optimizers import Adam

from speech_model import ModelSpeech
from speech_model_zoo import SpeechModel251
from speech_model_zoo import SpeechModel251BN
from data_loader import DataLoader
from speech_features import Spectrogram
from speech_features import SpecAugment

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

Expand All @@ -39,16 +39,16 @@
CHANNELS = 1
# 默认输出的拼音的表示大小是1428,即1427个拼音+1个空白块
OUTPUT_SIZE = 1428
sm251 = SpeechModel251(
sm251bn = SpeechModel251BN(
input_shape=(AUDIO_LENGTH, AUDIO_FEATURE_LENGTH, CHANNELS),
output_size=OUTPUT_SIZE
)
feat = Spectrogram()
feat = SpecAugment()
train_data = DataLoader('train')
opt = Adam(lr = 0.0001, beta_1 = 0.9, beta_2 = 0.999, decay = 0.0, epsilon = 10e-8)
ms = ModelSpeech(sm251, feat, max_label_length=64)
ms = ModelSpeech(sm251bn, feat, max_label_length=64)

#ms.load_model('save_models/' + sm251.get_model_name() + '.model.h5')
#ms.load_model('save_models/' + sm251bn.get_model_name() + '.model.h5')
ms.train_model(optimizer=opt, data_loader=train_data,
epochs=50, save_step=1, batch_size=16, last_epoch=0)
ms.save_model('save_models/' + sm251.get_model_name())
ms.save_model('save_models/' + sm251bn.get_model_name())

0 comments on commit 087f51f

Please sign in to comment.