Skip to content

Commit

Permalink
small lm fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
Pherkel committed Sep 18, 2023
1 parent f945067 commit f6e6377
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
2 changes: 1 addition & 1 deletion swr2_asr/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -360,4 +360,4 @@ def create_lexicon(vocab_counts_path, lexicon_path):
  file.write(f"{word} ")
  for char in word:
  file.write(char + " ")
- file.write("<SPACE>")
+ file.write("<SPACE>\n")
8 changes: 5 additions & 3 deletions swr2_asr/utils/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,11 @@ def get_beam_search_decoder(
  )

  if not os.path.isdir(os.path.join(lang_model_path, f"mls_lm_{language}")):
- url = f"https://dl.fbaipublicfiles.com/mls/mls_lm_{language}.tar.gz"
- torch.hub.download_url_to_file(url, f"data/mls_lm_{language}.tar.gz")
- _extract_tar("data/mls_lm_{language}.tar.gz", overwrite=True)
+ # check if zip file exists
+ if not os.path.isfile(f"data/mls_lm_{language}.tar.gz"):
+ url = f"https://dl.fbaipublicfiles.com/mls/mls_lm_{language}.tar.gz"
+ torch.hub.download_url_to_file(url, f"data/mls_lm_{language}.tar.gz")
+ _extract_tar(f"data/mls_lm_{language}.tar.gz", overwrite=True)

  tokens_path = os.path.join(lang_model_path, f"mls_lm_{language}", "tokens.txt")
  if not os.path.isfile(tokens_path):
Expand Down

0 comments on commit f6e6377

Please sign in to comment.