Skip to content

Commit

Permalink
deleted a TODO
Browse files (browse the repository at this point in the history)
  • Loading branch information
JoJoBarthold2 committed Sep 6, 2023
1 parent 9bb4ffb commit cbbe597
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions swr2_asr/tokenizer.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -156,7 +156,7 @@ def decode(self, labels: list[int], remove_special_tokens: bool = True):
"""
string = []
for i in labels:
if remove_special_tokens and self.index_map[f"{i}"] == "<UNK>":
if remove_special_tokens and self.index_map[f"{i}"] == "<UNK>":
continue
if remove_special_tokens and self.index_map[f"{i}"] == "<SPACE>":
string.append(" ")
Expand Down Expand Up @@ -329,7 +329,7 @@ def train_bpe_tokenizer(
"ü",
]

# TODO: add padding token / whitespace token / special tokens

trainer = BpeTrainer(
special_tokens=["[UNK]"],
vocab_size=vocab_size,
Expand Down

0 comments on commit cbbe597

Please sign in to comment.