From c4f4ed3f5cffb4af88845981ad3eb4c39886d6f4 Mon Sep 17 00:00:00 2001 From: Baiju Meswani Date: Mon, 16 Sep 2024 21:55:03 +0000 Subject: [PATCH] Address PR review comments --- src/models/model.cpp | 19 ------------------- src/models/model.h | 3 --- 2 files changed, 22 deletions(-) diff --git a/src/models/model.cpp b/src/models/model.cpp index 03b7131ad..9711f10b7 100644 --- a/src/models/model.cpp +++ b/src/models/model.cpp @@ -183,25 +183,6 @@ std::vector Tokenizer::DecodeBatch(std::span sequenc return strings; } -std::vector Tokenizer::GetDecoderPromptIds(size_t batch_size, const std::string& language, - const std::string& task, int32_t no_timestamps) const { - ort_extensions::OrtxObjectPtr prompt_ids; - CheckResult(OrtxGetDecoderPromptIds(tokenizer_, batch_size, language.c_str(), - task.c_str(), no_timestamps, ort_extensions::ptr(prompt_ids))); - - std::vector> tokens_vector; - std::vector> span_sequences; - for (size_t i = 0; i < batch_size; i++) { - const extTokenId_t* tokens = nullptr; - size_t token_count = 0; - CheckResult(OrtxTokenId2DArrayGetItem(prompt_ids.get(), i, &tokens, &token_count)); - tokens_vector.emplace_back(tokens, tokens + token_count); - span_sequences.emplace_back(tokens_vector.back()); - } - - return PadInputs(span_sequences, pad_token_id_); -} - int32_t Tokenizer::TokenToTokenId(const char* token) const { extTokenId_t token_id; CheckResult(OrtxConvertTokenToId(tokenizer_, token, &token_id)); diff --git a/src/models/model.h b/src/models/model.h index 9e0215413..91537cc1d 100644 --- a/src/models/model.h +++ b/src/models/model.h @@ -76,9 +76,6 @@ struct Tokenizer : std::enable_shared_from_this, LeakChecked EncodeBatch(std::span strings) const; std::vector DecodeBatch(std::span sequences, size_t count) const; - std::vector GetDecoderPromptIds(size_t batch_size, const std::string& language, - const std::string& task, int32_t no_timestamps) const; - int32_t TokenToTokenId(const char* token) const; OrtxPtr tokenizer_;