Skip to content

Commit

Permalink
Add cloud translation support with multiple providers and configurati… (
Browse files Browse the repository at this point in the history
#183)

* Add cloud translation support with multiple providers and configuration options

* Refactor CMakeLists.txt for cloud translation sources formatting

* Add support for translating only full sentences in cloud translation

* Update ICU build configuration and fix header include case sensitivity

* Fix CURL helper function signatures and improve URL encoding

* Fix character type casting in DeepLTranslator for language conversion

* Refactor file saving logic in transcription filter to streamline sentence handling and add support for saving translated sentences

* Add support for the DeepL Free API endpoint and enhance cloud translation configuration

* Add ccache detection to ICU build configuration for improved compilation speed

* Enhance ICU build configuration to use ccache as a compiler wrapper for improved performance
  • Loading branch information
royshil authored Nov 25, 2024
1 parent b7ab6a9 commit 04a6f6a
Show file tree
Hide file tree
Showing 28 changed files with 1,798 additions and 73 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,8 @@ target_sources(
src/translation/translation-language-utils.cpp
src/ui/filter-replace-dialog.cpp)

add_subdirectory(src/translation/cloud-translation)

set_target_properties_plugin(${CMAKE_PROJECT_NAME} PROPERTIES OUTPUT_NAME ${_name})

if(ENABLE_TESTS)
Expand Down
20 changes: 16 additions & 4 deletions cmake/BuildICU.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -48,26 +48,38 @@ if(WIN32)
"${ICU_LIB_${lib}}")
endforeach()
else()
# Optionally route the ICU compiler invocations through ccache.
#
# C_LAUNCHER / CXX_LAUNCHER are forwarded to ICU's configure step as CC / CXX (see ICU_BUILD_ENV_VARS below), so they
# must always hold a usable compiler command. When ccache is available the compiler is prefixed with it; otherwise the
# compilers CMake selected are used directly instead of leaving the variables undefined (which would export empty
# CC= / CXX= values to the configure script).
find_program(CCACHE_PROGRAM ccache)
if(CCACHE_PROGRAM)
  message(STATUS "Found ccache: ${CCACHE_PROGRAM}")
  # Compiler wrapper commands: "<ccache> <compiler>"
  set(C_LAUNCHER "${CCACHE_PROGRAM} ${CMAKE_C_COMPILER}")
  set(CXX_LAUNCHER "${CCACHE_PROGRAM} ${CMAKE_CXX_COMPILER}")
else()
  # No ccache: build ICU with the same compilers as the rest of the project
  set(C_LAUNCHER "${CMAKE_C_COMPILER}")
  set(CXX_LAUNCHER "${CMAKE_CXX_COMPILER}")
endif()

set(ICU_URL
"https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION_DASH}/icu4c-${ICU_VERSION_UNDERSCORE}-src.tgz"
)
set(ICU_HASH "SHA256=cb968df3e4d2e87e8b11c49a5d01c787bd13b9545280fc6642f826527618caef")
if(APPLE)
set(ICU_PLATFORM "MacOSX")
set(TARGET_ARCH -arch\ $ENV{MACOS_ARCH})
set(ICU_BUILD_ENV_VARS CFLAGS=${TARGET_ARCH} CXXFLAGS=${TARGET_ARCH} LDFLAGS=${TARGET_ARCH})
set(ICU_BUILD_ENV_VARS CFLAGS=${TARGET_ARCH} CXXFLAGS=${TARGET_ARCH} LDFLAGS=${TARGET_ARCH} CC=${C_LAUNCHER}
CXX=${CXX_LAUNCHER})
else()
set(ICU_PLATFORM "Linux")
set(ICU_BUILD_ENV_VARS CFLAGS=-fPIC CXXFLAGS=-fPIC LDFLAGS=-fPIC)
set(ICU_BUILD_ENV_VARS CFLAGS=-fPIC CXXFLAGS=-fPIC LDFLAGS=-fPIC CC=${C_LAUNCHER} CXX=${CXX_LAUNCHER})
endif()

ExternalProject_Add(
ICU_build
DOWNLOAD_EXTRACT_TIMESTAMP true
GIT_REPOSITORY "https://github.com/unicode-org/icu.git"
GIT_TAG "release-${ICU_VERSION_DASH}"
CONFIGURE_COMMAND ${CMAKE_COMMAND} -E env ${ICU_BUILD_ENV_VARS} <SOURCE_DIR>/icu4c/source/runConfigureICU
${ICU_PLATFORM} --prefix=<INSTALL_DIR> --enable-static --disable-shared
CONFIGURE_COMMAND
${CMAKE_COMMAND} -E env ${ICU_BUILD_ENV_VARS} <SOURCE_DIR>/icu4c/source/runConfigureICU ${ICU_PLATFORM}
--prefix=<INSTALL_DIR> --enable-static --disable-shared --disable-tools --disable-samples --disable-layout
--disable-layoutex --disable-tests --disable-draft --disable-extras --disable-icuio
BUILD_COMMAND make -j4
BUILD_BYPRODUCTS
<INSTALL_DIR>/lib/${CMAKE_STATIC_LIBRARY_PREFIX}icudata${CMAKE_STATIC_LIBRARY_SUFFIX}
Expand Down
25 changes: 24 additions & 1 deletion data/locale/en-US.ini
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ whisper_sampling_method="Whisper Sampling Method"
n_threads="Number of threads"
n_max_text_ctx="Max text context"
translate="Translate"
translate_local="Local Translation"
translate_cloud="Cloud Translation"
no_context="No context"
single_segment="Single segment"
print_special="Print special"
Expand Down Expand Up @@ -75,6 +77,11 @@ general_group="General"
transcription_group="Transcription"
file_output_group="File Output Configuration"
translate_explaination="Enabling translation will increase the processing load on your machine, This feature uses additional resources to translate content in real-time, which may impact performance. <a href='#'>Learn More</a>"
translate_cloud_explaination="Cloud translation requires an active internet connection and API keys to the translation provider."
translate_cloud_provider="Translation Provider"
translate_cloud_only_full_sentences="Translate only full sentences"
translate_cloud_api_key="Access Key"
translate_cloud_secret_key="Secret Key"
log_group="Logging"
advanced_group="Advanced Configuration"
buffered_output_parameters="Buffered Output Configuration"
Expand All @@ -89,4 +96,20 @@ translate_only_full_sentences="Translate only full sentences"
duration_filter_threshold="Duration filter"
segment_duration="Segment duration"
n_context_sentences="# Context sentences"
max_sub_duration="Max. sub duration (ms)"
max_sub_duration="Max. sub duration (ms)"
Google-Cloud-Translation="Google Cloud Translation"
Microsoft-Translator="Microsoft Azure Translator"
Amazon-Translate="AWS Translate"
IBM-Watson-Translate="IBM Watson Translate"
Yandex-Translate="Yandex Translate"
Baidu-Translate="Baidu Translate"
Tencent-Translate="Tencent Translate"
Alibaba-Translate="Alibaba Translate"
Naver-Translate="Naver Translate"
Kakao-Translate="Kakao Translate"
Papago-Translate="Papago"
Deepl-Translate="Deepl"
Bing-Translate="Bing Translate"
OpenAI-Translate="OpenAI"
Claude-Translate="Claude"
translate_cloud_deepl_free="Use Deepl Free API Endpoint"
184 changes: 125 additions & 59 deletions src/transcription-filter-callbacks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "whisper-utils/whisper-utils.h"
#include "whisper-utils/whisper-model-utils.h"
#include "translation/language_codes.h"
#include "translation/cloud-translation/translation-cloud.h"

void send_caption_to_source(const std::string &target_source_name, const std::string &caption,
struct transcription_filter_data *gf)
Expand Down Expand Up @@ -80,30 +81,60 @@ std::string send_sentence_to_translation(const std::string &sentence,
return "";
}

/**
 * Translate a sentence with the configured cloud provider on a detached
 * background thread and hand the result to `callback`.
 *
 * The callback is invoked exactly once, from the worker thread:
 *  - with the translated text on success,
 *  - with the cached translation when `sentence` equals the previously
 *    submitted sentence (the provider is not called again),
 *  - with an empty string when cloud translation is disabled, the sentence
 *    is empty, or the provider returned nothing.
 *
 * @param sentence         Text to translate.
 * @param gf               Filter state; supplies the provider configuration
 *                         and stores the last submitted sentence and its
 *                         translation.
 * @param source_language  Language code of `sentence`.
 * @param callback         Receiver of the translation result.
 */
void send_sentence_to_cloud_translation_async(const std::string &sentence,
					      struct transcription_filter_data *gf,
					      const std::string &source_language,
					      std::function<void(const std::string &)> callback)
{
	std::thread([sentence, gf, source_language, callback]() {
		// Remember what was submitted last time before recording the new sentence
		const std::string last_text = gf->last_text_for_cloud_translation;
		gf->last_text_for_cloud_translation = sentence;

		if (gf->translate_cloud && !sentence.empty()) {
			obs_log(gf->log_level, "Translating text with cloud provider %s. %s -> %s",
				gf->translate_cloud_provider.c_str(), source_language.c_str(),
				gf->translate_cloud_target_language.c_str());

			if (sentence == last_text) {
				// do not translate the same sentence twice
				callback(gf->last_text_cloud_translation);
				return;
			}

			CloudTranslatorConfig config;
			config.provider = gf->translate_cloud_provider;
			config.access_key = gf->translate_cloud_api_key;
			config.secret_key = gf->translate_cloud_secret_key;
			config.free = gf->translate_cloud_deepl_free;
			config.region = gf->translate_cloud_region;

			const std::string translated_text =
				translate_cloud(config, sentence,
						gf->translate_cloud_target_language,
						source_language);

			if (!translated_text.empty()) {
				if (gf->log_words) {
					obs_log(LOG_INFO, "Cloud Translation: '%s' -> '%s'",
						sentence.c_str(), translated_text.c_str());
				}
				// Store the result in the *cloud* cache: this is the field the
				// duplicate-sentence path above reads. (Writing to
				// last_text_translation would leave the cloud cache empty and
				// clobber the local translation cache.)
				gf->last_text_cloud_translation = translated_text;
				callback(translated_text);
				return;
			}

			obs_log(gf->log_level, "Failed to translate text");
			// Keep the cache paired with the last submitted sentence so that a
			// repeat of this sentence does not replay an older, unrelated
			// translation.
			gf->last_text_cloud_translation.clear();
		}
		callback("");
	}).detach();
}

void send_sentence_to_file(struct transcription_filter_data *gf,
const DetectionResultWithText &result, const std::string &str_copy,
const std::string &translated_sentence)
const DetectionResultWithText &result, const std::string &sentence,
const std::string &file_path, bool bump_sentence_number)
{
// Check if we should save the sentence
if (gf->save_only_while_recording && !obs_frontend_recording_active()) {
// We are not recording, do not save the sentence to file
return;
}

std::string translated_file_path = "";
bool write_translations = gf->translate && !translated_sentence.empty();

// if translation is enabled, save the translated sentence to another file
if (write_translations) {
// add a postfix to the file name (without extension) with the translation target language
std::string output_file_path = gf->output_file_path;
std::string file_extension =
output_file_path.substr(output_file_path.find_last_of(".") + 1);
std::string file_name =
output_file_path.substr(0, output_file_path.find_last_of("."));
translated_file_path = file_name + "_" + gf->target_lang + "." + file_extension;
}

// should the file be truncated?
std::ios_base::openmode openmode = std::ios::out;
if (gf->truncate_output_file) {
Expand All @@ -114,15 +145,9 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
if (!gf->save_srt) {
// Write raw sentence to file
try {
std::ofstream output_file(gf->output_file_path, openmode);
output_file << str_copy << std::endl;
std::ofstream output_file(file_path, openmode);
output_file << sentence << std::endl;
output_file.close();
if (write_translations) {
std::ofstream translated_output_file(translated_file_path,
openmode);
translated_output_file << translated_sentence << std::endl;
translated_output_file.close();
}
} catch (const std::ofstream::failure &e) {
obs_log(LOG_ERROR, "Exception opening/writing/closing file: %s", e.what());
}
Expand All @@ -133,9 +158,9 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
}

obs_log(gf->log_level, "Saving sentence to file %s, sentence #%d",
gf->output_file_path.c_str(), gf->sentence_number);
file_path.c_str(), gf->sentence_number);
// Append sentence to file in .srt format
std::ofstream output_file(gf->output_file_path, openmode);
std::ofstream output_file(file_path, openmode);
output_file << gf->sentence_number << std::endl;
// use the start and end timestamps to calculate the start and end time in srt format
auto format_ts_for_srt = [](std::ofstream &output_stream, uint64_t ts) {
Expand All @@ -156,28 +181,34 @@ void send_sentence_to_file(struct transcription_filter_data *gf,
format_ts_for_srt(output_file, result.end_timestamp_ms);
output_file << std::endl;

output_file << str_copy << std::endl;
output_file << sentence << std::endl;
output_file << std::endl;
output_file.close();

if (write_translations) {
obs_log(gf->log_level, "Saving translation to file %s, sentence #%d",
translated_file_path.c_str(), gf->sentence_number);

// Append translated sentence to file in .srt format
std::ofstream translated_output_file(translated_file_path, openmode);
translated_output_file << gf->sentence_number << std::endl;
format_ts_for_srt(translated_output_file, result.start_timestamp_ms);
translated_output_file << " --> ";
format_ts_for_srt(translated_output_file, result.end_timestamp_ms);
translated_output_file << std::endl;

translated_output_file << translated_sentence << std::endl;
translated_output_file << std::endl;
translated_output_file.close();
if (bump_sentence_number) {
gf->sentence_number++;
}
}
}

gf->sentence_number++;
/**
 * Save a translated sentence next to the main output file.
 *
 * The destination is derived from gf->output_file_path by inserting
 * "_<target_lang>" before the file extension, e.g. "out.srt" with target
 * language "es" becomes "out_es.srt". If the file name has no extension the
 * suffix is simply appended ("out" -> "out_es"). Only a dot that appears
 * after the last path separator is treated as the start of the extension, so
 * dots in directory names are left untouched.
 *
 * Nothing is written when the translation is empty. The sentence counter is
 * not advanced by this call (bump_sentence_number is passed as false).
 *
 * @param gf                   Filter state (output path, log level).
 * @param result               Detection result forwarded to send_sentence_to_file.
 * @param translated_sentence  Translated text to save.
 * @param target_lang          Language tag used as the file-name suffix.
 */
void send_translated_sentence_to_file(struct transcription_filter_data *gf,
				      const DetectionResultWithText &result,
				      const std::string &translated_sentence,
				      const std::string &target_lang)
{
	if (translated_sentence.empty()) {
		obs_log(gf->log_level, "Translation is empty, not saving to file");
		return;
	}

	const std::string output_file_path = gf->output_file_path;
	const std::string::size_type last_separator = output_file_path.find_last_of("/\\");
	const std::string::size_type last_dot = output_file_path.find_last_of('.');

	// A dot only marks an extension when it belongs to the file name itself,
	// i.e. it comes after the last path separator (if there is one).
	const bool has_extension =
		last_dot != std::string::npos &&
		(last_separator == std::string::npos || last_dot > last_separator);

	// add a postfix to the file name (without extension) with the translation target language
	std::string translated_file_path;
	if (has_extension) {
		translated_file_path = output_file_path.substr(0, last_dot) + "_" + target_lang +
				       output_file_path.substr(last_dot);
	} else {
		translated_file_path = output_file_path + "_" + target_lang;
	}

	send_sentence_to_file(gf, result, translated_sentence, translated_file_path, false);
}

Expand Down Expand Up @@ -235,41 +266,76 @@ void set_text_callback(struct transcription_filter_data *gf,
}
}

bool should_translate =
bool should_translate_local =
gf->translate_only_full_sentences ? result.result == DETECTION_RESULT_SPEECH : true;

// send the sentence to translation (if enabled)
std::string translated_sentence =
should_translate ? send_sentence_to_translation(str_copy, gf, result.language) : "";
std::string translated_sentence_local =
should_translate_local ? send_sentence_to_translation(str_copy, gf, result.language)
: "";

if (gf->translate) {
if (gf->translation_output == "none") {
// overwrite the original text with the translated text
str_copy = translated_sentence;
str_copy = translated_sentence_local;
} else {
if (gf->buffered_output) {
// buffered output - add the sentence to the monitor
gf->translation_monitor.addSentenceFromStdString(
translated_sentence,
translated_sentence_local,
get_time_point_from_ms(result.start_timestamp_ms),
get_time_point_from_ms(result.end_timestamp_ms),
result.result == DETECTION_RESULT_PARTIAL);
} else {
// non-buffered output - send the sentence to the selected source
send_caption_to_source(gf->translation_output, translated_sentence,
gf);
send_caption_to_source(gf->translation_output,
translated_sentence_local, gf);
}
}
if (gf->save_to_file && gf->output_file_path != "") {
send_translated_sentence_to_file(gf, result, translated_sentence_local,
gf->target_lang);
}
}

if (gf->buffered_output) {
gf->captions_monitor.addSentenceFromStdString(
str_copy, get_time_point_from_ms(result.start_timestamp_ms),
get_time_point_from_ms(result.end_timestamp_ms),
result.result == DETECTION_RESULT_PARTIAL);
} else {
// non-buffered output - send the sentence to the selected source
send_caption_to_source(gf->text_source_name, str_copy, gf);
bool should_translate_cloud = (gf->translate_cloud_only_full_sentences
? result.result == DETECTION_RESULT_SPEECH
: true) &&
gf->translate_cloud;

if (should_translate_cloud) {
send_sentence_to_cloud_translation_async(
str_copy, gf, result.language,
[gf, result](const std::string &translated_sentence_cloud) {
if (gf->translate_cloud_output != "none") {
send_caption_to_source(gf->translate_cloud_output,
translated_sentence_cloud, gf);
} else {
// overwrite the original text with the translated text
send_caption_to_source(gf->text_source_name,
translated_sentence_cloud, gf);
}
if (gf->save_to_file && gf->output_file_path != "") {
send_translated_sentence_to_file(
gf, result, translated_sentence_cloud,
gf->translate_cloud_target_language);
}
});
}

// send the original text to the output
// unless the translation is enabled and set to overwrite the original text
if (!((should_translate_cloud && gf->translate_cloud_output == "none") ||
(should_translate_local && gf->translation_output == "none"))) {
if (gf->buffered_output) {
gf->captions_monitor.addSentenceFromStdString(
str_copy, get_time_point_from_ms(result.start_timestamp_ms),
get_time_point_from_ms(result.end_timestamp_ms),
result.result == DETECTION_RESULT_PARTIAL);
} else {
// non-buffered output - send the sentence to the selected source
send_caption_to_source(gf->text_source_name, str_copy, gf);
}
}

if (gf->caption_to_stream && result.result == DETECTION_RESULT_SPEECH) {
Expand All @@ -279,7 +345,7 @@ void set_text_callback(struct transcription_filter_data *gf,

if (gf->save_to_file && gf->output_file_path != "" &&
result.result == DETECTION_RESULT_SPEECH) {
send_sentence_to_file(gf, result, str_copy, translated_sentence);
send_sentence_to_file(gf, result, str_copy, gf->output_file_path, true);
}

if (!result.text.empty() && (result.result == DETECTION_RESULT_SPEECH ||
Expand Down
18 changes: 15 additions & 3 deletions src/transcription-filter-data.h
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,18 @@ struct transcription_filter_data {
float duration_filter_threshold = 2.25f;
int segment_duration = 7000;

// Last transcription result
std::string last_text_for_translation;
std::string last_text_translation;
// Cloud translation options
bool translate_cloud = false;
std::string translate_cloud_provider;
std::string translate_cloud_target_language;
std::string translate_cloud_output;
std::string translate_cloud_api_key;
std::string translate_cloud_secret_key;
bool translate_cloud_only_full_sentences = true;
std::string last_text_for_cloud_translation;
std::string last_text_cloud_translation;
bool translate_cloud_deepl_free;
std::string translate_cloud_region;

// Transcription context sentences
int n_context_sentences;
Expand Down Expand Up @@ -119,6 +128,9 @@ struct transcription_filter_data {
std::string translation_model_index;
std::string translation_model_path_external;
bool translate_only_full_sentences;
// Last transcription result
std::string last_text_for_translation;
std::string last_text_translation;

bool buffered_output = false;
TokenBufferThread captions_monitor;
Expand Down
Loading

0 comments on commit 04a6f6a

Please sign in to comment.