From c05fc5e1917de330f8ba79fac65f634dec5c6c6c Mon Sep 17 00:00:00 2001 From: Nishchal Bhandari Date: Tue, 8 Oct 2024 20:59:50 +0000 Subject: [PATCH] read from CLI --- src/fstalign.cpp | 5 ++--- src/fstalign.h | 9 +-------- src/main.cpp | 9 ++++++++- 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/src/fstalign.cpp b/src/fstalign.cpp index 125cb52..cc9226d 100644 --- a/src/fstalign.cpp +++ b/src/fstalign.cpp @@ -635,7 +635,7 @@ void write_stitches_to_nlp(vector& stitches, ofstream &output_nlp_fil } void HandleWer(FstLoader& refLoader, FstLoader& hypLoader, SynonymEngine &engine, const string& output_sbs, const string& output_nlp, - AlignerOptions alignerOptions, bool add_inserts_nlp, bool use_case) { + AlignerOptions alignerOptions, bool add_inserts_nlp, bool use_case, std::vector ref_extra_columns, std::vector hyp_extra_columns) { // int speaker_switch_context_size, int numBests, int pr_threshold, string symbols_filename, // string composition_approach, bool record_case_stats) { auto logger = logger::GetOrCreateLogger("fstalign"); @@ -701,8 +701,7 @@ void HandleWer(FstLoader& refLoader, FstLoader& hypLoader, SynonymEngine &engine JsonLogUnigramBigramStats(topAlignment); if (!output_sbs.empty()) { logger->info("output_sbs = {}", output_sbs); - std::vector extra_hyp_columns = {"confidence"}; - WriteSbs(topAlignment, stitches, output_sbs, std::vector(),extra_hyp_columns); + WriteSbs(topAlignment, stitches, output_sbs, ref_extra_columns, hyp_extra_columns); } if (!output_nlp.empty() && !nlp_ref_loader) { diff --git a/src/fstalign.h b/src/fstalign.h index 5cbe130..f02f807 100644 --- a/src/fstalign.h +++ b/src/fstalign.h @@ -48,16 +48,9 @@ struct AlignerOptions { int levenstein_maximum_error_streak = 100; }; -// original -// void HandleWer(FstLoader *refLoader, FstLoader *hypLoader, SynonymEngine *engine, string output_sbs, string -// output_nlp, -// int speaker_switch_context_size, int numBests, int pr_threshold, string symbols_filename, -// string composition_approach, bool record_case_stats); -// void HandleAlign(NlpFstLoader *refLoader, CtmFstLoader *hypLoader, SynonymEngine *engine, ofstream &output_nlp_file, -// int numBests, string symbols_filename, string composition_approach); void HandleWer(FstLoader& refLoader, FstLoader& hypLoader, SynonymEngine &engine, const string& output_sbs, const string& output_nlp, - AlignerOptions alignerOptions, bool add_inserts_nlp = false, bool use_case = false); + AlignerOptions alignerOptions, bool add_inserts_nlp, bool use_case, std::vector ref_extra_columns, std::vector hyp_extra_columns); void HandleAlign(NlpFstLoader &refLoader, CtmFstLoader &hypLoader, SynonymEngine &engine, ofstream &output_nlp_file, AlignerOptions alignerOptions); diff --git a/src/main.cpp b/src/main.cpp index d87ddbc..bf03942 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -40,6 +40,9 @@ int main(int argc, char **argv) { bool disable_cutoffs = false; bool disable_hyphen_ignore = false; + std::vector ref_extra_columns = std::vector(); + std::vector hyp_extra_columns = std::vector(); + CLI::App app("Rev FST Align"); app.set_help_all_flag("--help-all", "Expand all help"); app.add_flag("--version", version, "Show fstalign version."); @@ -97,6 +100,10 @@ int main(int argc, char **argv) { c->add_option("--composition-approach", composition_approach, "Desired composition logic. Choices are 'standard' or 'adapted'"); + c->add_option("--ref-extra-cols", ref_extra_columns, + "Extra columns from the reference to include in SBS output."); + c->add_option("--hyp-extra-cols", hyp_extra_columns, + "Extra columns from the hypothesis to include in SBS output."); } get_wer->add_option("--wer-sidecar", wer_sidecar_filename, "WER sidecar json file."); @@ -180,7 +187,7 @@ int main(int argc, char **argv) { } if (command == "wer") { - HandleWer(*ref, *hyp, engine, output_sbs, output_nlp, alignerOptions, add_inserts_nlp, use_case); + HandleWer(*ref, *hyp, engine, output_sbs, output_nlp, alignerOptions, add_inserts_nlp, use_case, ref_extra_columns, hyp_extra_columns); } else if (command == "align") { if (output_nlp.empty()) { console->error("the output nlp file must be specified");