Skip to content

Commit

Permalink
GUI: Persist transcription app config
Browse files Browse the repository at this point in the history
The configuration of the transcription app, such as the number of rows
and columns in the text box, now persists across app restarts. I found
that I would have to change from the defaults to my preferred config
every time I started up in VR, which was annoying. Now we just start
with the config that was set last time.

* Add dependency on rapidyaml (MIT)
* Serialize transcription config to file under Resources/
* Add Config class to wrap serializing/deserializing
* Update build instructions
* Simplify StartApp() API, taking Config struct instead of a ton of
  arguments
  • Loading branch information
yum-food committed Jan 6, 2023
1 parent 66d311b commit 461714c
Show file tree
Hide file tree
Showing 11 changed files with 238 additions and 51 deletions.
2 changes: 2 additions & 0 deletions GUI/GUI/GUI/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@ x64
x86
# No .rc generated files
GUI.APS
# No fetched files
ryml.h
105 changes: 105 additions & 0 deletions GUI/GUI/GUI/Config.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#include <wx/wxprec.h>

#ifndef WX_PRECOMP
#include <wx/wx.h>
#endif

#include "Config.h"

#define RYML_SINGLE_HDR_DEFINE_NOW
#include "ryml.h"

#include <cerrno>
#include <cstdio>
#include <cstring>
#include <fstream>
#include <memory>
#include <string>
#include <system_error>
#include <vector>

TranscriptionAppConfig::TranscriptionAppConfig()
: microphone("index"),
language("english"),
model("base.en"),
chars_per_sync("20"),
bytes_per_char("1"),
rows("4"),
cols("48"),
window_duration("15"),
enable_local_beep(true),
use_cpu(false)
{}

// Writes every setting of this config to `path` as a YAML map.
//
// The YAML is first written to a sibling temporary file (`path` + ".tmp") and
// only then renamed over `path`, so a crash part-way through the write cannot
// leave a truncated config at `path`.
//
// Returns true on success. On failure the reason is reported through
// wxLogError and false is returned.
bool TranscriptionAppConfig::Serialize(const std::filesystem::path& path) {
  ryml::Tree t;
  ryml::NodeRef root = t.rootref();
  root |= ryml::MAP;
  root["microphone"] << ryml::to_substr(microphone);
  root["language"] << ryml::to_substr(language);
  root["model"] << ryml::to_substr(model);
  root["chars_per_sync"] << ryml::to_substr(chars_per_sync);
  root["bytes_per_char"] << ryml::to_substr(bytes_per_char);
  root["rows"] << ryml::to_substr(rows);
  root["cols"] << ryml::to_substr(cols);
  root["window_duration"] << ryml::to_substr(window_duration);
  root["enable_local_beep"] << enable_local_beep;
  root["use_cpu"] << use_cpu;

  // Write the config to a tmp file. If we crash in the middle of this, it
  // doesn't matter, since the next process will just overwrite it.
  std::filesystem::path tmp_path = path;
  tmp_path += ".tmp";
  const std::string tmp_name = tmp_path.string();
  FILE* fp = fopen(tmp_name.c_str(), "wb");
  if (!fp) {
    // Capture errno immediately so that nothing evaluated while building the
    // log message can overwrite it.
    const int open_errno = errno;
    wxLogError("Failed to open %s: %s", tmp_name.c_str(),
        strerror(open_errno));
    return false;
  }
  ryml::emit_yaml(t, fp);
  // Buffered data may only hit the disk on fclose(), so both the stream error
  // flag and the result of fclose() have to be checked.
  const bool write_failed = ferror(fp) != 0;
  const bool close_failed = fclose(fp) != 0;
  fp = nullptr;
  if (write_failed || close_failed) {
    wxLogError("Failed to write config to %s", tmp_name.c_str());
    // Best-effort cleanup; a leftover tmp file is overwritten next time.
    std::error_code ignored;
    std::filesystem::remove(tmp_path, ignored);
    return false;
  }

  // std::filesystem::rename() replaces an existing regular file at `path`, so
  // the old config does not have to be deleted first. Deleting it separately
  // would open a window in which a crash leaves no config on disk at all.
  std::error_code ec;
  std::filesystem::rename(tmp_path, path, ec);
  if (ec) {
    wxLogError("Failed to save config to %s: %s", path.string().c_str(),
        ec.message().c_str());
    return false;
  }

  return true;
}

bool TranscriptionAppConfig::Deserialize(const std::filesystem::path& path) {
std::ifstream file(path, std::ios::binary | std::ios::ate);
if (!file.is_open()) {
return false;
}
std::streamsize size = file.tellg();
file.seekg(0, std::ios::beg);
std::vector<char> yaml_buf(size);
if (!file.read(yaml_buf.data(), size)) {
return false;
}

ryml::Tree t = ryml::parse_in_place(ryml::to_substr(yaml_buf.data()));
ryml::ConstNodeRef root = t.rootref();
TranscriptionAppConfig c;
root["microphone"] >> c.microphone;
root["language"] >> c.language;
root["model"] >> c.model;
root["chars_per_sync"] >> c.chars_per_sync;
root["bytes_per_char"] >> c.bytes_per_char;
root["rows"] >> c.rows;
root["cols"] >> c.cols;
root["window_duration"] >> c.window_duration;
root["enable_local_beep"] >> c.enable_local_beep;

*this = std::move(c);
return true;
}
26 changes: 26 additions & 0 deletions GUI/GUI/GUI/Config.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#pragma once

#include <filesystem>

// Settings for the transcription app, with helpers to save them to and load
// them from a YAML file so they persist across app restarts.
//
// Every member is public; callers fill in the fields directly.
class TranscriptionAppConfig {
public:
// Initializes every setting to its built-in default.
TranscriptionAppConfig();

// Writes all settings to `path` as YAML. Returns true on success; on failure
// an error is logged and false is returned.
bool Serialize(const std::filesystem::path& path);

// Loads settings from the YAML file at `path`. Returns false if the file
// cannot be opened or read, in which case this object is left unchanged.
bool Deserialize(const std::filesystem::path& path);

// The default path at which configs are serialized.
// NOTE(review): this is a relative path; presumably it is resolved against
// the process's working directory - confirm.
static constexpr char kConfigPath[] = "Resources/transcription_app_config.yml";

// The string settings below are kept as text because they are forwarded
// verbatim as command-line arguments to the transcription script.

// Value passed as `--mic`.
std::string microphone;
// Value passed as `--lang`.
std::string language;
// Value passed as `--model`.
std::string model;
// Value passed as `--chars_per_sync`.
std::string chars_per_sync;
// Value passed as `--bytes_per_char`.
std::string bytes_per_char;
// Number of rows on the text box; passed as `--rows`.
std::string rows;
// Number of columns on the text box; passed as `--cols`.
std::string cols;
// Value passed as `--window_duration_s`.
std::string window_duration;
// Passed as `--enable_local_beep` ("1" when true, "0" when false).
bool enable_local_beep;
// Passed as `--cpu` ("1" when true, "0" when false).
bool use_cpu;
};
69 changes: 48 additions & 21 deletions GUI/GUI/GUI/Frame.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "Logging.h"
#include "PythonWrapper.h"

#include "Config.h"

#include <filesystem>
#include <string>
#include <vector>
Expand Down Expand Up @@ -226,6 +228,20 @@ namespace {
const size_t kNumBytesPerChar = sizeof(kBytesPerChar) / sizeof(kBytesPerChar[0]);
// Sorry international users. Optimize for English speakers, by default.
constexpr int kBytesDefault = 0;

// Given the string value of a dropdown menu's entry, find its index.
//
// `menu` points at the first of `num_menu_entries` choices and `entry` is the
// text to look for. Returns the index of the first choice equal to `entry`,
// or `default_index` if no choice matches (including when the menu is empty).
int GetDropdownChoiceIndex(const wxString menu[],
    const size_t num_menu_entries, const std::string& entry,
    const int default_index) {
  // Index with size_t so the loop bound is compared without a signed/unsigned
  // conversion; only the matching index is narrowed to the int return type.
  for (size_t i = 0; i < num_menu_entries; ++i) {
    if (entry == menu[i]) {
      return static_cast<int>(i);
    }
  }
  return default_index;
}

} // namespace

using ::Logging::Log;
Expand All @@ -235,6 +251,9 @@ Frame::Frame()
py_app_(nullptr),
py_app_drain_(this, ID_PY_APP_DRAIN)
{
TranscriptionAppConfig c;
c.Deserialize(TranscriptionAppConfig::kConfigPath);

auto* main_panel = new wxPanel(this, ID_MAIN_PANEL);
main_panel_ = main_panel;
{
Expand Down Expand Up @@ -282,7 +301,8 @@ Frame::Frame()
{
auto* py_app_mic = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MIC, wxDefaultPosition,
wxDefaultSize, kNumMicChoices, kMicChoices);
py_app_mic->SetSelection(kMicDefault);
int mic_idx = GetDropdownChoiceIndex(kMicChoices, kNumMicChoices, c.microphone, kMicDefault);
py_app_mic->SetSelection(mic_idx);
py_app_mic->SetToolTip(
"Select which microphone to listen to when "
"transcribing. To get list microphones and get their "
Expand All @@ -291,7 +311,8 @@ Frame::Frame()

auto* py_app_lang = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_LANG, wxDefaultPosition,
wxDefaultSize, kNumLangChoices, kLangChoices);
py_app_lang->SetSelection(kLangDefault);
int lang_idx = GetDropdownChoiceIndex(kLangChoices, kNumLangChoices, c.language, kLangDefault);
py_app_lang->SetSelection(lang_idx);
py_app_lang->SetToolTip("Select which language you will "
"speak in. It will be transcribed into that language. "
"If using a language with non-ASCII characters (i.e. "
Expand All @@ -302,7 +323,8 @@ Frame::Frame()

auto* py_app_model = new wxChoice(py_app_config_panel_pairs, ID_PY_APP_MODEL, wxDefaultPosition,
wxDefaultSize, kNumModelChoices, kModelChoices);
py_app_model->SetSelection(kModelDefault);
int model_idx = GetDropdownChoiceIndex(kModelChoices, kNumModelChoices, c.model, kModelDefault);
py_app_model->SetSelection(model_idx);
py_app_model->SetToolTip("Select which version of "
"the transcription model to use. 'base' is a good "
"choice for most users. 'small' is slightly more "
Expand All @@ -314,7 +336,8 @@ Frame::Frame()
auto* py_app_chars_per_sync = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_CHARS_PER_SYNC, wxDefaultPosition,
wxDefaultSize, kNumCharsPerSync, kCharsPerSync);
py_app_chars_per_sync->SetSelection(kCharsDefault);
int chars_idx = GetDropdownChoiceIndex(kCharsPerSync, kNumCharsPerSync, c.chars_per_sync, kCharsDefault);
py_app_chars_per_sync->SetSelection(chars_idx);
py_app_chars_per_sync->SetToolTip(
"VRChat syncs avatar parameters roughly 5 times per "
"second. We use this to send text to the box. By "
Expand All @@ -325,28 +348,29 @@ Frame::Frame()
auto* py_app_bytes_per_char = new wxChoice(py_app_config_panel_pairs,
ID_PY_APP_BYTES_PER_CHAR, wxDefaultPosition,
wxDefaultSize, kNumBytesPerChar, kBytesPerChar);
py_app_bytes_per_char->SetSelection(kBytesDefault);
int bytes_idx = GetDropdownChoiceIndex(kBytesPerChar, kNumBytesPerChar, c.bytes_per_char, kBytesDefault);
py_app_bytes_per_char->SetSelection(bytes_idx);
py_app_bytes_per_char->SetToolTip(
"If you speak a language that uses non-ASCII "
"characters (i.e. not English), set this to 2.");
py_app_bytes_per_char_ = py_app_bytes_per_char;

auto* py_app_rows = new wxTextCtrl(py_app_config_panel_pairs,
ID_PY_APP_ROWS, /*value=*/"4",
ID_PY_APP_ROWS, c.rows,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_rows->SetToolTip(
"The number of rows on the text box.");
py_app_rows_ = py_app_rows;

auto* py_app_cols = new wxTextCtrl(py_app_config_panel_pairs,
ID_PY_APP_COLS, /*value=*/"48",
ID_PY_APP_COLS, c.cols,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_cols->SetToolTip(
"The number of columns on the text box.");
py_app_cols_ = py_app_cols;

auto* py_app_window_duration = new wxTextCtrl(py_app_config_panel_pairs,
ID_PY_APP_WINDOW_DURATION, /*value=*/"15",
ID_PY_APP_WINDOW_DURATION, c.window_duration,
wxDefaultPosition, wxDefaultSize, /*style=*/0);
py_app_window_duration->SetToolTip(
"This controls how long the slice of audio that "
Expand Down Expand Up @@ -387,7 +411,7 @@ Frame::Frame()

auto* py_app_enable_local_beep = new wxCheckBox(py_config_panel,
ID_PY_APP_ENABLE_LOCAL_BEEP, "Enable local beep");
py_app_enable_local_beep->SetValue(true);
py_app_enable_local_beep->SetValue(c.enable_local_beep);
py_app_enable_local_beep->SetToolTip(
"By default, TaSTT will play a sound (audible only to "
"you) when it begins transcription and when it stops. "
Expand All @@ -397,7 +421,7 @@ Frame::Frame()

auto* py_app_use_cpu = new wxCheckBox(py_config_panel,
ID_PY_APP_USE_CPU, "Use CPU");
py_app_use_cpu->SetValue(false);
py_app_use_cpu->SetValue(c.use_cpu);
py_app_use_cpu->SetToolTip(
"If checked, the transcription engine will run on your "
"CPU instead of your GPU. This is typically much slower "
Expand Down Expand Up @@ -928,17 +952,20 @@ void Frame::OnAppStart(wxCommandEvent& event) {
return;
}

wxProcess* p = PythonWrapper::StartApp(std::move(cb),
kMicChoices[which_mic].ToStdString(),
kLangChoices[which_lang].ToStdString(),
kModelChoices[which_model].ToStdString(),
kCharsPerSync[chars_per_sync_idx].ToStdString(),
kBytesPerChar[bytes_per_char_idx].ToStdString(),
rows,
cols,
window_duration,
enable_local_beep,
use_cpu);
TranscriptionAppConfig c;
c.microphone = kMicChoices[which_mic].ToStdString();
c.language = kLangChoices[which_lang].ToStdString();
c.model = kModelChoices[which_model].ToStdString();
c.chars_per_sync = kCharsPerSync[chars_per_sync_idx].ToStdString();
c.bytes_per_char = kBytesPerChar[bytes_per_char_idx].ToStdString();
c.rows = std::to_string(rows);
c.cols = std::to_string(cols);
c.window_duration = std::to_string(window_duration);
c.enable_local_beep = enable_local_beep;
c.use_cpu = use_cpu;
c.Serialize(TranscriptionAppConfig::kConfigPath);

wxProcess* p = PythonWrapper::StartApp(std::move(cb), c);
if (!p) {
Log(transcribe_out_, "Failed to launch transcription engine\n");
return;
Expand Down
3 changes: 3 additions & 0 deletions GUI/GUI/GUI/GUI.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -140,17 +140,20 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="App.cpp" />
<ClCompile Include="Config.cpp" />
<ClCompile Include="Frame.cpp" />
<ClCompile Include="Logging.cpp" />
<ClCompile Include="main.cpp" />
<ClCompile Include="PythonWrapper.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="App.h" />
<ClInclude Include="Config.h" />
<ClInclude Include="Frame.h" />
<ClInclude Include="Logging.h" />
<ClInclude Include="PythonWrapper.h" />
<ClInclude Include="resource.h" />
<ClInclude Include="ryml.h" />
<ClInclude Include="ScopeGuard.h" />
</ItemGroup>
<ItemGroup>
Expand Down
9 changes: 9 additions & 0 deletions GUI/GUI/GUI/GUI.vcxproj.filters
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
<ClCompile Include="Logging.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="Config.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="PythonWrapper.h">
Expand All @@ -50,6 +53,12 @@
<ClInclude Include="Logging.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="ryml.h">
<Filter>Header Files</Filter>
</ClInclude>
<ClInclude Include="Config.h">
<Filter>Header Files</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="GUI.rc">
Expand Down
27 changes: 13 additions & 14 deletions GUI/GUI/GUI/PythonWrapper.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "Logging.h"
#include "PythonWrapper.h"

#include "Config.h"

#include <stdio.h>

#include <filesystem>
Expand Down Expand Up @@ -142,23 +144,20 @@ bool PythonWrapper::InstallPip(std::string* out) {

wxProcess* PythonWrapper::StartApp(
std::function<void(wxProcess* proc, int ret)>&& exit_callback,
const std::string& mic, const std::string& lang, const std::string& model,
const std::string& chars_per_sync, const std::string& bytes_per_char,
int rows, int cols, int window_duration_s, bool enable_local_beep,
bool use_cpu) {
const TranscriptionAppConfig& config) {
return InvokeAsyncWithArgs({
"-u",
"Resources/Scripts/transcribe.py",
"--mic", mic,
"--lang", lang,
"--model", model,
"--chars_per_sync", chars_per_sync,
"--bytes_per_char", bytes_per_char,
"--enable_local_beep", enable_local_beep ? "1" : "0",
"--rows", std::to_string(rows),
"--cols", std::to_string(cols),
"--window_duration_s", std::to_string(window_duration_s),
"--cpu", use_cpu ? "1" : "0",
"--mic", config.microphone,
"--lang", config.language,
"--model", config.model,
"--chars_per_sync", config.chars_per_sync,
"--bytes_per_char", config.bytes_per_char,
"--enable_local_beep", config.enable_local_beep ? "1" : "0",
"--rows", config.rows,
"--cols", config.cols,
"--window_duration_s", config.window_duration,
"--cpu", config.use_cpu ? "1" : "0",
},
std::move(exit_callback));
}
Expand Down
Loading

0 comments on commit 461714c

Please sign in to comment.