Skip to content

Commit

Permalink
Fix invalid regular expressions in MML parsing
Browse files Browse the repository at this point in the history
A workaround is added to restore the single line mode in expressions which require it.
Also split by regex utility method was
implemented with a major mistake, which became
obvious from testing.
  • Loading branch information
YuriSizov committed May 26, 2024
1 parent 187f4ee commit 9dc1e95
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 20 deletions.
7 changes: 4 additions & 3 deletions src/sequencer/base/mml_parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,10 @@ void MMLParser::_create_mml_regex(bool p_reset) {
user_defs_str = String("|").join(user_defs);
}

String reg_string;
reg_string = "(\\s+)"; // whitespace [1]
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
String reg_string = "(?s)";
reg_string += "(\\s+)"; // whitespace [1]
reg_string += "|(#[^;]*)"; // system [2]
reg_string += "|("; // --all-- [3]
reg_string += "([a-g])([\\-+#]?)"; // note [4][5]
Expand All @@ -76,7 +78,6 @@ void MMLParser::_create_mml_regex(bool p_reset) {
reg_string += ")\\s*(-?[0-9]*)"; // parameter [9]
reg_string += "\\s*(\\.*)"; // periods [10]

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "gms". Behavioral implications require investigation.
_mml_regex = RegEx::create_from_string(reg_string);
}

Expand Down
25 changes: 15 additions & 10 deletions src/sequencer/simml_sequencer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,9 @@ String SiMMLSequencer::_on_before_compile(String p_mml) {

// Remove comments.

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "gms". Behavioral implications require investigation.
Ref<RegEx> re_comments = RegEx::create_from_string("/\\*.*?\\*/|//.*?[\\r\\n]+");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_comments = RegEx::create_from_string("(?s)/\\*.*?\\*/|//.*?[\\r\\n]+");
mml = re_comments->sub(mml, "", true);

// Format last.
Expand All @@ -200,8 +201,9 @@ String SiMMLSequencer::_on_before_compile(String p_mml) {

String expanded_mml;

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "gms". Behavioral implications require investigation.
Ref<RegEx> re_sequence = RegEx::create_from_string("[ \\t\\r\\n]*(#([A-Z@\\-]+)(\\+=|=)?)?([^;{]*({.*?})?[^;]*);");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_sequence = RegEx::create_from_string("(?s)[ \\t\\r\\n]*(#([A-Z@\\-]+)(\\+=|=)?)?([^;{]*({.*?})?[^;]*);");
Ref<RegEx> re_macro_id = RegEx::create_from_string("([A-Z])?(-([A-Z])?)?");

TypedArray<RegExMatch> matches = re_sequence->search_all(mml);
Expand Down Expand Up @@ -264,8 +266,9 @@ String SiMMLSequencer::_on_before_compile(String p_mml) {

// Expand repeat.

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "gms". Behavioral implications require investigation.
Ref<RegEx> re_repeat = RegEx::create_from_string("!\\[(\\d*)(.*?)(!\\|(.*?))?!\\](\\d*)");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_repeat = RegEx::create_from_string("(?s)!\\[(\\d*)(.*?)(!\\|(.*?))?!\\](\\d*)");
matches = re_repeat->search_all(expanded_mml);
// Iterate backwards so we can do in-place replacements without disturbing indices.
for (int i = matches.size() - 1; i >= 0; i--) {
Expand Down Expand Up @@ -334,8 +337,9 @@ void SiMMLSequencer::_on_beat(int p_delay_samples, int p_beat_counter) {
void SiMMLSequencer::_on_table_parse(MMLEvent *p_prev, String p_table) {
ERR_FAIL_COND_MSG(p_prev->id < _envelope_event_id || p_prev->id > _envelope_event_id + 10, "SiMMLSequencer : Internal table is available only for envelope commands.");

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "ms". Behavioral implications require investigation.
Ref<RegEx> re_table = RegEx::create_from_string("\\{([^}]*)\\}(.*)");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_table = RegEx::create_from_string("(?s)\\{([^}]*)\\}(.*)");

Ref<RegExMatch> res = re_table->search(p_table);
ERR_FAIL_COND_MSG(!res.is_valid(), "SiMMLSequencer: Invalid table format.");
Expand Down Expand Up @@ -680,8 +684,9 @@ void SiMMLSequencer::_try_process_command_callback(String p_command, int p_numbe
}

bool SiMMLSequencer::_parse_system_command_before(String p_command, String p_param) {
// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "ms". Behavioral implications require investigation.
Ref<RegEx> re_param = RegEx::create_from_string("\\s*(\\d*)\\s*(\\{(.*?)\\})?(.*)");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_param = RegEx::create_from_string("(?s)\\s*(\\d*)\\s*(\\{(.*?)\\})?(.*)");
Ref<RegExMatch> res = re_param->search(p_param);

int number = res->get_string(1).to_int();
Expand Down
5 changes: 3 additions & 2 deletions src/sion_voice.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,9 @@ String SiONVoice::get_mml(int p_index, String p_chip_type, bool p_append_postfix
int SiONVoice::set_by_mml(String p_mml) {
reset();

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "ms". Behavioral implications require investigation.
Ref<RegEx> re_command = RegEx::create_from_string("(#[A-Z]*@)\\s*(\\d+)\\s*{(.*?)}(.*?);");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_command = RegEx::create_from_string("(?s)(#[A-Z]*@)\\s*(\\d+)\\s*{(.*?)}(.*?);");
Ref<RegExMatch> res = re_command->search(p_mml);
if (res.is_null()) {
return -1;
Expand Down
12 changes: 10 additions & 2 deletions src/utils/godot_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,18 @@ PackedStringArray split_string_by_regex(const String &p_string, const String &p_

Ref<RegEx> re_split = RegEx::create_from_string(p_regex);
TypedArray<RegExMatch> matches = re_split->search_all(p_string);

int last_index = 0;
for (int i = 0; i < matches.size(); i++) {
Ref<RegExMatch> match = matches[i];
arr.append(match->get_string());
Ref<RegExMatch> separator = matches[i];
String match = p_string.substr(last_index, separator->get_start() - last_index);

arr.append(match);
last_index = separator->get_end();
}

String last_match = p_string.substr(last_index);
arr.append(last_match);

return arr;
}
7 changes: 4 additions & 3 deletions src/utils/translator_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,12 @@ Vector<int> TranslatorUtil::_split_data_string(SiOPMChannelParams *r_params, Str
return Vector<int>();
}

// FIXME: Godot's RegEx implementation doesn't support passing global flags. This pattern originally used "gms". Behavioral implications require investigation.
Ref<RegEx> re_command = RegEx::create_from_string("/\\*.*?\\*/|//.*?[\\r\\n]+");
// Godot's RegEx implementation doesn't support passing global flags, but PCRE2 allows local flags, which we can abuse.
// (?s) enables single line mode (dot matches newline) for the entire expression.
Ref<RegEx> re_comments = RegEx::create_from_string("(?s)/\\*.*?\\*/|//.*?[\\r\\n]+");
Ref<RegEx> re_cleanup = RegEx::create_from_string("^[^\\d\\-.]+|[^\\d\\-.]+$");

String sanitized_string = re_command->sub(p_data_string, "", true);
String sanitized_string = re_comments->sub(p_data_string, "", true);
sanitized_string = re_cleanup->sub(sanitized_string, "", true);
PackedStringArray string_data = split_string_by_regex(sanitized_string, "[^\\d\\-.]+");

Expand Down

0 comments on commit 9dc1e95

Please sign in to comment.