From 90618dc13c664583c29a06be02f98e469054e98b Mon Sep 17 00:00:00 2001 From: homexp13 Date: Thu, 30 Nov 2023 15:42:50 +0300 Subject: [PATCH 1/4] badwordtest_1 --- code/__DEFINES/text.dm | 2 +- code/__HELPERS/chat_filter.dm | 12 ++++---- code/__HELPERS/text.dm | 28 +++++++++---------- .../configuration/configuration.dm | 4 +-- 4 files changed, 23 insertions(+), 23 deletions(-) diff --git a/code/__DEFINES/text.dm b/code/__DEFINES/text.dm index ffac885bf4c88..5a6ba21312edb 100644 --- a/code/__DEFINES/text.dm +++ b/code/__DEFINES/text.dm @@ -53,7 +53,7 @@ #define SANITIZE_FILENAME(text) (GLOB.filename_forbidden_chars.Replace(text, "")) /// Simply removes the < and > characters, and limits the length of the message. -#define STRIP_HTML_SIMPLE(text, limit) (GLOB.angular_brackets.Replace(copytext(text, 1, limit), "")) +#define STRIP_HTML_SIMPLE(text, limit) (GLOB.angular_brackets.Replace_char(copytext_char(text, 1, limit), "")) //MASSMETA EDIT /// Removes everything enclose in < and > inclusive of the bracket, and limits the length of the message. #define STRIP_HTML_FULL(text, limit) (GLOB.html_tags.Replace(copytext(text, 1, limit), "")) diff --git a/code/__HELPERS/chat_filter.dm b/code/__HELPERS/chat_filter.dm index b395da3d7aa3b..7ba05afdf8aff 100644 --- a/code/__HELPERS/chat_filter.dm +++ b/code/__HELPERS/chat_filter.dm @@ -5,7 +5,7 @@ /// Given a text, will return what word is on the IC filter, with the reason. /// Returns null if the message is OK. /proc/is_ic_filtered(message) - if (config.ic_filter_regex?.Find(message)) + if (config.ic_filter_regex?.Find_char(message)) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ic_filter_regex) return list( matched_group, @@ -17,7 +17,7 @@ /// Given a text, will return what word is on the soft IC filter, with the reason. /// Returns null if the message is OK. /proc/is_soft_ic_filtered(message) - if (config.soft_ic_filter_regex?.Find(message)) + if (config.soft_ic_filter_regex?.Find_char(message)) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.soft_ic_filter_regex) return list( matched_group, @@ -29,7 +29,7 @@ /// Given a text, will return what word is on the OOC filter, with the reason. /// Returns null if the message is OK. /proc/is_ooc_filtered(message) - if (config.ooc_filter_regex?.Find(message)) + if (config.ooc_filter_regex?.Find_char(message)) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ooc_filter_regex) return list(matched_group, config.shared_filter_reasons[matched_group]) @@ -38,7 +38,7 @@ /// Given a text, will return that word is on the soft OOC filter, with the reason. /// Returns null if the message is OK. /proc/is_soft_ooc_filtered(message) - if (config.soft_ooc_filter_regex?.Find(message)) + if (config.soft_ooc_filter_regex?.Find_char(message)) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.soft_ooc_filter_regex) return list(matched_group, config.soft_shared_filter_reasons[matched_group]) @@ -64,7 +64,7 @@ /// Given a text, will return what word is on the IC filter, ignoring words allowed on the PDA, with the reason. /// Returns null if the message is OK. /proc/is_ic_filtered_for_pdas(message) - if (config.ic_outside_pda_filter_regex?.Find(message)) + if (config.ic_outside_pda_filter_regex?.Find_char(message)) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ic_outside_pda_filter_regex) return list( matched_group, @@ -76,7 +76,7 @@ /// Given a text, will return what word is on the soft IC filter, ignoring words allowed on the PDA, with the reason. /// Returns null if the message is OK. /proc/is_soft_ic_filtered_for_pdas(message) - if (config.soft_ic_outside_pda_filter_regex?.Find(message)) + if (config.soft_ic_outside_pda_filter_regex?.Find_char(message)) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.soft_ic_outside_pda_filter_regex) return list( matched_group, diff --git a/code/__HELPERS/text.dm b/code/__HELPERS/text.dm index 7639f1b6a07fe..0e09b1ee8987f 100644 --- a/code/__HELPERS/text.dm +++ b/code/__HELPERS/text.dm @@ -36,7 +36,7 @@ /// Runs byond's html encoding sanitization proc, after replacing new-lines and tabs for the # character. /proc/sanitize(text) var/static/regex/regex = regex(@"[\n\t]", "g") - return html_encode(regex.Replace(text, "#")) + return html_encode(regex.Replace_char(text, "#")) //MASSMETA EDIT /// Runs STRIP_HTML_SIMPLE and sanitize. @@ -66,14 +66,14 @@ */ /proc/htmlrendertext(t) // Trim "whitespace" by lazily capturing word characters in the middle - var/static/regex/matchMiddle = new(@"^\s*([\W\w]*?)\s*$") - if(matchMiddle.Find(t) == 0) + var/static/regex/matchMiddle = new(@"^\s*([\W\wа-яё]*?)\s*$", "i") //MASSMETA EDIT + if(matchMiddle.Find_char(t) == 0) //MASSMETA EDIT return t t = matchMiddle.group[1] // Replace any non-space whitespace characters with spaces, and also multiple occurences with just one space var/static/regex/matchSpacing = new(@"\s+", "g") - t = replacetext(t, matchSpacing, " ") + t = replacetext_char(t, matchSpacing, " ") //MASSMETA EDIT return t @@ -121,7 +121,7 @@ if(isnull(user_input)) // User pressed cancel return if(no_trim) - return copytext(html_encode(user_input), 1, max_length) + return copytext_char(html_encode(user_input), 1, max_length) //MASSMETA EDIT else return trim(html_encode(user_input), max_length) //trim is "outside" because html_encode can expand single symbols into multiple symbols (such as turning < into <) @@ -297,16 +297,16 @@ //Returns a string with reserved characters and spaces before the first letter removed /proc/trim_left(text) - for (var/i = 1 to length(text)) - if (text2ascii(text, i) > 32) - return copytext(text, i) + for (var/i = 1 to length_char(text)) //MASSMETA EDIT + if (text2ascii_char(text, i) > 32) //MASSMETA EDIT + return copytext_char(text, i) //MASSMETA EDIT return "" //Returns a string with reserved characters and spaces after the last letter removed /proc/trim_right(text) - for (var/i = length(text), i > 0, i--) - if (text2ascii(text, i) > 32) - return copytext(text, 1, i + 1) + for (var/i = length_char(text), i > 0, i--) //MASSMETA EDIT + if (text2ascii_char(text, i) > 32) //MASSMETA EDIT + return copytext_char(text, 1, i + 1) //MASSMETA EDIT return "" //Returns a string with reserved characters and spaces after the first and last letters removed @@ -339,8 +339,8 @@ * * max_length - integer length to truncate at */ /proc/truncate(text, max_length) - if(length(text) > max_length) - return copytext(text, 1, max_length) + if(length_char(text) > max_length) //MASSMETA EDIT + return copytext_char(text, 1, max_length) //MASSMETA EDIT return text //Returns a string with reserved characters and spaces before the first word and after the last word removed. @@ -354,7 +354,7 @@ . = t if(t) . = t[1] - return uppertext(.) + copytext(t, 1 + length(.)) + return uppertext(.) + copytext_char(t, 1 + length_char(.)) //MASSMETA EDIT ///Returns a string with the first letter of each word capitialized /proc/full_capitalize(input) diff --git a/code/controllers/configuration/configuration.dm b/code/controllers/configuration/configuration.dm index 830f15ae559d2..bce689cfb47ff 100644 --- a/code/controllers/configuration/configuration.dm +++ b/code/controllers/configuration/configuration.dm @@ -478,7 +478,7 @@ Example config: if (isnull(banned_words) || banned_words.len == 0) return null - var/static/regex/should_join_on_word_bounds = regex(@"^\w+$") + var/static/regex/should_join_on_word_bounds = regex(@"^[\wа-яё]+$", "i") //MASSMETA EDIT // Stuff like emoticons needs another split, since there's no way to get ":)" on a word bound. // Furthermore, normal words need to be on word bounds, so "(adminhelp)" gets filtered. @@ -493,7 +493,7 @@ Example config: // We don't want a whitespace_split part if there's no stuff that requires it var/whitespace_split = to_join_on_whitespace_splits.len > 0 ? @"(?:(?:^|\s+)(" + jointext(to_join_on_whitespace_splits, "|") + @")(?:$|\s+))" : "" - var/word_bounds = @"(\b(" + jointext(to_join_on_word_bounds, "|") + @")\b)" + var/word_bounds = @"((" + jointext(to_join_on_word_bounds, "|") + @"))" //MASSMETA EDIT var/regex_filter = whitespace_split != "" ? "([whitespace_split]|[word_bounds])" : word_bounds return regex(regex_filter, "i") From 27eb6b1e8887977138981b4785be554a1315667a Mon Sep 17 00:00:00 2001 From: homexp13 Date: Thu, 30 Nov 2023 16:20:28 +0300 Subject: [PATCH 2/4] unittest --- code/modules/unit_tests/chat_filter.dm | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/code/modules/unit_tests/chat_filter.dm b/code/modules/unit_tests/chat_filter.dm index 1ac649cc9f03c..af7e8ba62bfbd 100644 --- a/code/modules/unit_tests/chat_filter.dm +++ b/code/modules/unit_tests/chat_filter.dm @@ -26,6 +26,9 @@ null, ) + //MASSMETA EDIT REMOVAL BEGIN - CODE_WORDS - these words HAVE filtered words in them + /* + test_filter( "these words have filtered words in them: ablockedinic blockedinicbbbb aablockedinicbb", null, @@ -34,6 +37,9 @@ null, ) + */ + //MASSMETA EDIT REMOVAL END + test_filter( "<(0_0<) <(0_0)> (>0_0)> KIRBY DANCE!!!", "<(0_0<)", From 6d786487a9534895fbc94b1320b180ced82ccb46 Mon Sep 17 00:00:00 2001 From: homexp13 Date: Thu, 30 Nov 2023 17:44:27 +0300 Subject: [PATCH 3/4] lowertext --- code/__HELPERS/chat_filter.dm | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/code/__HELPERS/chat_filter.dm b/code/__HELPERS/chat_filter.dm index 7ba05afdf8aff..2e78f0dd62000 100644 --- a/code/__HELPERS/chat_filter.dm +++ b/code/__HELPERS/chat_filter.dm @@ -5,7 +5,7 @@ /// Given a text, will return what word is on the IC filter, with the reason. /// Returns null if the message is OK. /proc/is_ic_filtered(message) - if (config.ic_filter_regex?.Find_char(message)) //MASSMETA EDIT + if (config.ic_filter_regex?.Find_char(lowertext(message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ic_filter_regex) return list( matched_group, @@ -17,7 +17,7 @@ /// Given a text, will return what word is on the soft IC filter, with the reason. /// Returns null if the message is OK. /proc/is_soft_ic_filtered(message) - if (config.soft_ic_filter_regex?.Find_char(message)) //MASSMETA EDIT + if (config.soft_ic_filter_regex?.Find_char(lowertext(message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.soft_ic_filter_regex) return list( matched_group, @@ -29,7 +29,7 @@ /// Given a text, will return what word is on the OOC filter, with the reason. /// Returns null if the message is OK. /proc/is_ooc_filtered(message) - if (config.ooc_filter_regex?.Find_char(message)) //MASSMETA EDIT + if (config.ooc_filter_regex?.Find_char(lowertext(message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ooc_filter_regex) return list(matched_group, config.shared_filter_reasons[matched_group]) @@ -38,7 +38,7 @@ /// Given a text, will return that word is on the soft OOC filter, with the reason. /// Returns null if the message is OK. /proc/is_soft_ooc_filtered(message) - if (config.soft_ooc_filter_regex?.Find_char(message)) //MASSMETA EDIT + if (config.soft_ooc_filter_regex?.Find_char(lowertext(message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.soft_ooc_filter_regex) return list(matched_group, config.soft_shared_filter_reasons[matched_group]) @@ -64,7 +64,7 @@ /// Given a text, will return what word is on the IC filter, ignoring words allowed on the PDA, with the reason. /// Returns null if the message is OK. /proc/is_ic_filtered_for_pdas(message) - if (config.ic_outside_pda_filter_regex?.Find_char(message)) //MASSMETA EDIT + if (config.ic_outside_pda_filter_regex?.Find_charlowertext((message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ic_outside_pda_filter_regex) return list( matched_group, @@ -76,7 +76,7 @@ /// Given a text, will return what word is on the soft IC filter, ignoring words allowed on the PDA, with the reason. /// Returns null if the message is OK. /proc/is_soft_ic_filtered_for_pdas(message) - if (config.soft_ic_outside_pda_filter_regex?.Find_char(message)) //MASSMETA EDIT + if (config.soft_ic_outside_pda_filter_regex?.Find_char(lowertext(message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.soft_ic_outside_pda_filter_regex) return list( matched_group, From c3d59466fbb912204cbb103cbee2dc9b21a14dea Mon Sep 17 00:00:00 2001 From: homexp13 Date: Thu, 30 Nov 2023 17:50:07 +0300 Subject: [PATCH 4/4] durak durakom --- code/__HELPERS/chat_filter.dm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/code/__HELPERS/chat_filter.dm b/code/__HELPERS/chat_filter.dm index 2e78f0dd62000..fa02b37cae90b 100644 --- a/code/__HELPERS/chat_filter.dm +++ b/code/__HELPERS/chat_filter.dm @@ -64,7 +64,7 @@ /// Given a text, will return what word is on the IC filter, ignoring words allowed on the PDA, with the reason. /// Returns null if the message is OK. /proc/is_ic_filtered_for_pdas(message) - if (config.ic_outside_pda_filter_regex?.Find_charlowertext((message))) //MASSMETA EDIT + if (config.ic_outside_pda_filter_regex?.Find_char(lowertext(message))) //MASSMETA EDIT var/matched_group = GET_MATCHED_GROUP(config.ic_outside_pda_filter_regex) return list( matched_group,