From 2dc29233bf0d6c7a3532f58c98683c9923cd7eae Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Tue, 3 Sep 2024 22:59:04 +0900 Subject: [PATCH 1/2] =?UTF-8?q?add:=20#400:=20=E6=96=87=E5=AD=97=E3=81=8C?= =?UTF-8?q?=E7=95=B0=E5=AD=97=E4=BD=93=E3=82=BB=E3=83=AC=E3=82=AF=E3=82=BF?= =?UTF-8?q?=E3=81=8B=E3=81=A9=E3=81=86=E3=81=8B=E3=82=92=E5=88=A4=E5=AE=9A?= =?UTF-8?q?=E3=81=99=E3=82=8B=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89=E3=82=92?= =?UTF-8?q?=E5=AE=9A=E7=BE=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://en.wikipedia.org/wiki/Variant_form_(Unicode) --- core/src/util.rs | 1 + core/src/util/extension.rs | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 core/src/util/extension.rs diff --git a/core/src/util.rs b/core/src/util.rs index 09d7245b..62ca0efa 100644 --- a/core/src/util.rs +++ b/core/src/util.rs @@ -1,3 +1,4 @@ pub mod converter; +pub(crate) mod extension; pub mod sequence_matcher; mod trimmer; diff --git a/core/src/util/extension.rs b/core/src/util/extension.rs new file mode 100644 index 00000000..9a04fada --- /dev/null +++ b/core/src/util/extension.rs @@ -0,0 +1,33 @@ +pub(crate) trait CharExt { + fn is_variation_selector(&self) -> bool; +} + +impl CharExt for char { + /// Returns whether this character is a variation selector (U+FE00..=U+FE0F or U+E0100..=U+E01EF). + fn is_variation_selector(&self) -> bool { + matches!(self, '\u{FE00}'..='\u{FE0F}' | '\u{E0100}'..='\u{E01EF}') + } +} + +#[cfg(test)] +mod tests { + use crate::util::extension::CharExt; + + #[test] + fn is_variation_selector() { + assert_eq!('あ'.is_variation_selector(), false); + assert_eq!('亜'.is_variation_selector(), false); + + assert_eq!('\u{FDFF}'.is_variation_selector(), false); + assert_eq!('\u{FE00}'.is_variation_selector(), true); + + assert_eq!('\u{FE0F}'.is_variation_selector(), true); + assert_eq!('\u{FE10}'.is_variation_selector(), false); + + assert_eq!('\u{E00FF}'.is_variation_selector(), false); + assert_eq!('\u{E0100}'.is_variation_selector(), true); + + 
assert_eq!('\u{E01EF}'.is_variation_selector(), true); + assert_eq!('\u{E01F0}'.is_variation_selector(), false); + } +} From e04e8a24025db83de17966ede51d8a330de672d1 Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Tue, 3 Sep 2024 23:19:40 +0900 Subject: [PATCH 2/2] =?UTF-8?q?add:=20#400:=20=E6=96=87=E5=AD=97=E5=88=97?= =?UTF-8?q?=E3=81=8B=E3=82=89=E7=95=B0=E5=AD=97=E4=BD=93=E3=82=BB=E3=83=AC?= =?UTF-8?q?=E3=82=AF=E3=82=BF=E3=82=92=E5=8F=96=E3=82=8A=E9=99=A4=E3=81=8F?= =?UTF-8?q?=E3=83=A1=E3=82=BD=E3=83=83=E3=83=89=E3=82=92=E5=AE=9A=E7=BE=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/src/util/extension.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/core/src/util/extension.rs b/core/src/util/extension.rs index 9a04fada..f7a81cf3 100644 --- a/core/src/util/extension.rs +++ b/core/src/util/extension.rs @@ -9,9 +9,22 @@ impl CharExt for char { } } +pub(crate) trait StrExt { + fn strip_variation_selectors(&self) -> String; +} + +impl StrExt for str { + /// Returns a new `String` holding this string's characters with every variation selector removed. + fn strip_variation_selectors(&self) -> String { + self.chars() + .filter(|c| !c.is_variation_selector()) + .collect() + } +} + #[cfg(test)] mod tests { - use crate::util::extension::CharExt; + use crate::util::extension::{CharExt, StrExt}; #[test] fn is_variation_selector() { @@ -30,4 +43,20 @@ mod tests { assert_eq!('\u{E01EF}'.is_variation_selector(), true); assert_eq!('\u{E01F0}'.is_variation_selector(), false); } + + #[test] + fn strip_variation_selectors_逢坂() { + let normal = "\u{9022}\u{5742}"; // 逢坂, plain form without any variation selector + let variant = "\u{9022}\u{E0101}\u{5742}"; // 逢坂 with U+E0101 inserted after the first character + assert_ne!(normal, variant); + assert_eq!(normal, variant.strip_variation_selectors()); + } + + #[test] + fn strip_variation_selectors_茨城() { + let normal = "\u{8328}\u{57CE}"; + let variant = "\u{8328}\u{E0100}\u{57CE}"; + assert_ne!(normal, variant); + assert_eq!(normal, variant.strip_variation_selectors()); + } }