diff --git a/core/Cargo.toml b/core/Cargo.toml index 2ad307de..0e72ca7d 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,6 +19,7 @@ default = ["city-name-correction"] blocking = ["reqwest/blocking"] city-name-correction = [] format-house-number = [] +eliminate-whitespaces = [] [dependencies] itertools = "0.13.0" diff --git a/core/src/lib.rs b/core/src/lib.rs index aa31c5a9..b942087c 100644 --- a/core/src/lib.rs +++ b/core/src/lib.rs @@ -4,6 +4,7 @@ //! - `blocking`: Provide method that works synchronously //! - `city-name-correction`*(enabled by default)*: Enable autocorrection if ambiguous city name was typed //! - `format-house-number`: Enable normalization of addresses after town name +//! - `eliminate-whitespaces`*(experimental)*: Enable elimination of whitespace from the given text #[cfg(all(target_family = "wasm", feature = "blocking"))] compile_error! { diff --git a/core/src/tokenizer/read_prefecture.rs b/core/src/tokenizer/read_prefecture.rs index d811f320..e88f639b 100644 --- a/core/src/tokenizer/read_prefecture.rs +++ b/core/src/tokenizer/read_prefecture.rs @@ -60,7 +60,11 @@ impl Tokenizer { prefecture_name: None, city_name: None, town_name: None, - rest: input.strip_variation_selectors(), + rest: if cfg!(feature = "eliminate-whitespaces") { // feature on: whitespace is dropped after variation selectors are stripped + input.strip_variation_selectors().strip_whitespaces() + } else { + input.strip_variation_selectors() + }, _state: PhantomData, } } @@ -117,6 +121,17 @@ mod tests { assert_eq!(tokenizer.rest, "東京都葛飾区立石5-13-1") } + #[test] + #[cfg(feature = "eliminate-whitespaces")] + fn new_ホワイトスペース除却() { // "whitespace removal": spaces are dropped from `rest`, while `input` keeps the original text + let tokenizer = Tokenizer::new("東京都 目黒区 下目黒 4‐1‐1"); + assert_eq!(tokenizer.input, "東京都 目黒区 下目黒 4‐1‐1"); + assert_eq!(tokenizer.prefecture_name, None); + assert_eq!(tokenizer.city_name, None); + assert_eq!(tokenizer.town_name, None); + assert_eq!(tokenizer.rest, "東京都目黒区下目黒4‐1‐1") + } + #[test] fn read_prefecture_成功() { let tokenizer = Tokenizer::new("東京都港区芝公園4丁目2-8"); diff --git a/core/src/util/extension.rs 
b/core/src/util/extension.rs index f7a81cf3..a8cd5940 100644 --- a/core/src/util/extension.rs +++ b/core/src/util/extension.rs @@ -10,10 +10,15 @@ impl CharExt for char { } pub(crate) trait StrExt { + fn strip_whitespaces(&self) -> String; fn strip_variation_selectors(&self) -> String; } impl StrExt for str { + /// Returns a copy of the string with every character for which `char::is_whitespace` is true removed + fn strip_whitespaces(&self) -> String { + self.chars().filter(|c| !c.is_whitespace()).collect() + } /// 文字列から異字体セレクタを取り除きます fn strip_variation_selectors(&self) -> String { self.chars() @@ -59,4 +64,16 @@ mod tests { assert_ne!(normal, variant); assert_eq!(normal, variant.strip_variation_selectors()); } + + #[test] + fn strip_whitespaces() { + assert_eq!("四谷1丁目".strip_whitespaces(), "四谷1丁目"); + assert_eq!("四谷 1丁目".strip_whitespaces(), "四谷1丁目"); + assert_eq!("四谷 1丁目".strip_whitespaces(), "四谷1丁目"); + assert_eq!("四谷 1 丁 目".strip_whitespaces(), "四谷1丁目"); + assert_eq!("神田3丁目".strip_whitespaces(), "神田3丁目"); + assert_eq!("神田 3丁目".strip_whitespaces(), "神田3丁目"); + assert_eq!("神田  3丁目".strip_whitespaces(), "神田3丁目"); + assert_eq!("神田 3 丁目".strip_whitespaces(), "神田3丁目"); + } }