From 8aaebcdfb3de363127705b6fcba1f086940aa51b Mon Sep 17 00:00:00 2001 From: Yuuki Toriyama Date: Thu, 12 Sep 2024 23:10:27 +0900 Subject: [PATCH] =?UTF-8?q?update:=20#410:=20=E3=83=95=E3=82=A3=E3=83=BC?= =?UTF-8?q?=E3=83=81=E3=83=A3=E3=83=95=E3=83=A9=E3=82=B0`eliminate-whitesp?= =?UTF-8?q?aces`=E3=81=8C=E6=8C=87=E5=AE=9A=E3=81=95=E3=82=8C=E3=81=9F?= =?UTF-8?q?=E5=A0=B4=E5=90=88=E3=80=81=E5=85=A5=E5=8A=9B=E3=81=95=E3=82=8C?= =?UTF-8?q?=E3=81=9F=E6=96=87=E5=AD=97=E5=88=97=E3=81=8B=E3=82=89=E3=83=9B?= =?UTF-8?q?=E3=83=AF=E3=82=A4=E3=83=88=E3=82=B9=E3=83=9A=E3=83=BC=E3=82=B9?= =?UTF-8?q?=E3=82=92=E5=8F=96=E3=82=8A=E9=99=A4=E3=81=8F=E3=82=88=E3=81=86?= =?UTF-8?q?=E3=81=AB=E3=81=97=E3=81=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- core/Cargo.toml | 1 + core/src/tokenizer/read_prefecture.rs | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/core/Cargo.toml b/core/Cargo.toml index 2ad307de..0e72ca7d 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -19,6 +19,7 @@ default = ["city-name-correction"] blocking = ["reqwest/blocking"] city-name-correction = [] format-house-number = [] +eliminate-whitespaces = [] [dependencies] itertools = "0.13.0" diff --git a/core/src/tokenizer/read_prefecture.rs b/core/src/tokenizer/read_prefecture.rs index d811f320..e88f639b 100644 --- a/core/src/tokenizer/read_prefecture.rs +++ b/core/src/tokenizer/read_prefecture.rs @@ -60,7 +60,11 @@ impl Tokenizer { prefecture_name: None, city_name: None, town_name: None, - rest: input.strip_variation_selectors(), + rest: if cfg!(feature = "eliminate-whitespaces") { + input.strip_variation_selectors().strip_whitespaces() + } else { + input.strip_variation_selectors() + }, _state: PhantomData, } } @@ -117,6 +121,17 @@ mod tests { assert_eq!(tokenizer.rest, "東京都葛飾区立石5-13-1") } + #[test] + #[cfg(feature = "eliminate-whitespaces")] + fn new_ホワイトスペース除却() { + let tokenizer = Tokenizer::new("東京都 目黒区 下目黒 4‐1‐1"); + assert_eq!(tokenizer.input, "東京都 目黒区 下目黒 4‐1‐1"); + assert_eq!(tokenizer.prefecture_name, None); + assert_eq!(tokenizer.city_name, None); + assert_eq!(tokenizer.town_name, None); + assert_eq!(tokenizer.rest, "東京都目黒区下目黒4‐1‐1") + } + #[test] fn read_prefecture_成功() { let tokenizer = Tokenizer::new("東京都港区芝公園4丁目2-8");