Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

不要な空白の除却: フィーチャフラグeliminate-whitespacesが指定された場合、入力された文字列からホワイトスペースを除去する #411

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ default = ["city-name-correction"]
blocking = ["reqwest/blocking"]
city-name-correction = []
format-house-number = []
eliminate-whitespaces = []

[dependencies]
itertools = "0.13.0"
Expand Down
1 change: 1 addition & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
//! - `blocking`: Provide method that works synchronously
//! - `city-name-correction`*(enabled by default)*: Enable autocorrection if ambiguous city name was typed
//! - `format-house-number`: Enable normalization of addresses after town name
//! - `eliminate-whitespaces`*(experimental)*: Enable removal of all whitespace from the input text

#[cfg(all(target_family = "wasm", feature = "blocking"))]
compile_error! {
Expand Down
17 changes: 16 additions & 1 deletion core/src/tokenizer/read_prefecture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ impl Tokenizer<Init> {
prefecture_name: None,
city_name: None,
town_name: None,
rest: input.strip_variation_selectors(),
rest: if cfg!(feature = "eliminate-whitespaces") {
input.strip_variation_selectors().strip_whitespaces()
} else {
input.strip_variation_selectors()
},
_state: PhantomData,
}
}
Expand Down Expand Up @@ -117,6 +121,17 @@ mod tests {
assert_eq!(tokenizer.rest, "東京都葛飾区立石5-13-1")
}

/// With the `eliminate-whitespaces` feature enabled, a freshly built tokenizer
/// keeps the raw input as given, has no parsed components yet, and exposes the
/// whitespace-free text through `rest`.
#[test]
#[cfg(feature = "eliminate-whitespaces")]
fn new_ホワイトスペース除却() {
    let raw = "東京都 目黒区 下目黒 4‐1‐1";
    let tokenizer = Tokenizer::new(raw);

    // The original input must be preserved untouched.
    assert_eq!(tokenizer.input, raw);

    // Nothing has been parsed at construction time.
    assert_eq!(tokenizer.prefecture_name, None);
    assert_eq!(tokenizer.city_name, None);
    assert_eq!(tokenizer.town_name, None);

    // Only `rest` has its whitespace removed.
    assert_eq!(tokenizer.rest, "東京都目黒区下目黒4‐1‐1");
}

#[test]
fn read_prefecture_成功() {
let tokenizer = Tokenizer::new("東京都港区芝公園4丁目2-8");
Expand Down
17 changes: 17 additions & 0 deletions core/src/util/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ impl CharExt for char {
}

/// Crate-internal extension methods for string slices; each one returns a new,
/// cleaned-up `String` and leaves the receiver unchanged.
pub(crate) trait StrExt {
/// Returns a copy of the string with whitespace characters removed.
fn strip_whitespaces(&self) -> String;
/// Returns a copy of the string with variation selectors removed.
fn strip_variation_selectors(&self) -> String;
}

impl StrExt for str {
/// Returns a copy of the string with every whitespace character removed.
///
/// A character is dropped when `char::is_whitespace` reports it as whitespace;
/// all other characters are kept in their original order.
fn strip_whitespaces(&self) -> String {
    let mut stripped = String::new();
    for ch in self.chars() {
        if !ch.is_whitespace() {
            stripped.push(ch);
        }
    }
    stripped
}
/// 文字列から異字体セレクタを取り除きます
fn strip_variation_selectors(&self) -> String {
self.chars()
Expand Down Expand Up @@ -59,4 +64,16 @@ mod tests {
assert_ne!(normal, variant);
assert_eq!(normal, variant.strip_variation_selectors());
}

/// Checks that `strip_whitespaces` leaves whitespace-free text unchanged and
/// removes whitespace wherever it appears inside the text.
#[test]
fn strip_whitespaces() {
    // (input, expected) pairs; every input must collapse to its expected form.
    let cases = [
        ("四谷1丁目", "四谷1丁目"),
        ("四谷 1丁目", "四谷1丁目"),
        ("四谷 1丁目", "四谷1丁目"),
        ("四谷 1 丁 目", "四谷1丁目"),
        ("神田3丁目", "神田3丁目"),
        ("神田 3丁目", "神田3丁目"),
        ("神田  3丁目", "神田3丁目"),
        ("神田 3 丁目", "神田3丁目"),
    ];

    for (input, expected) in cases {
        assert_eq!(input.strip_whitespaces(), expected);
    }
}
}
Loading