Skip to content

Commit

Permalink
Merge pull request #413 from YuukiToriyama/feature/strip-whitespaces/…
Browse files Browse the repository at this point in the history
…master

不要な空白の除却をrelease/v0.1.14にマージ
  • Loading branch information
YuukiToriyama authored Sep 12, 2024
2 parents 6edd8d7 + 267e22d commit 29ff980
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 2 deletions.
1 change: 1 addition & 0 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ default = ["city-name-correction"]
blocking = ["reqwest/blocking"]
city-name-correction = []
format-house-number = []
eliminate-whitespaces = []

[dependencies]
itertools = "0.13.0"
Expand Down
1 change: 1 addition & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
//! - `blocking`: Provide methods that work synchronously
//! - `city-name-correction`*(enabled by default)*: Enable autocorrection if an ambiguous city name was typed
//! - `format-house-number`: Enable normalization of addresses after town name
//! - `eliminate-whitespaces`*(experimental)*: Enable removal of whitespace from the given text

#[cfg(all(target_family = "wasm", feature = "blocking"))]
compile_error! {
Expand Down
17 changes: 16 additions & 1 deletion core/src/tokenizer/read_prefecture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,11 @@ impl Tokenizer<Init> {
prefecture_name: None,
city_name: None,
town_name: None,
rest: input.strip_variation_selectors(),
rest: if cfg!(feature = "eliminate-whitespaces") {
input.strip_variation_selectors().strip_whitespaces()
} else {
input.strip_variation_selectors()
},
_state: PhantomData,
}
}
Expand Down Expand Up @@ -117,6 +121,17 @@ mod tests {
assert_eq!(tokenizer.rest, "東京都葛飾区立石5-13-1")
}

// Checks `Tokenizer::new` when the `eliminate-whitespaces` feature is enabled:
// the original text is kept in `input`, while `rest` holds it without whitespace.
#[test]
#[cfg(feature = "eliminate-whitespaces")]
fn new_ホワイトスペース除却() {
let tokenizer = Tokenizer::new("東京都 目黒区 下目黒 4‐1‐1");
// `input` still holds the text exactly as it was passed in.
assert_eq!(tokenizer.input, "東京都 目黒区 下目黒 4‐1‐1");
// Nothing has been parsed yet at construction time.
assert_eq!(tokenizer.prefecture_name, None);
assert_eq!(tokenizer.city_name, None);
assert_eq!(tokenizer.town_name, None);
// The spaces between the address components are gone from `rest`.
assert_eq!(tokenizer.rest, "東京都目黒区下目黒4‐1‐1")
}

#[test]
fn read_prefecture_成功() {
let tokenizer = Tokenizer::new("東京都港区芝公園4丁目2-8");
Expand Down
17 changes: 17 additions & 0 deletions core/src/util/extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,15 @@ impl CharExt for char {
}

/// Crate-internal extension trait adding string clean-up helpers.
pub(crate) trait StrExt {
/// Returns a new `String` with whitespace characters removed.
fn strip_whitespaces(&self) -> String;
/// Returns a new `String` with variation selectors removed.
fn strip_variation_selectors(&self) -> String;
}

impl StrExt for str {
/// Returns a copy of this string with every whitespace character removed.
///
/// A character is dropped when [`char::is_whitespace`] reports it as
/// whitespace; all other characters are kept in their original order.
fn strip_whitespaces(&self) -> String {
    let mut stripped = String::with_capacity(self.len());
    for ch in self.chars() {
        if ch.is_whitespace() {
            continue;
        }
        stripped.push(ch);
    }
    stripped
}
/// 文字列から異字体セレクタを取り除きます
fn strip_variation_selectors(&self) -> String {
self.chars()
Expand Down Expand Up @@ -59,4 +64,16 @@ mod tests {
assert_ne!(normal, variant);
assert_eq!(normal, variant.strip_variation_selectors());
}

// Checks that `strip_whitespaces` removes whitespace wherever it occurs and
// keeps the remaining characters in their original order.
#[test]
fn strip_whitespaces() {
// Input without whitespace comes back unchanged.
assert_eq!("四谷1丁目".strip_whitespaces(), "四谷1丁目");
// Whitespace between characters is removed, including several separate gaps.
assert_eq!("四谷 1丁目".strip_whitespaces(), "四谷1丁目");
assert_eq!("四谷 1丁目".strip_whitespaces(), "四谷1丁目");
assert_eq!("四谷 1 丁 目".strip_whitespaces(), "四谷1丁目");
// Same set of checks with a second sample address.
assert_eq!("神田3丁目".strip_whitespaces(), "神田3丁目");
assert_eq!("神田 3丁目".strip_whitespaces(), "神田3丁目");
// A run of consecutive whitespace characters is removed entirely.
assert_eq!("神田  3丁目".strip_whitespaces(), "神田3丁目");
assert_eq!("神田 3 丁目".strip_whitespaces(), "神田3丁目");
}
}
2 changes: 1 addition & 1 deletion wasm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ crate-type = ["cdylib"]

[features]
debug = []
nightly = ["japanese-address-parser/format-house-number"]
nightly = ["japanese-address-parser/format-house-number", "japanese-address-parser/eliminate-whitespaces"]

[dependencies]
console_error_panic_hook = "0.1.7"
Expand Down

0 comments on commit 29ff980

Please sign in to comment.