Skip to content

Commit

Permalink
Merge pull request #375 from YuukiToriyama/feature/optout-county-name…
Browse files Browse the repository at this point in the history
…-completion/master

郡名補完機能のオプトアウトをrelease/v0.1.10にマージ
  • Loading branch information
YuukiToriyama authored Aug 18, 2024
2 parents be5b942 + b56d1ca commit 04335f0
Show file tree
Hide file tree
Showing 8 changed files with 252 additions and 181 deletions.
3 changes: 2 additions & 1 deletion core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,9 @@ rust-version = "1.73.0"
crate-type = ["rlib", "cdylib"]

[features]
default = []
default = ["city-name-correction"]
blocking = ["reqwest/blocking"]
city-name-correction = []

[dependencies]
itertools = "0.13.0"
Expand Down
6 changes: 6 additions & 0 deletions core/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
//! A Rust library to parse Japanese addresses.
//!
//! ## Feature flags
//! - `blocking`: Provides methods that work synchronously.
//! - `city-name-correction` *(enabled by default)*: Enables autocorrection when an ambiguous city name is given (e.g. the county name is omitted).

#[cfg(all(target_family = "wasm", feature = "blocking"))]
compile_error! {
"The `blocking` feature is not supported with wasm target."
Expand Down
38 changes: 28 additions & 10 deletions core/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,21 @@ pub async fn parse(api: Arc<AsyncApi>, input: &str) -> ParseResult {
Ok(result) => result,
};
// 市町村名を特定
let Ok(tokenizer) = tokenizer.read_city(prefecture.cities) else {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::City)),
};
let tokenizer = match tokenizer.read_city(&prefecture.cities) {
Ok(found) => found,
Err(not_found) => {
// 市区町村が特定できない場合かつフィーチャフラグが有効な場合、郡名が抜けている可能性を検討
match not_found.read_city_with_county_name_completion(&prefecture.cities) {
Ok(found) if cfg!(feature = "city-name-correction") => found,
_ => {
// それでも見つからない場合は終了
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::City)),
};
}
}
}
};
// その市町村の町名リストを取得
let city = match api
Expand Down Expand Up @@ -254,11 +264,19 @@ pub fn parse_blocking(api: Arc<BlockingApi>, input: &str) -> ParseResult {
}
Ok(result) => result,
};
let Ok(tokenizer) = tokenizer.read_city(prefecture.cities) else {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::City)),
};
let tokenizer = match tokenizer.read_city(&prefecture.cities) {
Ok(found) => found,
Err(not_found) => {
match not_found.read_city_with_county_name_completion(&prefecture.cities) {
Ok(found) if cfg!(feature = "city-name-correction") => found,
_ => {
return ParseResult {
address: Address::from(tokenizer),
error: Some(Error::new_parse_error(ParseErrorKind::City)),
};
}
}
}
};
let city = match api.get_city_master(
tokenizer.prefecture_name.as_ref().unwrap(),
Expand Down
1 change: 0 additions & 1 deletion core/src/parser/adapter.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
pub mod orthographical_variant_adapter;
pub mod vague_expression_adapter;
122 changes: 0 additions & 122 deletions core/src/parser/adapter/vague_expression_adapter.rs

This file was deleted.

3 changes: 3 additions & 0 deletions core/src/tokenizer.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
pub(crate) mod read_city;
pub(crate) mod read_city_with_county_name_completion;
pub(crate) mod read_prefecture;
pub(crate) mod read_town;

Expand All @@ -11,6 +12,8 @@ pub(crate) struct PrefectureNameFound;
#[derive(Debug)]
pub(crate) struct CityNameFound;
#[derive(Debug)]
pub(crate) struct CityNameNotFound;
#[derive(Debug)]
pub(crate) struct TownNameFound;
#[derive(Debug)]
pub(crate) struct End;
Expand Down
55 changes: 8 additions & 47 deletions core/src/tokenizer/read_city.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,14 @@ use std::marker::PhantomData;
use crate::parser::adapter::orthographical_variant_adapter::{
OrthographicalVariantAdapter, OrthographicalVariants, Variant,
};
use crate::parser::adapter::vague_expression_adapter::VagueExpressionAdapter;
use crate::tokenizer::{CityNameFound, End, PrefectureNameFound, Tokenizer};
use crate::tokenizer::{CityNameFound, CityNameNotFound, PrefectureNameFound, Tokenizer};

impl Tokenizer<PrefectureNameFound> {
pub(crate) fn read_city(
&self,
candidates: Vec<String>,
) -> Result<Tokenizer<CityNameFound>, Tokenizer<End>> {
for candidate in &candidates {
candidates: &Vec<String>,
) -> Result<Tokenizer<CityNameFound>, Tokenizer<CityNameNotFound>> {
for candidate in candidates {
if self.rest.starts_with(candidate) {
return Ok(Tokenizer {
input: self.input.clone(),
Expand Down Expand Up @@ -69,26 +68,13 @@ impl Tokenizer<PrefectureNameFound> {
}
}

// ここまでで市町村名の特定ができない場合はVagueExpressionAdapterを使用して市町村名を推測する
let vague_expression_adapter = VagueExpressionAdapter {};
if let Some(result) = vague_expression_adapter.apply(self.rest.as_str(), &candidates) {
return Ok(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: Some(result.0),
town_name: None,
rest: result.1,
_state: PhantomData::<CityNameFound>,
});
}

Err(Tokenizer {
input: self.input.clone(),
prefecture_name: self.prefecture_name.clone(),
city_name: None,
town_name: None,
rest: self.rest.clone(),
_state: PhantomData::<End>,
_state: PhantomData::<CityNameNotFound>,
})
}
}
Expand All @@ -108,7 +94,7 @@ mod tests {
rest: "横浜市保土ケ谷区川辺町2番地9".to_string(),
_state: PhantomData::<PrefectureNameFound>,
};
let result = tokenizer.read_city(vec![
let result = tokenizer.read_city(&vec![
"横浜市保土ケ谷区".to_string(),
"横浜市鶴見区".to_string(),
"横浜市西区".to_string(),
Expand All @@ -132,7 +118,7 @@ mod tests {
rest: "横浜市保土ヶ谷区川辺町2番地9".to_string(),
_state: PhantomData::<PrefectureNameFound>,
};
let result = tokenizer.read_city(vec![
let result = tokenizer.read_city(&vec![
"横浜市保土ケ谷区".to_string(),
"横浜市鶴見区".to_string(),
"横浜市西区".to_string(),
Expand All @@ -146,31 +132,6 @@ mod tests {
assert_eq!(tokenizer.rest, "川辺町2番地9");
}

#[test]
fn read_city_vague_expression_adapterで成功() {
let tokenizer = Tokenizer {
input: "埼玉県東秩父村大字御堂634番地".to_string(), // 「秩父郡」が省略されている
prefecture_name: Some("埼玉県".to_string()),
city_name: None,
town_name: None,
rest: "東秩父村大字御堂634番地".to_string(),
_state: PhantomData::<PrefectureNameFound>,
};
let result = tokenizer.read_city(vec![
"秩父郡皆野町".to_string(),
"秩父郡長瀞町".to_string(),
"秩父郡小鹿野町".to_string(),
"秩父郡東秩父村".to_string(),
]);
assert!(result.is_ok());
let tokenizer = result.unwrap();
assert_eq!(tokenizer.input, "埼玉県東秩父村大字御堂634番地");
assert_eq!(tokenizer.prefecture_name, Some("埼玉県".to_string()));
assert_eq!(tokenizer.city_name, Some("秩父郡東秩父村".to_string()));
assert_eq!(tokenizer.town_name, None);
assert_eq!(tokenizer.rest, "大字御堂634番地");
}

#[test]
fn read_city_失敗() {
let tokenizer = Tokenizer {
Expand All @@ -181,7 +142,7 @@ mod tests {
rest: "京都市上京区川辺町2番地9".to_string(),
_state: PhantomData::<PrefectureNameFound>,
};
let result = tokenizer.read_city(vec![
let result = tokenizer.read_city(&vec![
"横浜市保土ケ谷区".to_string(),
"横浜市鶴見区".to_string(),
"横浜市西区".to_string(),
Expand Down
Loading

0 comments on commit 04335f0

Please sign in to comment.