diff --git a/core/src/parser/adapter/vague_expression_adapter.rs b/core/src/parser/adapter/vague_expression_adapter.rs index 13007c98..1ff93c10 100644 --- a/core/src/parser/adapter/vague_expression_adapter.rs +++ b/core/src/parser/adapter/vague_expression_adapter.rs @@ -59,7 +59,70 @@ mod tests { assert_eq!(city_name, "南条郡南越前町"); } + #[test] + fn 郡名が省略されている場合_西村山郡河北町() { + let yamagata = Prefecture::yamagata(); + let (rest, city_name) = VagueExpressionAdapter {} + .apply("河北町大字吉田字馬場261", &yamagata.cities) + .unwrap(); + assert_eq!(rest, "大字吉田字馬場261"); + assert_eq!(city_name, "西村山郡河北町"); + } + + #[test] + fn 郡名と町名が一致している場合_最上郡最上町() { + let yamagata = Prefecture::yamagata(); + let (rest, city_name) = VagueExpressionAdapter {} + .apply("最上町法田2672-2", &yamagata.cities) + .unwrap(); + assert_eq!(rest, "法田2672-2"); + assert_eq!(city_name, "最上郡最上町"); + } + impl Prefecture { + fn yamagata() -> Self { + Prefecture { + name: "山形県".to_string(), + cities: vec![ + "山形市".to_string(), + "米沢市".to_string(), + "鶴岡市".to_string(), + "酒田市".to_string(), + "新庄市".to_string(), + "寒河江市".to_string(), + "上山市".to_string(), + "村山市".to_string(), + "長井市".to_string(), + "天童市".to_string(), + "東根市".to_string(), + "尾花沢市".to_string(), + "南陽市".to_string(), + "東村山郡山辺町".to_string(), + "東村山郡中山町".to_string(), + "西村山郡河北町".to_string(), + "西村山郡西川町".to_string(), + "西村山郡朝日町".to_string(), + "西村山郡大江町".to_string(), + "北村山郡大石田町".to_string(), + "最上郡金山町".to_string(), + "最上郡最上町".to_string(), + "最上郡舟形町".to_string(), + "最上郡真室川町".to_string(), + "最上郡大蔵村".to_string(), + "最上郡鮭川村".to_string(), + "最上郡戸沢村".to_string(), + "東置賜郡高畠町".to_string(), + "東置賜郡川西町".to_string(), + "西置賜郡小国町".to_string(), + "西置賜郡白鷹町".to_string(), + "西置賜郡飯豊町".to_string(), + "東田川郡三川町".to_string(), + "東田川郡庄内町".to_string(), + "飽海郡遊佐町".to_string(), + ], + } + } + fn hukui() -> Self { Prefecture { name: "福井県".to_string(), diff --git a/core/src/util.rs b/core/src/util.rs index 7d8f3774..09d7245b 100644 --- a/core/src/util.rs +++ b/core/src/util.rs @@ -1,2 +1,3 @@ pub mod converter; pub mod sequence_matcher; +mod trimmer; diff --git a/core/src/util/sequence_matcher.rs b/core/src/util/sequence_matcher.rs index 80055cdc..a40d8b04 100644 --- a/core/src/util/sequence_matcher.rs +++ b/core/src/util/sequence_matcher.rs @@ -1,3 +1,4 @@ +use crate::util::trimmer::trim_city_name; use rapidfuzz::distance::lcs_seq; pub struct SequenceMatcher; @@ -14,6 +15,23 @@ impl SequenceMatcher { possibilities: &[String], threshold: Option, ) -> Result { + let highest_matches: Vec = + Self::get_most_similar_matches(input, possibilities, threshold) + .into_iter() + .filter(|candidate| input.starts_with(&trim_city_name(candidate))) + .collect(); + match &highest_matches.len() { + 0 => Err(Error::NoCandidateExist), + 1 => Ok(highest_matches.first().unwrap().clone()), + _ => Err(Error::MoreThanOneCandidateExist(highest_matches)), + } + } + + fn get_most_similar_matches( + input: &str, + possibilities: &[String], + threshold: Option, + ) -> Vec { let mut highest_similarity: f64 = 0.0; let mut highest_matches: Vec = vec![]; let length_of_longest_possibility = Self::get_length_of_longest_one(possibilities).unwrap(); @@ -30,11 +48,7 @@ impl SequenceMatcher { highest_similarity = similarity; } } - match &highest_matches.len() { - 0 => Err(Error::NoCandidateExist), - 1 => Ok(highest_matches.first().unwrap().clone()), - _ => Err(Error::MoreThanOneCandidateExist(highest_matches)), - } + highest_matches } fn get_length_of_longest_one(text_list: &[String]) -> Option { diff --git a/core/src/util/trimmer.rs b/core/src/util/trimmer.rs new file mode 100644 index 00000000..ef931f1f --- /dev/null +++ b/core/src/util/trimmer.rs @@ -0,0 +1,18 @@ +pub fn trim_city_name(input: &str) -> String { + match input.chars().position(|c| c == '郡' || c == '市') { + Some(position) => input.chars().skip(position + 1).collect::(), + None => input.to_string(), + } +} + +#[cfg(test)] +mod tests { + use crate::util::trimmer::trim_city_name; + + #[test] + fn trim_city_name_成功() { + assert_eq!(trim_city_name("南会津郡下郷町"), "下郷町"); + assert_eq!(trim_city_name("南会津郡只見町"), "只見町"); + assert_eq!(trim_city_name("白河市新白河一丁目"), "新白河一丁目"); + } +} diff --git a/tests/integration_tests.rs b/tests/integration_tests.rs index e80d35a9..cf17cf6c 100644 --- a/tests/integration_tests.rs +++ b/tests/integration_tests.rs @@ -40,7 +40,6 @@ async fn 郡が省略されている場合への対応テスト() { } #[tokio::test] -#[ignore] async fn 郡名と町名が一致している場合() { run_data_driven_tests("./test_data/郡名と町名が一致している場合.csv").await }