Skip to content

Commit

Permalink
郡名と町名が一致している場合: 類似度測定による候補を更に絞り込む (#292)
Browse files Browse the repository at this point in the history
* update: #268: `VagueExpressionAdapter`に単体テストを追加
* update: #268: `get_most_similar_match()`を分解
* add: #268: `trim_city_name()`を定義
* add: #268: `trim_city_name()`を使用し候補を絞るようにした
* update: #268: 結合テスト`郡名と町名が一致している場合()`を有効化
  • Loading branch information
YuukiToriyama authored Jun 14, 2024
1 parent 65a5bd5 commit 9204588
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 6 deletions.
63 changes: 63 additions & 0 deletions core/src/parser/adapter/vague_expression_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,70 @@ mod tests {
assert_eq!(city_name, "南条郡南越前町");
}

/// The input starts with the town name only (no district prefix); the adapter
/// is expected to resolve it to the district-qualified entry in the city list
/// and hand back the remaining part of the address.
#[test]
fn 郡名が省略されている場合_西村山郡河北町() {
    let prefecture = Prefecture::yamagata();
    let address = "河北町大字吉田字馬場261";

    let outcome = VagueExpressionAdapter {}.apply(address, &prefecture.cities);
    let (remainder, matched_city) = outcome.unwrap();

    assert_eq!(remainder, "大字吉田字馬場261");
    assert_eq!(matched_city, "西村山郡河北町");
}

/// The district (最上郡) and the town (最上町) share the same leading
/// characters, and the input carries only the town name; the adapter must
/// still settle on the single entry 最上郡最上町.
#[test]
fn 郡名と町名が一致している場合_最上郡最上町() {
    let prefecture = Prefecture::yamagata();
    let address = "最上町法田2672-2";

    let outcome = VagueExpressionAdapter {}.apply(address, &prefecture.cities);
    let (remainder, matched_city) = outcome.unwrap();

    assert_eq!(remainder, "法田2672-2");
    assert_eq!(matched_city, "最上郡最上町");
}

impl Prefecture {
/// Test fixture: a `Prefecture` named 山形県 whose `cities` holds the
/// municipality names listed below, in this order. Towns and villages are
/// written with their district (郡) prefix.
fn yamagata() -> Self {
    // Kept as string slices so the table stays readable; converted to owned
    // `String`s when the struct is built.
    let city_names = [
        "山形市",
        "米沢市",
        "鶴岡市",
        "酒田市",
        "新庄市",
        "寒河江市",
        "上山市",
        "村山市",
        "長井市",
        "天童市",
        "東根市",
        "尾花沢市",
        "南陽市",
        "東村山郡山辺町",
        "東村山郡中山町",
        "西村山郡河北町",
        "西村山郡西川町",
        "西村山郡朝日町",
        "西村山郡大江町",
        "北村山郡大石田町",
        "最上郡金山町",
        "最上郡最上町",
        "最上郡舟形町",
        "最上郡真室川町",
        "最上郡大蔵村",
        "最上郡鮭川村",
        "最上郡戸沢村",
        "東置賜郡高畠町",
        "東置賜郡川西町",
        "西置賜郡小国町",
        "西置賜郡白鷹町",
        "西置賜郡飯豊町",
        "東田川郡三川町",
        "東田川郡庄内町",
        "飽海郡遊佐町",
    ];

    Prefecture {
        name: String::from("山形県"),
        cities: city_names.iter().map(|city| city.to_string()).collect(),
    }
}

fn hukui() -> Self {
Prefecture {
name: "福井県".to_string(),
Expand Down
1 change: 1 addition & 0 deletions core/src/util.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod converter;
pub mod sequence_matcher;
mod trimmer;
24 changes: 19 additions & 5 deletions core/src/util/sequence_matcher.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::util::trimmer::trim_city_name;
use rapidfuzz::distance::lcs_seq;

/// Field-less type that serves as a namespace for the similarity-matching
/// helpers; they are invoked as associated functions (`Self::...`).
pub struct SequenceMatcher;
Expand All @@ -14,6 +15,23 @@ impl SequenceMatcher {
possibilities: &[String],
threshold: Option<f64>,
) -> Result<String, Error> {
let highest_matches: Vec<String> =
Self::get_most_similar_matches(input, possibilities, threshold)
.into_iter()
.filter(|candidate| input.starts_with(&trim_city_name(candidate)))
.collect();
match &highest_matches.len() {
0 => Err(Error::NoCandidateExist),
1 => Ok(highest_matches.first().unwrap().clone()),
_ => Err(Error::MoreThanOneCandidateExist(highest_matches)),
}
}

fn get_most_similar_matches(
input: &str,
possibilities: &[String],
threshold: Option<f64>,
) -> Vec<String> {
let mut highest_similarity: f64 = 0.0;
let mut highest_matches: Vec<String> = vec![];
let length_of_longest_possibility = Self::get_length_of_longest_one(possibilities).unwrap();
Expand All @@ -30,11 +48,7 @@ impl SequenceMatcher {
highest_similarity = similarity;
}
}
match &highest_matches.len() {
0 => Err(Error::NoCandidateExist),
1 => Ok(highest_matches.first().unwrap().clone()),
_ => Err(Error::MoreThanOneCandidateExist(highest_matches)),
}
highest_matches
}

fn get_length_of_longest_one(text_list: &[String]) -> Option<usize> {
Expand Down
18 changes: 18 additions & 0 deletions core/src/util/trimmer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/// Returns the part of `input` that follows the first `郡` or `市`.
///
/// When neither character occurs, the whole input is returned unchanged.
/// The delimiter itself is not included in the result.
///
/// Note that the cut is made at the *first* occurrence, so a name that itself
/// begins with one of these characters (for example `郡山市`) loses that
/// leading character too and yields `山市`.
pub fn trim_city_name(input: &str) -> String {
    let is_delimiter = |c: char| matches!(c, '郡' | '市');
    input
        .split_once(is_delimiter)
        .map_or(input, |(_, remainder)| remainder)
        .to_string()
}

#[cfg(test)]
mod tests {
    use crate::util::trimmer::trim_city_name;

    /// Each case pairs an input with the text expected after the first
    /// `郡` or `市` has been cut away.
    #[test]
    fn trim_city_name_成功() {
        let cases = [
            ("南会津郡下郷町", "下郷町"),
            ("南会津郡只見町", "只見町"),
            ("白河市新白河一丁目", "新白河一丁目"),
        ];
        for &(input, expected) in cases.iter() {
            assert_eq!(trim_city_name(input), expected);
        }
    }
}
1 change: 0 additions & 1 deletion tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ async fn 郡が省略されている場合への対応テスト() {
}

// Integration test for addresses where the district name and the town name coincide.
// Hands the CSV at the given path to `run_data_driven_tests` and awaits the result;
// the individual cases live in that CSV file, not in this function.
#[tokio::test]
#[ignore]
async fn 郡名と町名が一致している場合() {
run_data_driven_tests("./test_data/郡名と町名が一致している場合.csv").await
}

0 comments on commit 9204588

Please sign in to comment.