Skip to content

Commit

Permalink
Merge pull request #294 from YuukiToriyama/release/v0.1.3-rc.1
Browse files Browse the repository at this point in the history
release/v0.1.3-rc.1をmainブランチにマージ
  • Loading branch information
YuukiToriyama authored Jun 14, 2024
2 parents 1a53e19 + 095ae87 commit b6c51b8
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 95 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ members = [
resolver = "2"

[workspace.package]
version = "0.1.2"
version = "0.1.3-rc.1"
edition = "2021"
description = "A Rust Library to parse japanese addresses."
repository = "https://github.com/YuukiToriyama/japanese-address-parser"
Expand Down
118 changes: 95 additions & 23 deletions core/src/parser/adapter/vague_expression_adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,54 +26,126 @@ impl VagueExpressionAdapter {

#[cfg(test)]
mod tests {
use crate::entity::Prefecture;
use crate::parser::adapter::vague_expression_adapter::VagueExpressionAdapter;

#[test]
fn 郡名が省略されている場合_吉田郡永平寺町() {
let hukui = Prefecture::hukui();
let (rest, city_name) = VagueExpressionAdapter {}
.apply("永平寺町志比5-5", &provide_city_name_list())
.apply("永平寺町志比5-5", &hukui.cities)
.unwrap();
assert_eq!(rest, "志比5-5");
assert_eq!(city_name, "吉田郡永平寺町");
}

#[test]
fn 郡名が省略されている場合_今立郡池田町() {
let hukui = Prefecture::hukui();
let (rest, city_name) = VagueExpressionAdapter {}
.apply("池田町稲荷28-7", &provide_city_name_list())
.apply("池田町稲荷28-7", &hukui.cities)
.unwrap();
assert_eq!(rest, "稲荷28-7");
assert_eq!(city_name, "今立郡池田町");
}

#[test]
fn 郡名が省略されている場合_南条郡南越前町() {
let hukui = Prefecture::hukui();
let (rest, city_name) = VagueExpressionAdapter {}
.apply("南越前町今庄74-7-1", &provide_city_name_list())
.apply("南越前町今庄74-7-1", &hukui.cities)
.unwrap();
assert_eq!(rest, "今庄74-7-1");
assert_eq!(city_name, "南条郡南越前町");
}

fn provide_city_name_list() -> Vec<String> {
vec![
"福井市".to_string(),
"敦賀市".to_string(),
"小浜市".to_string(),
"大野市".to_string(),
"勝山市".to_string(),
"鯖江市".to_string(),
"あわら市".to_string(),
"越前市".to_string(),
"坂井市".to_string(),
"吉田郡永平寺町".to_string(),
"今立郡池田町".to_string(),
"南条郡南越前町".to_string(),
"丹生郡越前町".to_string(),
"三方郡美浜町".to_string(),
"大飯郡高浜町".to_string(),
"大飯郡おおい町".to_string(),
"三方上中郡若狭町".to_string(),
]
// Case: the county name (西村山郡) is omitted from the input address.
// The adapter is given the Yamagata city list and must yield the full
// municipality name together with the remaining part of the address.
#[test]
fn 郡名が省略されている場合_西村山郡河北町() {
    let cities = Prefecture::yamagata().cities;
    let adapter = VagueExpressionAdapter {};
    let result = adapter.apply("河北町大字吉田字馬場261", &cities);
    let (rest, city_name) = result.unwrap();
    assert_eq!(rest, "大字吉田字馬場261");
    assert_eq!(city_name, "西村山郡河北町");
}

// Case: the county name and the town name coincide (最上郡最上町) and the
// input only carries the town name. The adapter must still yield the full
// municipality name together with the remaining part of the address.
#[test]
fn 郡名と町名が一致している場合_最上郡最上町() {
    let cities = Prefecture::yamagata().cities;
    let adapter = VagueExpressionAdapter {};
    let result = adapter.apply("最上町法田2672-2", &cities);
    let (rest, city_name) = result.unwrap();
    assert_eq!(rest, "法田2672-2");
    assert_eq!(city_name, "最上郡最上町");
}

// Test-only fixture constructors: each one builds a `Prefecture` from a
// hard-coded name and a hard-coded list of municipality names.
impl Prefecture {
    /// Builds the 山形県 fixture with the municipality names listed below.
    fn yamagata() -> Self {
        let city_names = [
            "山形市",
            "米沢市",
            "鶴岡市",
            "酒田市",
            "新庄市",
            "寒河江市",
            "上山市",
            "村山市",
            "長井市",
            "天童市",
            "東根市",
            "尾花沢市",
            "南陽市",
            "東村山郡山辺町",
            "東村山郡中山町",
            "西村山郡河北町",
            "西村山郡西川町",
            "西村山郡朝日町",
            "西村山郡大江町",
            "北村山郡大石田町",
            "最上郡金山町",
            "最上郡最上町",
            "最上郡舟形町",
            "最上郡真室川町",
            "最上郡大蔵村",
            "最上郡鮭川村",
            "最上郡戸沢村",
            "東置賜郡高畠町",
            "東置賜郡川西町",
            "西置賜郡小国町",
            "西置賜郡白鷹町",
            "西置賜郡飯豊町",
            "東田川郡三川町",
            "東田川郡庄内町",
            "飽海郡遊佐町",
        ];
        Prefecture {
            name: "山形県".to_string(),
            // Convert the literals into owned strings, preserving their order.
            cities: city_names.iter().map(|&city| city.to_string()).collect(),
        }
    }

    /// Builds the 福井県 fixture with the municipality names listed below.
    fn hukui() -> Self {
        let city_names = [
            "福井市",
            "敦賀市",
            "小浜市",
            "大野市",
            "勝山市",
            "鯖江市",
            "あわら市",
            "越前市",
            "坂井市",
            "吉田郡永平寺町",
            "今立郡池田町",
            "南条郡南越前町",
            "丹生郡越前町",
            "三方郡美浜町",
            "大飯郡高浜町",
            "大飯郡おおい町",
            "三方上中郡若狭町",
        ];
        Prefecture {
            name: "福井県".to_string(),
            // Convert the literals into owned strings, preserving their order.
            cities: city_names.iter().map(|&city| city.to_string()).collect(),
        }
    }
}
}
1 change: 1 addition & 0 deletions core/src/util.rs
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
pub mod converter;
pub mod sequence_matcher;
mod trimmer;
160 changes: 91 additions & 69 deletions core/src/util/sequence_matcher.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
use crate::util::trimmer::trim_city_name;
use rapidfuzz::distance::lcs_seq;

pub struct SequenceMatcher;
Expand All @@ -14,6 +15,23 @@ impl SequenceMatcher {
possibilities: &[String],
threshold: Option<f64>,
) -> Result<String, Error> {
let highest_matches: Vec<String> =
Self::get_most_similar_matches(input, possibilities, threshold)
.into_iter()
.filter(|candidate| input.starts_with(&trim_city_name(candidate)))
.collect();
match &highest_matches.len() {
0 => Err(Error::NoCandidateExist),
1 => Ok(highest_matches.first().unwrap().clone()),
_ => Err(Error::MoreThanOneCandidateExist(highest_matches)),
}
}

fn get_most_similar_matches(
input: &str,
possibilities: &[String],
threshold: Option<f64>,
) -> Vec<String> {
let mut highest_similarity: f64 = 0.0;
let mut highest_matches: Vec<String> = vec![];
let length_of_longest_possibility = Self::get_length_of_longest_one(possibilities).unwrap();
Expand All @@ -30,11 +48,7 @@ impl SequenceMatcher {
highest_similarity = similarity;
}
}
match &highest_matches.len() {
0 => Err(Error::NoCandidateExist),
1 => Ok(highest_matches.first().unwrap().clone()),
_ => Err(Error::MoreThanOneCandidateExist(highest_matches)),
}
highest_matches
}

fn get_length_of_longest_one(text_list: &[String]) -> Option<usize> {
Expand All @@ -59,14 +73,16 @@ impl SequenceMatcher {

#[cfg(test)]
mod tests {
use crate::entity::Prefecture;
use crate::util::sequence_matcher::Error::{MoreThanOneCandidateExist, NoCandidateExist};
use crate::util::sequence_matcher::SequenceMatcher;

#[test]
fn get_length_of_longest_one() {
let fukushima = Prefecture::fukushima();
assert_eq!(SequenceMatcher::get_length_of_longest_one(&vec![]), None);
assert_eq!(
SequenceMatcher::get_length_of_longest_one(&generate_city_name_list()),
SequenceMatcher::get_length_of_longest_one(&fukushima.cities),
Some(8)
);
}
Expand Down Expand Up @@ -106,7 +122,8 @@ mod tests {

#[test]
fn get_most_similar_match() {
let possibilities = generate_city_name_list();
let fukushima = Prefecture::fukushima();
let possibilities = fukushima.cities;
let result = SequenceMatcher::get_most_similar_match(
"西郷村大字熊倉字折口原40番地",
&possibilities,
Expand Down Expand Up @@ -156,67 +173,72 @@ mod tests {
assert_eq!(result.err().unwrap(), NoCandidateExist);
}

fn generate_city_name_list() -> Vec<String> {
vec![
"福島市".to_string(),
"会津若松市".to_string(),
"郡山市".to_string(),
"いわき市".to_string(),
"白河市".to_string(),
"須賀川市".to_string(),
"喜多方市".to_string(),
"相馬市".to_string(),
"二本松市".to_string(),
"田村市".to_string(),
"南相馬市".to_string(),
"伊達市".to_string(),
"本宮市".to_string(),
"伊達郡桑折町".to_string(),
"伊達郡国見町".to_string(),
"伊達郡川俣町".to_string(),
"安達郡大玉村".to_string(),
"岩瀬郡鏡石町".to_string(),
"岩瀬郡天栄村".to_string(),
"南会津郡下郷町".to_string(),
"南会津郡檜枝岐村".to_string(),
"南会津郡只見町".to_string(),
"南会津郡南会津町".to_string(),
"耶麻郡北塩原村".to_string(),
"耶麻郡西会津町".to_string(),
"耶麻郡磐梯町".to_string(),
"耶麻郡猪苗代町".to_string(),
"河沼郡会津坂下町".to_string(),
"河沼郡湯川村".to_string(),
"河沼郡柳津町".to_string(),
"大沼郡三島町".to_string(),
"大沼郡金山町".to_string(),
"大沼郡昭和村".to_string(),
"大沼郡会津美里町".to_string(),
"西白河郡西郷村".to_string(),
"西白河郡泉崎村".to_string(),
"西白河郡中島村".to_string(),
"西白河郡矢吹町".to_string(),
"東白川郡棚倉町".to_string(),
"東白川郡矢祭町".to_string(),
"東白川郡塙町".to_string(),
"東白川郡鮫川村".to_string(),
"石川郡石川町".to_string(),
"石川郡玉川村".to_string(),
"石川郡平田村".to_string(),
"石川郡浅川町".to_string(),
"石川郡古殿町".to_string(),
"田村郡三春町".to_string(),
"田村郡小野町".to_string(),
"双葉郡広野町".to_string(),
"双葉郡楢葉町".to_string(),
"双葉郡富岡町".to_string(),
"双葉郡川内村".to_string(),
"双葉郡大熊町".to_string(),
"双葉郡双葉町".to_string(),
"双葉郡浪江町".to_string(),
"双葉郡葛尾村".to_string(),
"相馬郡新地町".to_string(),
"相馬郡飯舘村".to_string(),
]
// Test-only fixture constructor: builds a `Prefecture` from a hard-coded
// name and a hard-coded list of municipality names.
impl Prefecture {
    /// Builds the 福島県 fixture with the municipality names listed below.
    fn fukushima() -> Self {
        let city_names = [
            "福島市",
            "会津若松市",
            "郡山市",
            "いわき市",
            "白河市",
            "須賀川市",
            "喜多方市",
            "相馬市",
            "二本松市",
            "田村市",
            "南相馬市",
            "伊達市",
            "本宮市",
            "伊達郡桑折町",
            "伊達郡国見町",
            "伊達郡川俣町",
            "安達郡大玉村",
            "岩瀬郡鏡石町",
            "岩瀬郡天栄村",
            "南会津郡下郷町",
            "南会津郡檜枝岐村",
            "南会津郡只見町",
            "南会津郡南会津町",
            "耶麻郡北塩原村",
            "耶麻郡西会津町",
            "耶麻郡磐梯町",
            "耶麻郡猪苗代町",
            "河沼郡会津坂下町",
            "河沼郡湯川村",
            "河沼郡柳津町",
            "大沼郡三島町",
            "大沼郡金山町",
            "大沼郡昭和村",
            "大沼郡会津美里町",
            "西白河郡西郷村",
            "西白河郡泉崎村",
            "西白河郡中島村",
            "西白河郡矢吹町",
            "東白川郡棚倉町",
            "東白川郡矢祭町",
            "東白川郡塙町",
            "東白川郡鮫川村",
            "石川郡石川町",
            "石川郡玉川村",
            "石川郡平田村",
            "石川郡浅川町",
            "石川郡古殿町",
            "田村郡三春町",
            "田村郡小野町",
            "双葉郡広野町",
            "双葉郡楢葉町",
            "双葉郡富岡町",
            "双葉郡川内村",
            "双葉郡大熊町",
            "双葉郡双葉町",
            "双葉郡浪江町",
            "双葉郡葛尾村",
            "相馬郡新地町",
            "相馬郡飯舘村",
        ];
        Prefecture {
            name: "福島県".to_string(),
            // Convert the literals into owned strings, preserving their order.
            cities: city_names.iter().map(|&city| city.to_string()).collect(),
        }
    }
}
}
18 changes: 18 additions & 0 deletions core/src/util/trimmer.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/// Removes the leading part of `input` up to and including the first `'郡'`
/// or `'市'` character and returns the remainder as a new `String`.
///
/// For example, `"南会津郡下郷町"` becomes `"下郷町"`. If neither character
/// occurs, the input is returned unchanged.
///
/// Only the first marker is considered, so a name that itself starts with
/// (or contains) one of the markers is cut at that point as well:
/// `"郡山市"` yields `"山市"`.
pub fn trim_city_name(input: &str) -> String {
    let is_marker = |c: char| c == '郡' || c == '市';
    input
        .split_once(is_marker)
        .map_or(input, |(_, remainder)| remainder)
        .to_string()
}

#[cfg(test)]
mod tests {
    use crate::util::trimmer::trim_city_name;

    // Success cases: everything up to and including the first 郡 / 市 is
    // removed and the remainder is returned.
    #[test]
    fn trim_city_name_成功() {
        let cases = [
            ("南会津郡下郷町", "下郷町"),
            ("南会津郡只見町", "只見町"),
            ("白河市新白河一丁目", "新白河一丁目"),
        ];
        for (input, expected) in cases {
            assert_eq!(trim_city_name(input), expected);
        }
    }
}
5 changes: 5 additions & 0 deletions tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,8 @@ async fn 大字表記省略への対応テスト() {
async fn 郡が省略されている場合への対応テスト() {
run_data_driven_tests("./test_data/郡が省略されている場合への対応.csv").await
}

/// Integration test for the case where the county (郡) name and the town
/// name coincide. Delegates to `run_data_driven_tests` with the CSV fixture
/// path below and awaits its completion.
// NOTE(review): presumably each CSV row pairs an input address with its
// expected parse result — confirm against `run_data_driven_tests`.
#[tokio::test]
async fn 郡名と町名が一致している場合() {
run_data_driven_tests("./test_data/郡名と町名が一致している場合.csv").await
}
Loading

0 comments on commit b6c51b8

Please sign in to comment.